Merge branch 'timers-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull timer updates from Thomas Gleixner:
 "The timer department delivers this time:

   - Support for cross clock domain timestamps in the core code plus a
     first user.  That allows more precise timestamping for PTP and
     later for audio and other peripherals.

     The ptp/e1000e patches have been acked by the relevant maintainers
     and are carried in the timer tree to avoid merge ordering issues.

   - Support for unregistering the current clocksource watchdog.  That
     lifts a limitation for switching clocksources which has been there
     from day 1

   - The usual pile of fixes and updates to the core and the drivers.
     Nothing outstanding and exciting"

* 'timers-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (26 commits)
  time/timekeeping: Work around false positive GCC warning
  e1000e: Adds hardware supported cross timestamp on e1000e nic
  ptp: Add PTP_SYS_OFFSET_PRECISE for driver crosstimestamping
  x86/tsc: Always Running Timer (ART) correlated clocksource
  hrtimer: Revert CLOCK_MONOTONIC_RAW support
  time: Add history to cross timestamp interface supporting slower devices
  time: Add driver cross timestamp interface for higher precision time synchronization
  time: Remove duplicated code in ktime_get_raw_and_real()
  time: Add timekeeping snapshot code capturing system time and counter
  time: Add cycles to nanoseconds translation
  jiffies: Use CLOCKSOURCE_MASK instead of constant
  clocksource: Introduce clocksource_freq2mult()
  clockevents/drivers/exynos_mct: Implement ->set_state_oneshot_stopped()
  clockevents/drivers/arm_global_timer: Implement ->set_state_oneshot_stopped()
  clockevents/drivers/arm_arch_timer: Implement ->set_state_oneshot_stopped()
  clocksource/drivers/arm_global_timer: Register delay timer
  clocksource/drivers/lpc32xx: Support timer-based ARM delay
  clocksource/drivers/lpc32xx: Support periodic mode
  clocksource/drivers/lpc32xx: Don't use the prescaler counter for clockevents
  clocksource/drivers/rockchip: Add err handle for rk_timer_init
  ...
diff --git a/.mailmap b/.mailmap
index b1e9a97..7e6c533 100644
--- a/.mailmap
+++ b/.mailmap
@@ -21,6 +21,7 @@
 Andrew Morton <akpm@linux-foundation.org>
 Andrew Vasquez <andrew.vasquez@qlogic.com>
 Andy Adamson <andros@citi.umich.edu>
+Antonio Ospite <ao2@ao2.it> <ao2@amarulasolutions.com>
 Archit Taneja <archit@ti.com>
 Arnaud Patard <arnaud.patard@rtp-net.org>
 Arnd Bergmann <arnd@arndb.de>
diff --git a/Documentation/DocBook/media/v4l/media-types.xml b/Documentation/DocBook/media/v4l/media-types.xml
index 1af3842..0ee0f33 100644
--- a/Documentation/DocBook/media/v4l/media-types.xml
+++ b/Documentation/DocBook/media/v4l/media-types.xml
@@ -57,10 +57,6 @@
 	    <entry>Connector for a RGB composite signal.</entry>
 	  </row>
 	  <row>
-	    <entry><constant>MEDIA_ENT_F_CONN_TEST</constant></entry>
-	    <entry>Connector for a test generator.</entry>
-	  </row>
-	  <row>
 	    <entry><constant>MEDIA_ENT_F_CAM_SENSOR</constant></entry>
 	    <entry>Camera video sensor entity.</entry>
 	  </row>
diff --git a/Documentation/Intel-IOMMU.txt b/Documentation/Intel-IOMMU.txt
index 7b57fc0..49585b6 100644
--- a/Documentation/Intel-IOMMU.txt
+++ b/Documentation/Intel-IOMMU.txt
@@ -3,7 +3,7 @@
 
 The architecture spec can be obtained from the below location.
 
-http://www.intel.com/technology/virtualization/
+http://www.intel.com/content/dam/www/public/us/en/documents/product-specifications/vt-directed-io-spec.pdf
 
 This guide gives a quick cheat sheet for some basic understanding.
 
diff --git a/Documentation/cgroup-v2.txt b/Documentation/cgroup-v2.txt
index 65b3eac..ff49cf9 100644
--- a/Documentation/cgroup-v2.txt
+++ b/Documentation/cgroup-v2.txt
@@ -7,7 +7,7 @@
 conventions of cgroup v2.  It describes all userland-visible aspects
 of cgroup including core and specific controller behaviors.  All
 future changes must be reflected in this document.  Documentation for
-v1 is available under Documentation/cgroup-legacy/.
+v1 is available under Documentation/cgroup-v1/.
 
 CONTENTS
 
@@ -843,6 +843,10 @@
 		Amount of memory used to cache filesystem data,
 		including tmpfs and shared memory.
 
+	  sock
+
+		Amount of memory used in network transmission buffers
+
 	  file_mapped
 
 		Amount of cached filesystem data mapped with mmap()
diff --git a/Documentation/devicetree/bindings/arm/omap/omap.txt b/Documentation/devicetree/bindings/arm/omap/omap.txt
index a2bd593..66422d6 100644
--- a/Documentation/devicetree/bindings/arm/omap/omap.txt
+++ b/Documentation/devicetree/bindings/arm/omap/omap.txt
@@ -23,6 +23,7 @@
   during suspend.
 - ti,no-reset-on-init: When present, the module should not be reset at init
 - ti,no-idle-on-init: When present, the module should not be idled at init
+- ti,no-idle: When present, the module is never allowed to idle.
 
 Example:
 
diff --git a/Documentation/devicetree/bindings/clock/rockchip,rk3036-cru.txt b/Documentation/devicetree/bindings/clock/rockchip,rk3036-cru.txt
index ace0599..20df350 100644
--- a/Documentation/devicetree/bindings/clock/rockchip,rk3036-cru.txt
+++ b/Documentation/devicetree/bindings/clock/rockchip,rk3036-cru.txt
@@ -30,7 +30,7 @@
 clock-output-names:
  - "xin24m" - crystal input - required,
  - "ext_i2s" - external I2S clock - optional,
- - "ext_gmac" - external GMAC clock - optional
+ - "rmii_clkin" - external EMAC clock - optional
 
 Example: Clock controller node:
 
diff --git a/Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.txt b/Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.txt
index 7803e77..007a5b4 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.txt
+++ b/Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.txt
@@ -24,9 +24,8 @@
 		1 = edge triggered
 		4 = level triggered
 
-  Cells 4 and beyond are reserved for future use. When the 1st cell
-  has a value of 0 or 1, cells 4 and beyond act as padding, and may be
-  ignored. It is recommended that padding cells have a value of 0.
+  Cells 4 and beyond are reserved for future use and must have a value
+  of 0 if present.
 
 - reg : Specifies base physical address(s) and size of the GIC
   registers, in the following order:
diff --git a/Documentation/devicetree/bindings/net/brcm,bcmgenet.txt b/Documentation/devicetree/bindings/net/brcm,bcmgenet.txt
index 451fef2..10587bd 100644
--- a/Documentation/devicetree/bindings/net/brcm,bcmgenet.txt
+++ b/Documentation/devicetree/bindings/net/brcm,bcmgenet.txt
@@ -68,7 +68,7 @@
 		phy1: ethernet-phy@1 {
 			max-speed = <1000>;
 			reg = <0x1>;
-			compatible = "brcm,28nm-gphy", "ethernet-phy-ieee802.3-c22";
+			compatible = "ethernet-phy-ieee802.3-c22";
 		};
 	};
 };
@@ -115,7 +115,7 @@
 		phy0: ethernet-phy@0 {
 			max-speed = <1000>;
 			reg = <0x0>;
-			compatible = "brcm,bcm53125", "ethernet-phy-ieee802.3-c22";
+			compatible = "ethernet-phy-ieee802.3-c22";
 		};
 	};
 };
diff --git a/Documentation/devicetree/bindings/net/hisilicon-hns-dsaf.txt b/Documentation/devicetree/bindings/net/hisilicon-hns-dsaf.txt
index 80411b2..ecacfa4 100644
--- a/Documentation/devicetree/bindings/net/hisilicon-hns-dsaf.txt
+++ b/Documentation/devicetree/bindings/net/hisilicon-hns-dsaf.txt
@@ -4,8 +4,6 @@
 - compatible: should be "hisilicon,hns-dsaf-v1" or "hisilicon,hns-dsaf-v2".
   "hisilicon,hns-dsaf-v1" is for hip05.
   "hisilicon,hns-dsaf-v2" is for Hi1610 and Hi1612.
-- dsa-name: dsa fabric name who provide this interface.
-  should be "dsafX", X is the dsaf id.
 - mode: dsa fabric mode string. only support one of dsaf modes like these:
 		"2port-64vf",
 		"6port-16rss",
@@ -26,9 +24,8 @@
 
 Example:
 
-dsa: dsa@c7000000 {
+dsaf0: dsa@c7000000 {
 	compatible = "hisilicon,hns-dsaf-v1";
-	dsa_name = "dsaf0";
 	mode = "6port-16rss";
 	interrupt-parent = <&mbigen_dsa>;
 	reg = <0x0 0xC0000000 0x0 0x420000
diff --git a/Documentation/devicetree/bindings/net/hisilicon-hns-nic.txt b/Documentation/devicetree/bindings/net/hisilicon-hns-nic.txt
index 41d19be..e6a9d1c 100644
--- a/Documentation/devicetree/bindings/net/hisilicon-hns-nic.txt
+++ b/Documentation/devicetree/bindings/net/hisilicon-hns-nic.txt
@@ -4,8 +4,9 @@
 - compatible: "hisilicon,hns-nic-v1" or "hisilicon,hns-nic-v2".
   "hisilicon,hns-nic-v1" is for hip05.
   "hisilicon,hns-nic-v2" is for Hi1610 and Hi1612.
-- ae-name: accelerator name who provides this interface,
-  is simply a name referring to the name of name in the accelerator node.
+- ae-handle: accelerator engine handle for hns,
+  specifies a reference to the associating hardware driver node.
+  see Documentation/devicetree/bindings/net/hisilicon-hns-dsaf.txt
 - port-id: is the index of port provided by DSAF (the accelerator). DSAF can
   connect to 8 PHYs. Port 0 to 1 are both used for adminstration purpose. They
   are called debug ports.
@@ -41,7 +42,7 @@
 
 	ethernet@0{
 		compatible = "hisilicon,hns-nic-v1";
-		ae-name = "dsaf0";
+		ae-handle = <&dsaf0>;
 		port-id = <0>;
 		local-mac-address = [a2 14 e4 4b 56 76];
 	};
diff --git a/Documentation/devicetree/bindings/net/marvell-armada-370-neta.txt b/Documentation/devicetree/bindings/net/marvell-armada-370-neta.txt
index aeea50c..d0cb869 100644
--- a/Documentation/devicetree/bindings/net/marvell-armada-370-neta.txt
+++ b/Documentation/devicetree/bindings/net/marvell-armada-370-neta.txt
@@ -6,12 +6,17 @@
 - interrupts: interrupt for the device
 - phy: See ethernet.txt file in the same directory.
 - phy-mode: See ethernet.txt file in the same directory
-- clocks: a pointer to the reference clock for this device.
+- clocks: List of clocks for this device. At least one clock is
+  mandatory for the core clock. If several clocks are given, then the
+  clock-names property must be used to identify them.
 
 Optional properties:
 - tx-csum-limit: maximum mtu supported by port that allow TX checksum.
   Value is presented in bytes. If not used, by default 1600B is set for
   "marvell,armada-370-neta" and 9800B for others.
+- clock-names: List of names corresponding to clocks property; shall be
+  "core" for core clock and "bus" for the optional bus clock.
+
 
 Example:
 
diff --git a/Documentation/devicetree/bindings/net/mdio-mux-gpio.txt b/Documentation/devicetree/bindings/net/mdio-mux-gpio.txt
index 7938411..694987d 100644
--- a/Documentation/devicetree/bindings/net/mdio-mux-gpio.txt
+++ b/Documentation/devicetree/bindings/net/mdio-mux-gpio.txt
@@ -38,7 +38,6 @@
 
 			phy11: ethernet-phy@1 {
 				reg = <1>;
-				compatible = "marvell,88e1149r";
 				marvell,reg-init = <3 0x10 0 0x5777>,
 					<3 0x11 0 0x00aa>,
 					<3 0x12 0 0x4105>,
@@ -48,7 +47,6 @@
 			};
 			phy12: ethernet-phy@2 {
 				reg = <2>;
-				compatible = "marvell,88e1149r";
 				marvell,reg-init = <3 0x10 0 0x5777>,
 					<3 0x11 0 0x00aa>,
 					<3 0x12 0 0x4105>,
@@ -58,7 +56,6 @@
 			};
 			phy13: ethernet-phy@3 {
 				reg = <3>;
-				compatible = "marvell,88e1149r";
 				marvell,reg-init = <3 0x10 0 0x5777>,
 					<3 0x11 0 0x00aa>,
 					<3 0x12 0 0x4105>,
@@ -68,7 +65,6 @@
 			};
 			phy14: ethernet-phy@4 {
 				reg = <4>;
-				compatible = "marvell,88e1149r";
 				marvell,reg-init = <3 0x10 0 0x5777>,
 					<3 0x11 0 0x00aa>,
 					<3 0x12 0 0x4105>,
@@ -85,7 +81,6 @@
 
 			phy21: ethernet-phy@1 {
 				reg = <1>;
-				compatible = "marvell,88e1149r";
 				marvell,reg-init = <3 0x10 0 0x5777>,
 					<3 0x11 0 0x00aa>,
 					<3 0x12 0 0x4105>,
@@ -95,7 +90,6 @@
 			};
 			phy22: ethernet-phy@2 {
 				reg = <2>;
-				compatible = "marvell,88e1149r";
 				marvell,reg-init = <3 0x10 0 0x5777>,
 					<3 0x11 0 0x00aa>,
 					<3 0x12 0 0x4105>,
@@ -105,7 +99,6 @@
 			};
 			phy23: ethernet-phy@3 {
 				reg = <3>;
-				compatible = "marvell,88e1149r";
 				marvell,reg-init = <3 0x10 0 0x5777>,
 					<3 0x11 0 0x00aa>,
 					<3 0x12 0 0x4105>,
@@ -115,7 +108,6 @@
 			};
 			phy24: ethernet-phy@4 {
 				reg = <4>;
-				compatible = "marvell,88e1149r";
 				marvell,reg-init = <3 0x10 0 0x5777>,
 					<3 0x11 0 0x00aa>,
 					<3 0x12 0 0x4105>,
diff --git a/Documentation/devicetree/bindings/net/mdio-mux.txt b/Documentation/devicetree/bindings/net/mdio-mux.txt
index f65606f..491f5bd 100644
--- a/Documentation/devicetree/bindings/net/mdio-mux.txt
+++ b/Documentation/devicetree/bindings/net/mdio-mux.txt
@@ -47,7 +47,6 @@
 
 			phy11: ethernet-phy@1 {
 				reg = <1>;
-				compatible = "marvell,88e1149r";
 				marvell,reg-init = <3 0x10 0 0x5777>,
 					<3 0x11 0 0x00aa>,
 					<3 0x12 0 0x4105>,
@@ -57,7 +56,6 @@
 			};
 			phy12: ethernet-phy@2 {
 				reg = <2>;
-				compatible = "marvell,88e1149r";
 				marvell,reg-init = <3 0x10 0 0x5777>,
 					<3 0x11 0 0x00aa>,
 					<3 0x12 0 0x4105>,
@@ -67,7 +65,6 @@
 			};
 			phy13: ethernet-phy@3 {
 				reg = <3>;
-				compatible = "marvell,88e1149r";
 				marvell,reg-init = <3 0x10 0 0x5777>,
 					<3 0x11 0 0x00aa>,
 					<3 0x12 0 0x4105>,
@@ -77,7 +74,6 @@
 			};
 			phy14: ethernet-phy@4 {
 				reg = <4>;
-				compatible = "marvell,88e1149r";
 				marvell,reg-init = <3 0x10 0 0x5777>,
 					<3 0x11 0 0x00aa>,
 					<3 0x12 0 0x4105>,
@@ -94,7 +90,6 @@
 
 			phy21: ethernet-phy@1 {
 				reg = <1>;
-				compatible = "marvell,88e1149r";
 				marvell,reg-init = <3 0x10 0 0x5777>,
 					<3 0x11 0 0x00aa>,
 					<3 0x12 0 0x4105>,
@@ -104,7 +99,6 @@
 			};
 			phy22: ethernet-phy@2 {
 				reg = <2>;
-				compatible = "marvell,88e1149r";
 				marvell,reg-init = <3 0x10 0 0x5777>,
 					<3 0x11 0 0x00aa>,
 					<3 0x12 0 0x4105>,
@@ -114,7 +108,6 @@
 			};
 			phy23: ethernet-phy@3 {
 				reg = <3>;
-				compatible = "marvell,88e1149r";
 				marvell,reg-init = <3 0x10 0 0x5777>,
 					<3 0x11 0 0x00aa>,
 					<3 0x12 0 0x4105>,
@@ -124,7 +117,6 @@
 			};
 			phy24: ethernet-phy@4 {
 				reg = <4>;
-				compatible = "marvell,88e1149r";
 				marvell,reg-init = <3 0x10 0 0x5777>,
 					<3 0x11 0 0x00aa>,
 					<3 0x12 0 0x4105>,
diff --git a/Documentation/devicetree/bindings/net/phy.txt b/Documentation/devicetree/bindings/net/phy.txt
index 525e165..bc1c3c8 100644
--- a/Documentation/devicetree/bindings/net/phy.txt
+++ b/Documentation/devicetree/bindings/net/phy.txt
@@ -17,8 +17,7 @@
   "ethernet-phy-ieee802.3-c22" or "ethernet-phy-ieee802.3-c45" for
   PHYs that implement IEEE802.3 clause 22 or IEEE802.3 clause 45
   specifications. If neither of these are specified, the default is to
-  assume clause 22. The compatible list may also contain other
-  elements.
+  assume clause 22.
 
   If the phy's identifier is known then the list may contain an entry
   of the form: "ethernet-phy-idAAAA.BBBB" where
@@ -28,6 +27,9 @@
             4 hex digits. This is the chip vendor OUI bits 19:24,
             followed by 10 bits of a vendor specific ID.
 
+  The compatible list should not contain other values than those
+  listed here.
+
 - max-speed: Maximum PHY supported speed (10, 100, 1000...)
 
 - broken-turn-around: If set, indicates the PHY device does not correctly
diff --git a/Documentation/devicetree/bindings/net/renesas,ravb.txt b/Documentation/devicetree/bindings/net/renesas,ravb.txt
index 81a9f9e..c8ac222 100644
--- a/Documentation/devicetree/bindings/net/renesas,ravb.txt
+++ b/Documentation/devicetree/bindings/net/renesas,ravb.txt
@@ -82,8 +82,8 @@
 				  "ch16", "ch17", "ch18", "ch19",
 				  "ch20", "ch21", "ch22", "ch23",
 				  "ch24";
-		clocks = <&mstp8_clks R8A7795_CLK_ETHERAVB>;
-		power-domains = <&cpg_clocks>;
+		clocks = <&cpg CPG_MOD 812>;
+		power-domains = <&cpg>;
 		phy-mode = "rgmii-id";
 		phy-handle = <&phy0>;
 
diff --git a/Documentation/devicetree/bindings/pci/pci-rcar-gen2.txt b/Documentation/devicetree/bindings/pci/pci-rcar-gen2.txt
index 4e8b90e..07a7509 100644
--- a/Documentation/devicetree/bindings/pci/pci-rcar-gen2.txt
+++ b/Documentation/devicetree/bindings/pci/pci-rcar-gen2.txt
@@ -8,6 +8,7 @@
 Required properties:
 - compatible: "renesas,pci-r8a7790" for the R8A7790 SoC;
 	      "renesas,pci-r8a7791" for the R8A7791 SoC;
+	      "renesas,pci-r8a7793" for the R8A7793 SoC;
 	      "renesas,pci-r8a7794" for the R8A7794 SoC;
 	      "renesas,pci-rcar-gen2" for a generic R-Car Gen2 compatible device
 
diff --git a/Documentation/devicetree/bindings/pci/rcar-pci.txt b/Documentation/devicetree/bindings/pci/rcar-pci.txt
index 558fe52..6cf9969 100644
--- a/Documentation/devicetree/bindings/pci/rcar-pci.txt
+++ b/Documentation/devicetree/bindings/pci/rcar-pci.txt
@@ -4,6 +4,7 @@
 compatible: "renesas,pcie-r8a7779" for the R8A7779 SoC;
 	    "renesas,pcie-r8a7790" for the R8A7790 SoC;
 	    "renesas,pcie-r8a7791" for the R8A7791 SoC;
+	    "renesas,pcie-r8a7793" for the R8A7793 SoC;
 	    "renesas,pcie-r8a7795" for the R8A7795 SoC;
 	    "renesas,pcie-rcar-gen2" for a generic R-Car Gen2 compatible device.
 
diff --git a/Documentation/devicetree/bindings/regulator/tps65217.txt b/Documentation/devicetree/bindings/regulator/tps65217.txt
index d181096..4f05d20 100644
--- a/Documentation/devicetree/bindings/regulator/tps65217.txt
+++ b/Documentation/devicetree/bindings/regulator/tps65217.txt
@@ -26,11 +26,7 @@
 		ti,pmic-shutdown-controller;
 
 		regulators {
-			#address-cells = <1>;
-			#size-cells = <0>;
-
 			dcdc1_reg: dcdc1 {
-				reg = <0>;
 				regulator-min-microvolt = <900000>;
 				regulator-max-microvolt = <1800000>;
 				regulator-boot-on;
@@ -38,7 +34,6 @@
 			};
 
 			dcdc2_reg: dcdc2 {
-				reg = <1>;
 				regulator-min-microvolt = <900000>;
 				regulator-max-microvolt = <3300000>;
 				regulator-boot-on;
@@ -46,7 +41,6 @@
 			};
 
 			dcdc3_reg: dcc3 {
-				reg = <2>;
 				regulator-min-microvolt = <900000>;
 				regulator-max-microvolt = <1500000>;
 				regulator-boot-on;
@@ -54,7 +48,6 @@
 			};
 
 			ldo1_reg: ldo1 {
-				reg = <3>;
 				regulator-min-microvolt = <1000000>;
 				regulator-max-microvolt = <3300000>;
 				regulator-boot-on;
@@ -62,7 +55,6 @@
 			};
 
 			ldo2_reg: ldo2 {
-				reg = <4>;
 				regulator-min-microvolt = <900000>;
 				regulator-max-microvolt = <3300000>;
 				regulator-boot-on;
@@ -70,7 +62,6 @@
 			};
 
 			ldo3_reg: ldo3 {
-				reg = <5>;
 				regulator-min-microvolt = <1800000>;
 				regulator-max-microvolt = <3300000>;
 				regulator-boot-on;
@@ -78,7 +69,6 @@
 			};
 
 			ldo4_reg: ldo4 {
-				reg = <6>;
 				regulator-min-microvolt = <1800000>;
 				regulator-max-microvolt = <3300000>;
 				regulator-boot-on;
diff --git a/Documentation/devicetree/bindings/rtc/s3c-rtc.txt b/Documentation/devicetree/bindings/rtc/s3c-rtc.txt
index ac2fcd6..1068ffc 100644
--- a/Documentation/devicetree/bindings/rtc/s3c-rtc.txt
+++ b/Documentation/devicetree/bindings/rtc/s3c-rtc.txt
@@ -14,6 +14,10 @@
   interrupt number is the rtc alarm interrupt and second interrupt number
   is the rtc tick interrupt. The number of cells representing a interrupt
   depends on the parent interrupt controller.
+- clocks: Must contain a list of phandle and clock specifier for the rtc
+          and source clocks.
+- clock-names: Must contain "rtc" and "rtc_src" entries sorted in the
+               same order as the clocks property.
 
 Example:
 
@@ -21,4 +25,6 @@
 		compatible = "samsung,s3c6410-rtc";
 		reg = <0x10070000 0x100>;
 		interrupts = <44 0 45 0>;
+		clocks = <&clock CLK_RTC>, <&s2mps11_osc S2MPS11_CLK_AP>;
+		clock-names = "rtc", "rtc_src";
 	};
diff --git a/Documentation/devicetree/bindings/serial/fsl-imx-uart.txt b/Documentation/devicetree/bindings/serial/fsl-imx-uart.txt
index 35ae1fb..ed94c21 100644
--- a/Documentation/devicetree/bindings/serial/fsl-imx-uart.txt
+++ b/Documentation/devicetree/bindings/serial/fsl-imx-uart.txt
@@ -9,7 +9,7 @@
 - fsl,uart-has-rtscts : Indicate the uart has rts and cts
 - fsl,irda-mode : Indicate the uart supports irda mode
 - fsl,dte-mode : Indicate the uart works in DTE mode. The uart works
-                  is DCE mode by default.
+                  in DCE mode by default.
 
 Note: Each uart controller should have an alias correctly numbered
 in "aliases" node.
diff --git a/Documentation/devicetree/bindings/sound/fsl-asoc-card.txt b/Documentation/devicetree/bindings/sound/fsl-asoc-card.txt
index ce55c0a..4da41bf 100644
--- a/Documentation/devicetree/bindings/sound/fsl-asoc-card.txt
+++ b/Documentation/devicetree/bindings/sound/fsl-asoc-card.txt
@@ -30,6 +30,8 @@
  "fsl,imx-audio-sgtl5000"
  (compatible with Documentation/devicetree/bindings/sound/imx-audio-sgtl5000.txt)
 
+ "fsl,imx-audio-wm8960"
+
 Required properties:
 
   - compatible		: Contains one of entries in the compatible list.
diff --git a/Documentation/devicetree/bindings/thermal/rcar-thermal.txt b/Documentation/devicetree/bindings/thermal/rcar-thermal.txt
index 332e625..e5ee3f1 100644
--- a/Documentation/devicetree/bindings/thermal/rcar-thermal.txt
+++ b/Documentation/devicetree/bindings/thermal/rcar-thermal.txt
@@ -1,8 +1,9 @@
 * Renesas R-Car Thermal
 
 Required properties:
-- compatible		: "renesas,thermal-<soctype>", "renesas,rcar-thermal"
-			  as fallback.
+- compatible		: "renesas,thermal-<soctype>",
+			   "renesas,rcar-gen2-thermal" (with thermal-zone) or
+			   "renesas,rcar-thermal" (without thermal-zone) as fallback.
 			  Examples with soctypes are:
 			    - "renesas,thermal-r8a73a4" (R-Mobile APE6)
 			    - "renesas,thermal-r8a7779" (R-Car H1)
@@ -36,3 +37,35 @@
 		0xe61f0300 0x38>;
 	interrupts = <0 69 IRQ_TYPE_LEVEL_HIGH>;
 };
+
+Example (with thermal-zone):
+
+thermal-zones {
+	cpu_thermal: cpu-thermal {
+		polling-delay-passive	= <1000>;
+		polling-delay		= <5000>;
+
+		thermal-sensors = <&thermal>;
+
+		trips {
+			cpu-crit {
+				temperature	= <115000>;
+				hysteresis	= <0>;
+				type		= "critical";
+			};
+		};
+		cooling-maps {
+		};
+	};
+};
+
+thermal: thermal@e61f0000 {
+	compatible =	"renesas,thermal-r8a7790",
+			"renesas,rcar-gen2-thermal",
+			"renesas,rcar-thermal";
+	reg = <0 0xe61f0000 0 0x14>, <0 0xe61f0100 0 0x38>;
+	interrupts = <0 69 IRQ_TYPE_LEVEL_HIGH>;
+	clocks = <&mstp5_clks R8A7790_CLK_THERMAL>;
+	power-domains = <&cpg_clocks>;
+	#thermal-sensor-cells = <0>;
+};
diff --git a/Documentation/filesystems/efivarfs.txt b/Documentation/filesystems/efivarfs.txt
index c477af0..686a64b 100644
--- a/Documentation/filesystems/efivarfs.txt
+++ b/Documentation/filesystems/efivarfs.txt
@@ -14,3 +14,10 @@
 efivarfs is typically mounted like this,
 
 	mount -t efivarfs none /sys/firmware/efi/efivars
+
+Due to the presence of numerous firmware bugs where removing non-standard
+UEFI variables causes the system firmware to fail to POST, efivarfs
+files that are not well-known standardized variables are created
+as immutable files.  This doesn't prevent removal - "chattr -i" will work -
+but it does prevent this kind of failure from being accomplished
+accidentally.
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index fde9fd0..843b045b 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -240,8 +240,8 @@
  RssFile                     size of resident file mappings
  RssShmem                    size of resident shmem memory (includes SysV shm,
                              mapping of tmpfs and shared anonymous mappings)
- VmData                      size of data, stack, and text segments
- VmStk                       size of data, stack, and text segments
+ VmData                      size of private data segments
+ VmStk                       size of stack segments
  VmExe                       size of text segment
  VmLib                       size of shared library code
  VmPTE                       size of page table entries
@@ -356,7 +356,7 @@
 a7cb1000-a7cb2000 ---p 00000000 00:00 0
 a7cb2000-a7eb2000 rw-p 00000000 00:00 0
 a7eb2000-a7eb3000 ---p 00000000 00:00 0
-a7eb3000-a7ed5000 rw-p 00000000 00:00 0          [stack:1001]
+a7eb3000-a7ed5000 rw-p 00000000 00:00 0
 a7ed5000-a8008000 r-xp 00000000 03:00 4222       /lib/libc.so.6
 a8008000-a800a000 r--p 00133000 03:00 4222       /lib/libc.so.6
 a800a000-a800b000 rw-p 00135000 03:00 4222       /lib/libc.so.6
@@ -388,7 +388,6 @@
 
  [heap]                   = the heap of the program
  [stack]                  = the stack of the main process
- [stack:1001]             = the stack of the thread with tid 1001
  [vdso]                   = the "virtual dynamic shared object",
                             the kernel system call handler
 
@@ -396,10 +395,8 @@
 
 The /proc/PID/task/TID/maps is a view of the virtual memory from the viewpoint
 of the individual tasks of a process. In this file you will see a mapping marked
-as [stack] if that task sees it as a stack. This is a key difference from the
-content of /proc/PID/maps, where you will see all mappings that are being used
-as stack by all of those tasks. Hence, for the example above, the task-level
-map, i.e. /proc/PID/task/TID/maps for thread 1001 will look like this:
+as [stack] if that task sees it as a stack. Hence, for the example above, the
+task-level map, i.e. /proc/PID/task/TID/maps for thread 1001 will look like this:
 
 08048000-08049000 r-xp 00000000 03:00 8312       /opt/test
 08049000-0804a000 rw-p 00001000 03:00 8312       /opt/test
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 87d40a7..4324f24 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -666,7 +666,7 @@
 
 	clearcpuid=BITNUM [X86]
 			Disable CPUID feature X for the kernel. See
-			arch/x86/include/asm/cpufeature.h for the valid bit
+			arch/x86/include/asm/cpufeatures.h for the valid bit
 			numbers. Note the Linux specific bits are not necessarily
 			stable over kernel options, but the vendor specific
 			ones should be.
@@ -1496,6 +1496,11 @@
 			could change it dynamically, usually by
 			/sys/module/printk/parameters/ignore_loglevel.
 
+	ignore_rlimit_data
+			Ignore RLIMIT_DATA setting for data mappings,
+			print warning at first misuse.  Can be changed via
+			/sys/module/kernel/parameters/ignore_rlimit_data.
+
 	ihash_entries=	[KNL]
 			Set number of hash buckets for inode cache.
 
@@ -2561,6 +2566,8 @@
 
 	nointroute	[IA-64]
 
+	noinvpcid	[X86] Disable the INVPCID cpu feature.
+
 	nojitter	[IA-64] Disables jitter checking for ITC timers.
 
 	no-kvmclock	[X86,KVM] Disable paravirtualized KVM clock driver
@@ -3486,6 +3493,10 @@
 
 	ro		[KNL] Mount root device read-only on boot
 
+	rodata=		[KNL]
+		on	Mark read-only kernel memory as read-only (default).
+		off	Leave read-only kernel memory writable for debugging.
+
 	root=		[KNL] Root filesystem
 			See name_to_dev_t comment in init/do_mounts.c.
 
@@ -3523,6 +3534,11 @@
 
 	sched_debug	[KNL] Enables verbose scheduler debug messages.
 
+	schedstats=	[KNL,X86] Enable or disable scheduled statistics.
+			Allowed values are enable and disable. This feature
+			incurs a small amount of overhead in the scheduler
+			but is useful for debugging and performance tuning.
+
 	skew_tick=	[KNL] Offset the periodic timer tick per cpu to mitigate
 			xtime_lock contention on larger systems, and/or RCU lock
 			contention on all systems with CONFIG_MAXSMP set.
@@ -4230,6 +4246,17 @@
 			The default value of this parameter is determined by
 			the config option CONFIG_WQ_POWER_EFFICIENT_DEFAULT.
 
+	workqueue.debug_force_rr_cpu
+			Workqueue used to implicitly guarantee that work
+			items queued without explicit CPU specified are put
+			on the local CPU.  This guarantee is no longer true
+			and while local CPU is still preferred work items
+			may be put on foreign CPUs.  This debug option
+			forces round-robin CPU selection to flush out
+			usages which depend on the now broken guarantee.
+			When enabled, memory and cache locality will be
+			impacted.
+
 	x2apic_phys	[X86-64,APIC] Use x2apic physical mode instead of
 			default x2apic cluster mode on platforms
 			supporting x2apic.
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index ceb44a0..73b36d7 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -594,7 +594,7 @@
 
 tcp_syn_retries - INTEGER
 	Number of times initial SYNs for an active TCP connection attempt
-	will be retransmitted. Should not be higher than 255. Default value
+	will be retransmitted. Should not be higher than 127. Default value
 	is 6, which corresponds to 63seconds till the last retransmission
 	with the current initial RTO of 1second. With this the final timeout
 	for an active TCP connection attempt will happen after 127seconds.
diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index a93b414..f4444c9 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -58,6 +58,8 @@
 - panic_on_stackoverflow
 - panic_on_unrecovered_nmi
 - panic_on_warn
+- perf_cpu_time_max_percent
+- perf_event_paranoid
 - pid_max
 - powersave-nap               [ PPC only ]
 - printk
@@ -639,6 +641,17 @@
 
 ==============================================================
 
+perf_event_paranoid:
+
+Controls use of the performance events system by unprivileged
+users (without CAP_SYS_ADMIN).  The default value is 1.
+
+ -1: Allow use of (almost) all events by all users
+>=0: Disallow raw tracepoint access by users without CAP_IOC_LOCK
+>=1: Disallow CPU event access by users without CAP_SYS_ADMIN
+>=2: Disallow kernel profiling by users without CAP_SYS_ADMIN
+
+==============================================================
 
 pid_max:
 
@@ -760,6 +773,14 @@
 
 ==============================================================
 
+sched_schedstats:
+
+Enables/disables scheduler statistics. Enabling this feature
+incurs a small amount of overhead in the scheduler but is
+useful for debugging and performance tuning.
+
+==============================================================
+
 sg-big-buff:
 
 This file shows the size of the generic SCSI (sg) buffer.
diff --git a/Documentation/timers/hpet.txt b/Documentation/timers/hpet.txt
index 767392f..a484d2c 100644
--- a/Documentation/timers/hpet.txt
+++ b/Documentation/timers/hpet.txt
@@ -1,9 +1,7 @@
 		High Precision Event Timer Driver for Linux
 
 The High Precision Event Timer (HPET) hardware follows a specification
-by Intel and Microsoft which can be found at
-
-	http://www.intel.com/hardwaredesign/hpetspec_1.pdf
+by Intel and Microsoft, revision 1.
 
 Each HPET has one fixed-rate counter (at 10+ MHz, hence "High Precision")
 and up to 32 comparators.  Normally three or more comparators are provided,
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 053f613..07e4cdf 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -3025,7 +3025,7 @@
 and it must not exceed (max_vcpus + 32) * sizeof(struct kvm_s390_irq),
 which is the maximum number of possibly pending cpu-local interrupts.
 
-4.90 KVM_SMI
+4.96 KVM_SMI
 
 Capability: KVM_CAP_X86_SMM
 Architectures: x86
diff --git a/Documentation/virtual/kvm/mmu.txt b/Documentation/virtual/kvm/mmu.txt
index daf9c0f..c817310 100644
--- a/Documentation/virtual/kvm/mmu.txt
+++ b/Documentation/virtual/kvm/mmu.txt
@@ -358,7 +358,8 @@
 - if CR4.SMEP is enabled: since we've turned the page into a kernel page,
   the kernel may now execute it.  We handle this by also setting spte.nx.
   If we get a user fetch or read fault, we'll change spte.u=1 and
-  spte.nx=gpte.nx back.
+  spte.nx=gpte.nx back.  For this to work, KVM forces EFER.NX to 1 when
+  shadow paging is in use.
 - if CR4.SMAP is disabled: since the page has been changed to a kernel
   page, it can not be reused when CR4.SMAP is enabled. We set
   CR4.SMAP && !CR0.WP into shadow page's role to avoid this case. Note,
diff --git a/Documentation/watchdog/watchdog-parameters.txt b/Documentation/watchdog/watchdog-parameters.txt
index 9f9ec9f..4e4b6f1 100644
--- a/Documentation/watchdog/watchdog-parameters.txt
+++ b/Documentation/watchdog/watchdog-parameters.txt
@@ -400,3 +400,7 @@
 nowayout: Watchdog cannot be stopped once started
 	(default=kernel config parameter)
 -------------------------------------------------
+sun4v_wdt:
+timeout_ms: Watchdog timeout in milliseconds 1..180000, default=60000)
+nowayout: Watchdog cannot be stopped once started
+-------------------------------------------------
diff --git a/Documentation/x86/early-microcode.txt b/Documentation/x86/early-microcode.txt
index d62bea6..c956d99 100644
--- a/Documentation/x86/early-microcode.txt
+++ b/Documentation/x86/early-microcode.txt
@@ -40,3 +40,28 @@
 find . | cpio -o -H newc >../ucode.cpio
 cd ..
 cat ucode.cpio /boot/initrd-3.5.0.img >/boot/initrd-3.5.0.ucode.img
+
+Builtin microcode
+=================
+
+We can also load builtin microcode supplied through the regular firmware
+builtin method CONFIG_FIRMWARE_IN_KERNEL. Here's an example:
+
+CONFIG_FIRMWARE_IN_KERNEL=y
+CONFIG_EXTRA_FIRMWARE="intel-ucode/06-3a-09 amd-ucode/microcode_amd_fam15h.bin"
+CONFIG_EXTRA_FIRMWARE_DIR="/lib/firmware"
+
+This basically means, you have the following tree structure locally:
+
+/lib/firmware/
+|-- amd-ucode
+...
+|   |-- microcode_amd_fam15h.bin
+...
+|-- intel-ucode
+...
+|   |-- 06-3a-09
+...
+
+so that the build system can find those files and integrate them into
+the final kernel image. The early loader finds them and applies them.
diff --git a/Documentation/x86/exception-tables.txt b/Documentation/x86/exception-tables.txt
index 32901aa..e396bcd 100644
--- a/Documentation/x86/exception-tables.txt
+++ b/Documentation/x86/exception-tables.txt
@@ -290,3 +290,38 @@
 only use exceptions for code in the .text section.  Any other section
 will cause the exception table to not be sorted correctly, and the
 exceptions will fail.
+
+Things changed when 64-bit support was added to x86 Linux. Rather than
+double the size of the exception table by expanding the two entries
+from 32-bits to 64 bits, a clever trick was used to store addresses
+as relative offsets from the table itself. The assembly code changed
+from:
+	.long 1b,3b
+to:
+        .long (from) - .
+        .long (to) - .
+
+and the C-code that uses these values converts back to absolute addresses
+like this:
+
+	ex_insn_addr(const struct exception_table_entry *x)
+	{
+		return (unsigned long)&x->insn + x->insn;
+	}
+
+In v4.6 the exception table entry was expanded with a new field "handler".
+This is also 32-bits wide and contains a third relative function
+pointer which points to one of:
+
+1) int ex_handler_default(const struct exception_table_entry *fixup)
+   This is legacy case that just jumps to the fixup code
+2) int ex_handler_fault(const struct exception_table_entry *fixup)
+   This case provides the fault number of the trap that occurred at
+   entry->insn. It is used to distinguish page faults from machine
+   check.
+3) int ex_handler_ext(const struct exception_table_entry *fixup)
+   This case is used for uaccess_err ... we need to set a flag
+   in the task structure. Before the handler functions existed this
+   case was handled by adding a large offset to the fixup to tag
+   it as special.
+More functions can easily be added.
diff --git a/Documentation/x86/x86_64/boot-options.txt b/Documentation/x86/x86_64/boot-options.txt
index 68ed311..0965a71 100644
--- a/Documentation/x86/x86_64/boot-options.txt
+++ b/Documentation/x86/x86_64/boot-options.txt
@@ -60,6 +60,8 @@
 		threshold to 1. Enabling this may make memory predictive failure
 		analysis less effective if the bios sets thresholds for memory
 		errors since we will not see details for all errors.
+   mce=recovery
+		Force-enable recoverable machine check code paths
 
    nomce (for compatibility with i386): same as mce=off
 
diff --git a/MAINTAINERS b/MAINTAINERS
index 30aca4a..2061ea7 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -223,9 +223,7 @@
 
 ABI/API
 L:	linux-api@vger.kernel.org
-F:	Documentation/ABI/
 F:	include/linux/syscalls.h
-F:	include/uapi/
 F:	kernel/sys_ni.c
 
 ABIT UGURU 1,2 HARDWARE MONITOR DRIVER
@@ -686,13 +684,6 @@
 S:	Supported
 F:	drivers/macintosh/ams/
 
-AMSO1100 RNIC DRIVER
-M:	Tom Tucker <tom@opengridcomputing.com>
-M:	Steve Wise <swise@opengridcomputing.com>
-L:	linux-rdma@vger.kernel.org
-S:	Maintained
-F:	drivers/infiniband/hw/amso1100/
-
 ANALOG DEVICES INC AD9389B DRIVER
 M:	Hans Verkuil <hans.verkuil@cisco.com>
 L:	linux-media@vger.kernel.org
@@ -929,17 +920,24 @@
 S:	Maintained
 F:	drivers/clk/sunxi/
 
-ARM/Amlogic MesonX SoC support
+ARM/Amlogic Meson SoC support
 M:	Carlo Caione <carlo@caione.org>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+L:	linux-meson@googlegroups.com
+W:	http://linux-meson.com/
 S:	Maintained
-F:	drivers/media/rc/meson-ir.c
-N:	meson[x68]
+F:	arch/arm/mach-meson/
+F:	arch/arm/boot/dts/meson*
+N:	meson
 
 ARM/Annapurna Labs ALPINE ARCHITECTURE
 M:	Tsahee Zidenberg <tsahee@annapurnalabs.com>
+M:	Antoine Tenart <antoine.tenart@free-electrons.com>
 S:	Maintained
 F:	arch/arm/mach-alpine/
+F:	arch/arm/boot/dts/alpine*
+F:	arch/arm64/boot/dts/al/
+F:	drivers/*/*alpine*
 
 ARM/ATMEL AT91RM9200, AT91SAM9 AND SAMA5 SOC SUPPORT
 M:	Nicolas Ferre <nicolas.ferre@atmel.com>
@@ -967,6 +965,8 @@
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 F:	arch/arm/mach-highbank/
+F:	arch/arm/boot/dts/highbank.dts
+F:	arch/arm/boot/dts/ecx-*.dts*
 
 ARM/CAVIUM NETWORKS CNS3XXX MACHINE SUPPORT
 M:	Krzysztof Halasa <khalasa@piap.pl>
@@ -1042,6 +1042,7 @@
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/baohua/linux.git
 S:	Maintained
+F:	arch/arm/boot/dts/prima2*
 F:	arch/arm/mach-prima2/
 F:	drivers/clk/sirf/
 F:	drivers/clocksource/timer-prima2.c
@@ -1143,6 +1144,10 @@
 S:	Supported
 T:	git git://github.com/hisilicon/linux-hisi.git
 F:	arch/arm/mach-hisi/
+F:	arch/arm/boot/dts/hi3*
+F:	arch/arm/boot/dts/hip*
+F:	arch/arm/boot/dts/hisi*
+F:	arch/arm64/boot/dts/hisilicon/
 
 ARM/HP JORNADA 7XX MACHINE SUPPORT
 M:	Kristoffer Ericson <kristoffer.ericson@gmail.com>
@@ -1219,6 +1224,7 @@
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 F:	arch/arm/mach-keystone/
+F:	arch/arm/boot/dts/k2*
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/ssantosh/linux-keystone.git
 
 ARM/TEXAS INSTRUMENT KEYSTONE CLOCK FRAMEWORK
@@ -1287,6 +1293,7 @@
 S:	Maintained
 F:	arch/arm/mach-berlin/
 F:	arch/arm/boot/dts/berlin*
+F:	arch/arm64/boot/dts/marvell/berlin*
 
 
 ARM/Marvell Dove/MV78xx0/Orion SOC support
@@ -1425,6 +1432,7 @@
 F:	arch/arm/boot/dts/qcom-*.dts
 F:	arch/arm/boot/dts/qcom-*.dtsi
 F:	arch/arm/mach-qcom/
+F:	arch/arm64/boot/dts/qcom/*
 F:	drivers/soc/qcom/
 F:	drivers/tty/serial/msm_serial.h
 F:	drivers/tty/serial/msm_serial.c
@@ -1441,8 +1449,8 @@
 ARM/RENESAS ARM64 ARCHITECTURE
 M:	Simon Horman <horms@verge.net.au>
 M:	Magnus Damm <magnus.damm@gmail.com>
-L:	linux-sh@vger.kernel.org
-Q:	http://patchwork.kernel.org/project/linux-sh/list/
+L:	linux-renesas-soc@vger.kernel.org
+Q:	http://patchwork.kernel.org/project/linux-renesas-soc/list/
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/horms/renesas.git next
 S:	Supported
 F:	arch/arm64/boot/dts/renesas/
@@ -1484,6 +1492,8 @@
 L:	linux-samsung-soc@vger.kernel.org (moderated for non-subscribers)
 S:	Maintained
 F:	arch/arm/boot/dts/s3c*
+F:	arch/arm/boot/dts/s5p*
+F:	arch/arm/boot/dts/samsung*
 F:	arch/arm/boot/dts/exynos*
 F:	arch/arm64/boot/dts/exynos/
 F:	arch/arm/plat-samsung/
@@ -1563,6 +1573,7 @@
 F:	arch/arm/mach-socfpga/
 F:	arch/arm/boot/dts/socfpga*
 F:	arch/arm/configs/socfpga_defconfig
+F:	arch/arm64/boot/dts/altera/
 W:	http://www.rocketboards.org
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/dinguyen/linux.git
 
@@ -1716,7 +1727,7 @@
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 F:	arch/arm/boot/dts/vexpress*
-F:	arch/arm64/boot/dts/arm/vexpress*
+F:	arch/arm64/boot/dts/arm/
 F:	arch/arm/mach-vexpress/
 F:	*/*/vexpress*
 F:	*/*/*/vexpress*
@@ -2343,6 +2354,7 @@
 F:	arch/arm/boot/dts/bcm113*
 F:	arch/arm/boot/dts/bcm216*
 F:	arch/arm/boot/dts/bcm281*
+F:	arch/arm64/boot/dts/broadcom/
 F:	arch/arm/configs/bcm_defconfig
 F:	drivers/mmc/host/sdhci-bcm-kona.c
 F:	drivers/clocksource/bcm_kona_timer.c
@@ -2357,14 +2369,6 @@
 S:	Maintained
 N:	bcm2835
 
-BROADCOM BCM33XX MIPS ARCHITECTURE
-M:	Kevin Cernekee <cernekee@gmail.com>
-L:	linux-mips@linux-mips.org
-S:	Maintained
-F:	arch/mips/bcm3384/*
-F:	arch/mips/include/asm/mach-bcm3384/*
-F:	arch/mips/kernel/*bmips*
-
 BROADCOM BCM47XX MIPS ARCHITECTURE
 M:	Hauke Mehrtens <hauke@hauke-m.de>
 M:	Rafał Miłecki <zajec5@gmail.com>
@@ -3445,9 +3449,8 @@
 F:	drivers/usb/dwc2/
 
 DESIGNWARE USB3 DRD IP DRIVER
-M:	Felipe Balbi <balbi@ti.com>
+M:	Felipe Balbi <balbi@kernel.org>
 L:	linux-usb@vger.kernel.org
-L:	linux-omap@vger.kernel.org
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/balbi/usb.git
 S:	Maintained
 F:	drivers/usb/dwc3/
@@ -4184,13 +4187,6 @@
 S:	Orphan
 F:	fs/efs/
 
-EHCA (IBM GX bus InfiniBand adapter) DRIVER
-M:	Hoang-Nam Nguyen <hnguyen@de.ibm.com>
-M:	Christoph Raisch <raisch@de.ibm.com>
-L:	linux-rdma@vger.kernel.org
-S:	Supported
-F:	drivers/infiniband/hw/ehca/
-
 EHEA (IBM pSeries eHEA 10Gb ethernet adapter) DRIVER
 M:	Thadeu Lima de Souza Cascardo <cascardo@linux.vnet.ibm.com>
 L:	netdev@vger.kernel.org
@@ -4522,6 +4518,12 @@
 S:	Maintained
 F:	drivers/dma/fsldma.*
 
+FREESCALE GPMI NAND DRIVER
+M:	Han Xu <han.xu@nxp.com>
+L:	linux-mtd@lists.infradead.org
+S:	Maintained
+F:	drivers/mtd/nand/gpmi-nand/*
+
 FREESCALE I2C CPM DRIVER
 M:	Jochen Friedrich <jochen@scram.de>
 L:	linuxppc-dev@lists.ozlabs.org
@@ -4538,7 +4540,7 @@
 F:	drivers/video/fbdev/imxfb.c
 
 FREESCALE QUAD SPI DRIVER
-M:	Han Xu <han.xu@freescale.com>
+M:	Han Xu <han.xu@nxp.com>
 L:	linux-mtd@lists.infradead.org
 S:	Maintained
 F:	drivers/mtd/spi-nor/fsl-quadspi.c
@@ -4552,6 +4554,15 @@
 F:	drivers/net/ethernet/freescale/fs_enet/
 F:	include/linux/fs_enet_pd.h
 
+FREESCALE IMX / MXC FEC DRIVER
+M:	Fugang Duan <fugang.duan@nxp.com>
+L:	netdev@vger.kernel.org
+S:	Maintained
+F:	drivers/net/ethernet/freescale/fec_main.c
+F:	drivers/net/ethernet/freescale/fec_ptp.c
+F:	drivers/net/ethernet/freescale/fec.h
+F:	Documentation/devicetree/bindings/net/fsl-fec.txt
+
 FREESCALE QUICC ENGINE LIBRARY
 L:	linuxppc-dev@lists.ozlabs.org
 S:	Orphan
@@ -5809,12 +5820,6 @@
 S:	Maintained
 F:	net/ipv4/netfilter/ipt_MASQUERADE.c
 
-IPATH DRIVER
-M:	Mike Marciniszyn <infinipath@intel.com>
-L:	linux-rdma@vger.kernel.org
-S:	Maintained
-F:	drivers/staging/rdma/ipath/
-
 IPMI SUBSYSTEM
 M:	Corey Minyard <minyard@acm.org>
 L:	openipmi-developer@lists.sourceforge.net (moderated for non-subscribers)
@@ -6144,7 +6149,7 @@
 
 KERNEL SELFTEST FRAMEWORK
 M:	Shuah Khan <shuahkh@osg.samsung.com>
-L:	linux-api@vger.kernel.org
+L:	linux-kselftest@vger.kernel.org
 T:	git git://git.kernel.org/pub/scm/shuah/linux-kselftest
 S:	Maintained
 F:	tools/testing/selftests
@@ -6774,6 +6779,7 @@
 F:	Documentation/networking/mac80211-injection.txt
 F:	include/net/mac80211.h
 F:	net/mac80211/
+F:	drivers/net/wireless/mac80211_hwsim.[ch]
 
 MACVLAN DRIVER
 M:	Patrick McHardy <kaber@trash.net>
@@ -7370,7 +7376,7 @@
 F:	include/linux/isicom.h
 
 MUSB MULTIPOINT HIGH SPEED DUAL-ROLE CONTROLLER
-M:	Felipe Balbi <balbi@ti.com>
+M:	Bin Liu <b-liu@ti.com>
 L:	linux-usb@vger.kernel.org
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/balbi/usb.git
 S:	Maintained
@@ -7393,6 +7399,17 @@
 S:	Supported
 F:	drivers/net/ethernet/myricom/myri10ge/
 
+NAND FLASH SUBSYSTEM
+M:	Boris Brezillon <boris.brezillon@free-electrons.com>
+R:	Richard Weinberger <richard@nod.at>
+L:	linux-mtd@lists.infradead.org
+W:	http://www.linux-mtd.infradead.org/
+Q:	http://patchwork.ozlabs.org/project/linux-mtd/list/
+T:	git git://github.com/linux-nand/linux.git
+S:	Maintained
+F:	drivers/mtd/nand/
+F:	include/linux/mtd/nand*.h
+
 NATSEMI ETHERNET DRIVER (DP8381x)
 S:	Orphan
 F:	drivers/net/ethernet/natsemi/natsemi.c
@@ -7702,13 +7719,13 @@
 F:	arch/nios2/
 
 NOKIA N900 POWER SUPPLY DRIVERS
-M:	Pali Rohár <pali.rohar@gmail.com>
-S:	Maintained
+R:	Pali Rohár <pali.rohar@gmail.com>
 F:	include/linux/power/bq2415x_charger.h
 F:	include/linux/power/bq27xxx_battery.h
 F:	include/linux/power/isp1704_charger.h
 F:	drivers/power/bq2415x_charger.c
 F:	drivers/power/bq27xxx_battery.c
+F:	drivers/power/bq27xxx_battery_i2c.c
 F:	drivers/power/isp1704_charger.c
 F:	drivers/power/rx51_battery.c
 
@@ -7939,11 +7956,9 @@
 F:	drivers/staging/media/omap4iss/
 
 OMAP USB SUPPORT
-M:	Felipe Balbi <balbi@ti.com>
 L:	linux-usb@vger.kernel.org
 L:	linux-omap@vger.kernel.org
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/balbi/usb.git
-S:	Maintained
+S:	Orphan
 F:	drivers/usb/*/*omap*
 F:	arch/arm/*omap*/usb*
 
@@ -8460,6 +8475,7 @@
 M:	Peter Zijlstra <peterz@infradead.org>
 M:	Ingo Molnar <mingo@redhat.com>
 M:	Arnaldo Carvalho de Melo <acme@kernel.org>
+R:	Alexander Shishkin <alexander.shishkin@linux.intel.com>
 L:	linux-kernel@vger.kernel.org
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf/core
 S:	Supported
@@ -8818,6 +8834,7 @@
 T:	git git://github.com/hzhuang1/linux.git
 T:	git git://github.com/rjarzmik/linux.git
 S:	Maintained
+F:	arch/arm/boot/dts/pxa*
 F:	arch/arm/mach-pxa/
 F:	drivers/dma/pxa*
 F:	drivers/pcmcia/pxa2xx*
@@ -8847,6 +8864,7 @@
 T:	git git://github.com/hzhuang1/linux.git
 T:	git git://git.linaro.org/people/ycmiao/pxa-linux.git
 S:	Maintained
+F:	arch/arm/boot/dts/mmp*
 F:	arch/arm/mach-mmp/
 
 PXA MMCI DRIVER
@@ -9572,6 +9590,12 @@
 S:	Maintained
 F:	drivers/thunderbolt/
 
+TI BQ27XXX POWER SUPPLY DRIVER
+R:	Andrew F. Davis <afd@ti.com>
+F:	include/linux/power/bq27xxx_battery.h
+F:	drivers/power/bq27xxx_battery.c
+F:	drivers/power/bq27xxx_battery_i2c.c
+
 TIMEKEEPING, CLOCKSOURCE CORE, NTP, ALARMTIMER
 M:	John Stultz <john.stultz@linaro.org>
 M:	Thomas Gleixner <tglx@linutronix.de>
@@ -9793,10 +9817,11 @@
 F:	drivers/scsi/be2iscsi/
 
 Emulex 10Gbps NIC BE2, BE3-R, Lancer, Skyhawk-R DRIVER
-M:	Sathya Perla <sathya.perla@avagotech.com>
-M:	Ajit Khaparde <ajit.khaparde@avagotech.com>
-M:	Padmanabh Ratnakar <padmanabh.ratnakar@avagotech.com>
-M:	Sriharsha Basavapatna <sriharsha.basavapatna@avagotech.com>
+M:	Sathya Perla <sathya.perla@broadcom.com>
+M:	Ajit Khaparde <ajit.khaparde@broadcom.com>
+M:	Padmanabh Ratnakar <padmanabh.ratnakar@broadcom.com>
+M:	Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
+M:	Somnath Kotur <somnath.kotur@broadcom.com>
 L:	netdev@vger.kernel.org
 W:	http://www.emulex.com
 S:	Supported
@@ -10158,6 +10183,7 @@
 F:	drivers/media/pci/solo6x10/
 
 SOFTWARE RAID (Multiple Disks) SUPPORT
+M:	Shaohua Li <shli@kernel.org>
 L:	linux-raid@vger.kernel.org
 T:	git git://neil.brown.name/md
 S:	Supported
@@ -10173,7 +10199,7 @@
 
 SONICS SILICON BACKPLANE DRIVER (SSB)
 M:	Michael Buesch <m@bues.ch>
-L:	netdev@vger.kernel.org
+L:	linux-wireless@vger.kernel.org
 S:	Maintained
 F:	drivers/ssb/
 F:	include/linux/ssb/
@@ -10291,6 +10317,7 @@
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 W:	http://www.st.com/spear
 S:	Maintained
+F:	arch/arm/boot/dts/spear*
 F:	arch/arm/mach-spear/
 
 SPEAR CLOCK FRAMEWORK SUPPORT
@@ -11318,7 +11345,7 @@
 F:	drivers/usb/host/ehci*
 
 USB GADGET/PERIPHERAL SUBSYSTEM
-M:	Felipe Balbi <balbi@ti.com>
+M:	Felipe Balbi <balbi@kernel.org>
 L:	linux-usb@vger.kernel.org
 W:	http://www.linux-usb.org/gadget
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/balbi/usb.git
@@ -11394,7 +11421,7 @@
 F:	drivers/net/usb/pegasus.*
 
 USB PHY LAYER
-M:	Felipe Balbi <balbi@ti.com>
+M:	Felipe Balbi <balbi@kernel.org>
 L:	linux-usb@vger.kernel.org
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/balbi/usb.git
 S:	Maintained
@@ -12024,7 +12051,6 @@
 F:	arch/arm64/include/asm/xen/
 
 XEN NETWORK BACKEND DRIVER
-M:	Ian Campbell <ian.campbell@citrix.com>
 M:	Wei Liu <wei.liu2@citrix.com>
 L:	xen-devel@lists.xenproject.org (moderated for non-subscribers)
 L:	netdev@vger.kernel.org
@@ -12133,7 +12159,7 @@
 F:	drivers/net/hamradio/z8530.h
 
 ZBUD COMPRESSED PAGE ALLOCATOR
-M:	Seth Jennings <sjennings@variantweb.net>
+M:	Seth Jennings <sjenning@redhat.com>
 L:	linux-mm@kvack.org
 S:	Maintained
 F:	mm/zbud.c
@@ -12188,7 +12214,7 @@
 F:	Documentation/vm/zsmalloc.txt
 
 ZSWAP COMPRESSED SWAP CACHING
-M:	Seth Jennings <sjennings@variantweb.net>
+M:	Seth Jennings <sjenning@redhat.com>
 L:	linux-mm@kvack.org
 S:	Maintained
 F:	mm/zswap.c
diff --git a/Makefile b/Makefile
index c65fe37..7b3ecdc 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 4
 PATCHLEVEL = 5
 SUBLEVEL = 0
-EXTRAVERSION = -rc1
+EXTRAVERSION =
 NAME = Blurry Fish Butt
 
 # *DOCUMENTATION*
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index 76dde9d..8a188bc 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -12,8 +12,6 @@
 	select BUILDTIME_EXTABLE_SORT
 	select COMMON_CLK
 	select CLONE_BACKWARDS
-	# ARC Busybox based initramfs absolutely relies on DEVTMPFS for /dev
-	select DEVTMPFS if !INITRAMFS_SOURCE=""
 	select GENERIC_ATOMIC64
 	select GENERIC_CLOCKEVENTS
 	select GENERIC_FIND_FIRST_BIT
@@ -275,14 +273,6 @@
 	default "0xA0000000"
 	depends on ARC_HAS_DCCM
 
-config ARC_HAS_HW_MPY
-	bool "Use Hardware Multiplier (Normal or Faster XMAC)"
-	default y
-	help
-	  Influences how gcc generates code for MPY operations.
-	  If enabled, MPYxx insns are generated, provided by Standard/XMAC
-	  Multipler. Otherwise software multipy lib is used
-
 choice
 	prompt "MMU Version"
 	default ARC_MMU_V3 if ARC_CPU_770
@@ -338,6 +328,19 @@
 
 endchoice
 
+choice
+	prompt "MMU Super Page Size"
+	depends on ISA_ARCV2 && TRANSPARENT_HUGEPAGE
+	default ARC_HUGEPAGE_2M
+
+config ARC_HUGEPAGE_2M
+	bool "2MB"
+
+config ARC_HUGEPAGE_16M
+	bool "16MB"
+
+endchoice
+
 if ISA_ARCOMPACT
 
 config ARC_COMPACT_IRQ_LEVELS
@@ -410,7 +413,7 @@
 	default n
 	depends on !SMP
 
-config ARC_HAS_GRTC
+config ARC_HAS_GFRC
 	bool "SMP synchronized 64-bit cycle counter"
 	default y
 	depends on SMP
@@ -529,14 +532,6 @@
 	  Counts number of I and D TLB Misses and exports them via Debugfs
 	  The counters can be cleared via Debugfs as well
 
-if SMP
-
-config ARC_IPI_DBG
-	bool "Debug Inter Core interrupts"
-	default n
-
-endif
-
 endif
 
 config ARC_UBOOT_SUPPORT
@@ -566,6 +561,12 @@
 endmenu	 # "ARC Architecture Configuration"
 
 source "mm/Kconfig"
+
+config FORCE_MAX_ZONEORDER
+	int "Maximum zone order"
+	default "12" if ARC_HUGEPAGE_16M
+	default "11"
+
 source "net/Kconfig"
 source "drivers/Kconfig"
 source "fs/Kconfig"
diff --git a/arch/arc/Makefile b/arch/arc/Makefile
index aeb1902..c8230f3 100644
--- a/arch/arc/Makefile
+++ b/arch/arc/Makefile
@@ -74,10 +74,6 @@
 # --build-id w/o "-marclinux". Default arc-elf32-ld is OK
 ldflags-$(upto_gcc44)			+= -marclinux
 
-ifndef CONFIG_ARC_HAS_HW_MPY
-	cflags-y	+= -mno-mpy
-endif
-
 LIBGCC	:= $(shell $(CC) $(cflags-y) --print-libgcc-file-name)
 
 # Modules with short calls might break for calls into builtin-kernel
diff --git a/arch/arc/configs/axs101_defconfig b/arch/arc/configs/axs101_defconfig
index f1ac981..5d4e2a0 100644
--- a/arch/arc/configs/axs101_defconfig
+++ b/arch/arc/configs/axs101_defconfig
@@ -39,6 +39,7 @@
 # CONFIG_INET_XFRM_MODE_TUNNEL is not set
 # CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_IPV6 is not set
+CONFIG_DEVTMPFS=y
 # CONFIG_STANDALONE is not set
 # CONFIG_PREVENT_FIRMWARE_BUILD is not set
 # CONFIG_FIRMWARE_IN_KERNEL is not set
@@ -73,7 +74,6 @@
 CONFIG_I2C_DESIGNWARE_PLATFORM=y
 # CONFIG_HWMON is not set
 CONFIG_FB=y
-# CONFIG_VGA_CONSOLE is not set
 CONFIG_FRAMEBUFFER_CONSOLE=y
 CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y
 CONFIG_LOGO=y
@@ -91,12 +91,10 @@
 CONFIG_MMC_DW=y
 # CONFIG_IOMMU_SUPPORT is not set
 CONFIG_EXT3_FS=y
-CONFIG_EXT4_FS=y
 CONFIG_MSDOS_FS=y
 CONFIG_VFAT_FS=y
 CONFIG_NTFS_FS=y
 CONFIG_TMPFS=y
-CONFIG_JFFS2_FS=y
 CONFIG_NFS_FS=y
 CONFIG_NLS_CODEPAGE_437=y
 CONFIG_NLS_ISO8859_1=y
diff --git a/arch/arc/configs/axs103_defconfig b/arch/arc/configs/axs103_defconfig
index 323486d..87ee46b 100644
--- a/arch/arc/configs/axs103_defconfig
+++ b/arch/arc/configs/axs103_defconfig
@@ -39,14 +39,10 @@
 # CONFIG_INET_XFRM_MODE_TUNNEL is not set
 # CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_IPV6 is not set
+CONFIG_DEVTMPFS=y
 # CONFIG_STANDALONE is not set
 # CONFIG_PREVENT_FIRMWARE_BUILD is not set
 # CONFIG_FIRMWARE_IN_KERNEL is not set
-CONFIG_MTD=y
-CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_BLOCK=y
-CONFIG_MTD_NAND=y
-CONFIG_MTD_NAND_AXS=y
 CONFIG_SCSI=y
 CONFIG_BLK_DEV_SD=y
 CONFIG_NETDEVICES=y
@@ -78,14 +74,12 @@
 CONFIG_I2C_DESIGNWARE_PLATFORM=y
 # CONFIG_HWMON is not set
 CONFIG_FB=y
-# CONFIG_VGA_CONSOLE is not set
 CONFIG_FRAMEBUFFER_CONSOLE=y
 CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y
 CONFIG_LOGO=y
 # CONFIG_LOGO_LINUX_MONO is not set
 # CONFIG_LOGO_LINUX_VGA16 is not set
 # CONFIG_LOGO_LINUX_CLUT224 is not set
-CONFIG_USB=y
 CONFIG_USB_EHCI_HCD=y
 CONFIG_USB_EHCI_HCD_PLATFORM=y
 CONFIG_USB_OHCI_HCD=y
@@ -97,12 +91,10 @@
 CONFIG_MMC_DW=y
 # CONFIG_IOMMU_SUPPORT is not set
 CONFIG_EXT3_FS=y
-CONFIG_EXT4_FS=y
 CONFIG_MSDOS_FS=y
 CONFIG_VFAT_FS=y
 CONFIG_NTFS_FS=y
 CONFIG_TMPFS=y
-CONFIG_JFFS2_FS=y
 CONFIG_NFS_FS=y
 CONFIG_NLS_CODEPAGE_437=y
 CONFIG_NLS_ISO8859_1=y
diff --git a/arch/arc/configs/axs103_smp_defconfig b/arch/arc/configs/axs103_smp_defconfig
index 66191cd..d80daf4 100644
--- a/arch/arc/configs/axs103_smp_defconfig
+++ b/arch/arc/configs/axs103_smp_defconfig
@@ -40,14 +40,10 @@
 # CONFIG_INET_XFRM_MODE_TUNNEL is not set
 # CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_IPV6 is not set
+CONFIG_DEVTMPFS=y
 # CONFIG_STANDALONE is not set
 # CONFIG_PREVENT_FIRMWARE_BUILD is not set
 # CONFIG_FIRMWARE_IN_KERNEL is not set
-CONFIG_MTD=y
-CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_BLOCK=y
-CONFIG_MTD_NAND=y
-CONFIG_MTD_NAND_AXS=y
 CONFIG_SCSI=y
 CONFIG_BLK_DEV_SD=y
 CONFIG_NETDEVICES=y
@@ -79,14 +75,12 @@
 CONFIG_I2C_DESIGNWARE_PLATFORM=y
 # CONFIG_HWMON is not set
 CONFIG_FB=y
-# CONFIG_VGA_CONSOLE is not set
 CONFIG_FRAMEBUFFER_CONSOLE=y
 CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y
 CONFIG_LOGO=y
 # CONFIG_LOGO_LINUX_MONO is not set
 # CONFIG_LOGO_LINUX_VGA16 is not set
 # CONFIG_LOGO_LINUX_CLUT224 is not set
-CONFIG_USB=y
 CONFIG_USB_EHCI_HCD=y
 CONFIG_USB_EHCI_HCD_PLATFORM=y
 CONFIG_USB_OHCI_HCD=y
@@ -98,12 +92,10 @@
 CONFIG_MMC_DW=y
 # CONFIG_IOMMU_SUPPORT is not set
 CONFIG_EXT3_FS=y
-CONFIG_EXT4_FS=y
 CONFIG_MSDOS_FS=y
 CONFIG_VFAT_FS=y
 CONFIG_NTFS_FS=y
 CONFIG_TMPFS=y
-CONFIG_JFFS2_FS=y
 CONFIG_NFS_FS=y
 CONFIG_NLS_CODEPAGE_437=y
 CONFIG_NLS_ISO8859_1=y
diff --git a/arch/arc/configs/nsim_700_defconfig b/arch/arc/configs/nsim_700_defconfig
index 138f9d8..f410953 100644
--- a/arch/arc/configs/nsim_700_defconfig
+++ b/arch/arc/configs/nsim_700_defconfig
@@ -4,6 +4,7 @@
 # CONFIG_SWAP is not set
 CONFIG_SYSVIPC=y
 CONFIG_POSIX_MQUEUE=y
+# CONFIG_CROSS_MEMORY_ATTACH is not set
 CONFIG_HIGH_RES_TIMERS=y
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
@@ -26,7 +27,6 @@
 CONFIG_ARC_BUILTIN_DTB_NAME="nsim_700"
 CONFIG_PREEMPT=y
 # CONFIG_COMPACTION is not set
-# CONFIG_CROSS_MEMORY_ATTACH is not set
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
@@ -34,6 +34,7 @@
 CONFIG_NET_KEY=y
 CONFIG_INET=y
 # CONFIG_IPV6 is not set
+CONFIG_DEVTMPFS=y
 # CONFIG_STANDALONE is not set
 # CONFIG_PREVENT_FIRMWARE_BUILD is not set
 # CONFIG_FIRMWARE_IN_KERNEL is not set
@@ -51,7 +52,6 @@
 CONFIG_SERIAL_ARC_CONSOLE=y
 # CONFIG_HW_RANDOM is not set
 # CONFIG_HWMON is not set
-# CONFIG_VGA_CONSOLE is not set
 # CONFIG_HID is not set
 # CONFIG_USB_SUPPORT is not set
 # CONFIG_IOMMU_SUPPORT is not set
@@ -63,4 +63,3 @@
 # CONFIG_ENABLE_WARN_DEPRECATED is not set
 # CONFIG_ENABLE_MUST_CHECK is not set
 # CONFIG_DEBUG_PREEMPT is not set
-CONFIG_XZ_DEC=y
diff --git a/arch/arc/configs/nsim_hs_defconfig b/arch/arc/configs/nsim_hs_defconfig
index f68838e..cfaa33c 100644
--- a/arch/arc/configs/nsim_hs_defconfig
+++ b/arch/arc/configs/nsim_hs_defconfig
@@ -35,6 +35,7 @@
 CONFIG_NET_KEY=y
 CONFIG_INET=y
 # CONFIG_IPV6 is not set
+CONFIG_DEVTMPFS=y
 # CONFIG_STANDALONE is not set
 # CONFIG_PREVENT_FIRMWARE_BUILD is not set
 # CONFIG_FIRMWARE_IN_KERNEL is not set
@@ -49,7 +50,6 @@
 CONFIG_SERIAL_ARC_CONSOLE=y
 # CONFIG_HW_RANDOM is not set
 # CONFIG_HWMON is not set
-# CONFIG_VGA_CONSOLE is not set
 # CONFIG_HID is not set
 # CONFIG_USB_SUPPORT is not set
 # CONFIG_IOMMU_SUPPORT is not set
@@ -61,4 +61,3 @@
 # CONFIG_ENABLE_WARN_DEPRECATED is not set
 # CONFIG_ENABLE_MUST_CHECK is not set
 # CONFIG_DEBUG_PREEMPT is not set
-CONFIG_XZ_DEC=y
diff --git a/arch/arc/configs/nsim_hs_smp_defconfig b/arch/arc/configs/nsim_hs_smp_defconfig
index 96bd1c2..bb2a8dc 100644
--- a/arch/arc/configs/nsim_hs_smp_defconfig
+++ b/arch/arc/configs/nsim_hs_smp_defconfig
@@ -2,6 +2,7 @@
 # CONFIG_LOCALVERSION_AUTO is not set
 CONFIG_DEFAULT_HOSTNAME="ARCLinux"
 # CONFIG_SWAP is not set
+# CONFIG_CROSS_MEMORY_ATTACH is not set
 CONFIG_HIGH_RES_TIMERS=y
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
@@ -21,13 +22,11 @@
 # CONFIG_IOSCHED_DEADLINE is not set
 # CONFIG_IOSCHED_CFQ is not set
 CONFIG_ARC_PLAT_SIM=y
-CONFIG_ARC_BOARD_ML509=y
 CONFIG_ISA_ARCV2=y
 CONFIG_SMP=y
 CONFIG_ARC_BUILTIN_DTB_NAME="nsim_hs_idu"
 CONFIG_PREEMPT=y
 # CONFIG_COMPACTION is not set
-# CONFIG_CROSS_MEMORY_ATTACH is not set
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
@@ -35,6 +34,7 @@
 CONFIG_NET_KEY=y
 CONFIG_INET=y
 # CONFIG_IPV6 is not set
+CONFIG_DEVTMPFS=y
 # CONFIG_STANDALONE is not set
 # CONFIG_PREVENT_FIRMWARE_BUILD is not set
 # CONFIG_FIRMWARE_IN_KERNEL is not set
@@ -49,7 +49,6 @@
 CONFIG_SERIAL_ARC_CONSOLE=y
 # CONFIG_HW_RANDOM is not set
 # CONFIG_HWMON is not set
-# CONFIG_VGA_CONSOLE is not set
 # CONFIG_HID is not set
 # CONFIG_USB_SUPPORT is not set
 # CONFIG_IOMMU_SUPPORT is not set
@@ -60,4 +59,3 @@
 CONFIG_NFS_FS=y
 # CONFIG_ENABLE_WARN_DEPRECATED is not set
 # CONFIG_ENABLE_MUST_CHECK is not set
-CONFIG_XZ_DEC=y
diff --git a/arch/arc/configs/nsimosci_defconfig b/arch/arc/configs/nsimosci_defconfig
index 31e1d95..646182e 100644
--- a/arch/arc/configs/nsimosci_defconfig
+++ b/arch/arc/configs/nsimosci_defconfig
@@ -33,6 +33,7 @@
 CONFIG_NET_KEY=y
 CONFIG_INET=y
 # CONFIG_IPV6 is not set
+CONFIG_DEVTMPFS=y
 # CONFIG_STANDALONE is not set
 # CONFIG_PREVENT_FIRMWARE_BUILD is not set
 # CONFIG_FIRMWARE_IN_KERNEL is not set
@@ -58,7 +59,6 @@
 # CONFIG_HW_RANDOM is not set
 # CONFIG_HWMON is not set
 CONFIG_FB=y
-# CONFIG_VGA_CONSOLE is not set
 CONFIG_FRAMEBUFFER_CONSOLE=y
 CONFIG_LOGO=y
 # CONFIG_HID is not set
diff --git a/arch/arc/configs/nsimosci_hs_defconfig b/arch/arc/configs/nsimosci_hs_defconfig
index fcae666..ceca254 100644
--- a/arch/arc/configs/nsimosci_hs_defconfig
+++ b/arch/arc/configs/nsimosci_hs_defconfig
@@ -34,12 +34,12 @@
 CONFIG_NET_KEY=y
 CONFIG_INET=y
 # CONFIG_IPV6 is not set
+CONFIG_DEVTMPFS=y
 # CONFIG_STANDALONE is not set
 # CONFIG_PREVENT_FIRMWARE_BUILD is not set
 # CONFIG_FIRMWARE_IN_KERNEL is not set
 # CONFIG_BLK_DEV is not set
 CONFIG_NETDEVICES=y
-CONFIG_NET_OSCI_LAN=y
 CONFIG_INPUT_EVDEV=y
 # CONFIG_MOUSE_PS2_ALPS is not set
 # CONFIG_MOUSE_PS2_LOGIPS2PP is not set
@@ -58,7 +58,6 @@
 # CONFIG_HW_RANDOM is not set
 # CONFIG_HWMON is not set
 CONFIG_FB=y
-# CONFIG_VGA_CONSOLE is not set
 CONFIG_FRAMEBUFFER_CONSOLE=y
 CONFIG_LOGO=y
 # CONFIG_HID is not set
diff --git a/arch/arc/configs/nsimosci_hs_smp_defconfig b/arch/arc/configs/nsimosci_hs_smp_defconfig
index b01b659..4b6da90 100644
--- a/arch/arc/configs/nsimosci_hs_smp_defconfig
+++ b/arch/arc/configs/nsimosci_hs_smp_defconfig
@@ -2,6 +2,7 @@
 CONFIG_DEFAULT_HOSTNAME="ARCLinux"
 # CONFIG_SWAP is not set
 CONFIG_SYSVIPC=y
+# CONFIG_CROSS_MEMORY_ATTACH is not set
 CONFIG_NO_HZ=y
 CONFIG_HIGH_RES_TIMERS=y
 CONFIG_IKCONFIG=y
@@ -18,15 +19,11 @@
 # CONFIG_IOSCHED_DEADLINE is not set
 # CONFIG_IOSCHED_CFQ is not set
 CONFIG_ARC_PLAT_SIM=y
-CONFIG_ARC_BOARD_ML509=y
 CONFIG_ISA_ARCV2=y
 CONFIG_SMP=y
-CONFIG_ARC_HAS_LL64=y
-# CONFIG_ARC_HAS_RTSC is not set
 CONFIG_ARC_BUILTIN_DTB_NAME="nsimosci_hs_idu"
 CONFIG_PREEMPT=y
 # CONFIG_COMPACTION is not set
-# CONFIG_CROSS_MEMORY_ATTACH is not set
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_PACKET_DIAG=y
@@ -40,6 +37,7 @@
 # CONFIG_INET_LRO is not set
 # CONFIG_IPV6 is not set
 # CONFIG_WIRELESS is not set
+CONFIG_DEVTMPFS=y
 # CONFIG_STANDALONE is not set
 # CONFIG_PREVENT_FIRMWARE_BUILD is not set
 # CONFIG_FIRMWARE_IN_KERNEL is not set
@@ -56,14 +54,11 @@
 # CONFIG_NET_VENDOR_STMICRO is not set
 # CONFIG_NET_VENDOR_VIA is not set
 # CONFIG_NET_VENDOR_WIZNET is not set
-CONFIG_NET_OSCI_LAN=y
 # CONFIG_WLAN is not set
 CONFIG_INPUT_EVDEV=y
 CONFIG_MOUSE_PS2_TOUCHKIT=y
 # CONFIG_SERIO_SERPORT is not set
-CONFIG_SERIO_LIBPS2=y
 CONFIG_SERIO_ARC_PS2=y
-CONFIG_VT_HW_CONSOLE_BINDING=y
 # CONFIG_LEGACY_PTYS is not set
 # CONFIG_DEVKMEM is not set
 CONFIG_SERIAL_8250=y
@@ -75,9 +70,6 @@
 # CONFIG_HW_RANDOM is not set
 # CONFIG_HWMON is not set
 CONFIG_FB=y
-CONFIG_ARCPGU_RGB888=y
-CONFIG_ARCPGU_DISPTYPE=0
-# CONFIG_VGA_CONSOLE is not set
 CONFIG_FRAMEBUFFER_CONSOLE=y
 CONFIG_LOGO=y
 # CONFIG_HID is not set
diff --git a/arch/arc/configs/tb10x_defconfig b/arch/arc/configs/tb10x_defconfig
index 3b4dc9c..9b342ea 100644
--- a/arch/arc/configs/tb10x_defconfig
+++ b/arch/arc/configs/tb10x_defconfig
@@ -3,6 +3,7 @@
 CONFIG_DEFAULT_HOSTNAME="tb10x"
 CONFIG_SYSVIPC=y
 CONFIG_POSIX_MQUEUE=y
+# CONFIG_CROSS_MEMORY_ATTACH is not set
 CONFIG_HIGH_RES_TIMERS=y
 CONFIG_BSD_PROCESS_ACCT=y
 CONFIG_BSD_PROCESS_ACCT_V3=y
@@ -26,12 +27,10 @@
 # CONFIG_BLOCK is not set
 CONFIG_ARC_PLAT_TB10X=y
 CONFIG_ARC_CACHE_LINE_SHIFT=5
-CONFIG_ARC_STACK_NONEXEC=y
 CONFIG_HZ=250
 CONFIG_ARC_BUILTIN_DTB_NAME="abilis_tb100_dvk"
 CONFIG_PREEMPT_VOLUNTARY=y
 # CONFIG_COMPACTION is not set
-# CONFIG_CROSS_MEMORY_ATTACH is not set
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
@@ -44,8 +43,8 @@
 # CONFIG_INET_DIAG is not set
 # CONFIG_IPV6 is not set
 # CONFIG_WIRELESS is not set
+CONFIG_DEVTMPFS=y
 # CONFIG_FIRMWARE_IN_KERNEL is not set
-CONFIG_PROC_DEVICETREE=y
 CONFIG_NETDEVICES=y
 # CONFIG_NET_CADENCE is not set
 # CONFIG_NET_VENDOR_BROADCOM is not set
@@ -55,9 +54,6 @@
 # CONFIG_NET_VENDOR_NATSEMI is not set
 # CONFIG_NET_VENDOR_SEEQ is not set
 CONFIG_STMMAC_ETH=y
-CONFIG_STMMAC_DEBUG_FS=y
-CONFIG_STMMAC_DA=y
-CONFIG_STMMAC_CHAINED=y
 # CONFIG_NET_VENDOR_WIZNET is not set
 # CONFIG_WLAN is not set
 # CONFIG_INPUT is not set
@@ -91,7 +87,6 @@
 CONFIG_LEDS_TRIGGER_TRANSIENT=y
 CONFIG_DMADEVICES=y
 CONFIG_DW_DMAC=y
-CONFIG_NET_DMA=y
 CONFIG_ASYNC_TX_DMA=y
 # CONFIG_IOMMU_SUPPORT is not set
 # CONFIG_DNOTIFY is not set
@@ -100,17 +95,16 @@
 CONFIG_CONFIGFS_FS=y
 # CONFIG_MISC_FILESYSTEMS is not set
 # CONFIG_NETWORK_FILESYSTEMS is not set
+CONFIG_DEBUG_INFO=y
 # CONFIG_ENABLE_WARN_DEPRECATED is not set
-CONFIG_MAGIC_SYSRQ=y
 CONFIG_STRIP_ASM_SYMS=y
 CONFIG_DEBUG_FS=y
 CONFIG_HEADERS_CHECK=y
 CONFIG_DEBUG_SECTION_MISMATCH=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_MEMORY_INIT=y
+CONFIG_DEBUG_STACKOVERFLOW=y
 CONFIG_DETECT_HUNG_TASK=y
 CONFIG_SCHEDSTATS=y
 CONFIG_TIMER_STATS=y
-CONFIG_DEBUG_INFO=y
-CONFIG_DEBUG_MEMORY_INIT=y
-CONFIG_DEBUG_STACKOVERFLOW=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
 # CONFIG_CRYPTO_HW is not set
diff --git a/arch/arc/configs/vdk_hs38_smp_defconfig b/arch/arc/configs/vdk_hs38_smp_defconfig
index f36c047..7359859 100644
--- a/arch/arc/configs/vdk_hs38_smp_defconfig
+++ b/arch/arc/configs/vdk_hs38_smp_defconfig
@@ -16,7 +16,7 @@
 CONFIG_AXS103=y
 CONFIG_ISA_ARCV2=y
 CONFIG_SMP=y
-# CONFIG_ARC_HAS_GRTC is not set
+# CONFIG_ARC_HAS_GFRC is not set
 CONFIG_ARC_UBOOT_SUPPORT=y
 CONFIG_ARC_BUILTIN_DTB_NAME="vdk_hs38_smp"
 CONFIG_PREEMPT=y
diff --git a/arch/arc/include/asm/arcregs.h b/arch/arc/include/asm/arcregs.h
index 7fac7d8..f9f4c6f 100644
--- a/arch/arc/include/asm/arcregs.h
+++ b/arch/arc/include/asm/arcregs.h
@@ -10,7 +10,8 @@
 #define _ASM_ARC_ARCREGS_H
 
 /* Build Configuration Registers */
-#define ARC_REG_DCCMBASE_BCR	0x61	/* DCCM Base Addr */
+#define ARC_REG_AUX_DCCM	0x18	/* DCCM Base Addr ARCv2 */
+#define ARC_REG_DCCM_BASE_BUILD	0x61	/* DCCM Base Addr ARCompact */
 #define ARC_REG_CRC_BCR		0x62
 #define ARC_REG_VECBASE_BCR	0x68
 #define ARC_REG_PERIBASE_BCR	0x69
@@ -18,10 +19,10 @@
 #define ARC_REG_DPFP_BCR	0x6C	/* ARCompact: Dbl Precision FPU */
 #define ARC_REG_FP_V2_BCR	0xc8	/* ARCv2 FPU */
 #define ARC_REG_SLC_BCR		0xce
-#define ARC_REG_DCCM_BCR	0x74	/* DCCM Present + SZ */
+#define ARC_REG_DCCM_BUILD	0x74	/* DCCM size (common) */
 #define ARC_REG_TIMERS_BCR	0x75
 #define ARC_REG_AP_BCR		0x76
-#define ARC_REG_ICCM_BCR	0x78
+#define ARC_REG_ICCM_BUILD	0x78	/* ICCM size (common) */
 #define ARC_REG_XY_MEM_BCR	0x79
 #define ARC_REG_MAC_BCR		0x7a
 #define ARC_REG_MUL_BCR		0x7b
@@ -36,6 +37,7 @@
 #define ARC_REG_IRQ_BCR		0xF3
 #define ARC_REG_SMART_BCR	0xFF
 #define ARC_REG_CLUSTER_BCR	0xcf
+#define ARC_REG_AUX_ICCM	0x208	/* ICCM Base Addr (ARCv2) */
 
 /* status32 Bits Positions */
 #define STATUS_AE_BIT		5	/* Exception active */
@@ -246,7 +248,7 @@
 #endif
 };
 
-struct bcr_iccm {
+struct bcr_iccm_arcompact {
 #ifdef CONFIG_CPU_BIG_ENDIAN
 	unsigned int base:16, pad:5, sz:3, ver:8;
 #else
@@ -254,17 +256,15 @@
 #endif
 };
 
-/* DCCM Base Address Register: ARC_REG_DCCMBASE_BCR */
-struct bcr_dccm_base {
+struct bcr_iccm_arcv2 {
 #ifdef CONFIG_CPU_BIG_ENDIAN
-	unsigned int addr:24, ver:8;
+	unsigned int pad:8, sz11:4, sz01:4, sz10:4, sz00:4, ver:8;
 #else
-	unsigned int ver:8, addr:24;
+	unsigned int ver:8, sz00:4, sz10:4, sz01:4, sz11:4, pad:8;
 #endif
 };
 
-/* DCCM RAM Configuration Register: ARC_REG_DCCM_BCR */
-struct bcr_dccm {
+struct bcr_dccm_arcompact {
 #ifdef CONFIG_CPU_BIG_ENDIAN
 	unsigned int res:21, sz:3, ver:8;
 #else
@@ -272,6 +272,14 @@
 #endif
 };
 
+struct bcr_dccm_arcv2 {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+	unsigned int pad2:12, cyc:3, pad1:1, sz1:4, sz0:4, ver:8;
+#else
+	unsigned int ver:8, sz0:4, sz1:4, pad1:1, cyc:3, pad2:12;
+#endif
+};
+
 /* ARCompact: Both SP and DP FPU BCRs have same format */
 struct bcr_fp_arcompact {
 #ifdef CONFIG_CPU_BIG_ENDIAN
@@ -315,9 +323,9 @@
 
 struct bcr_generic {
 #ifdef CONFIG_CPU_BIG_ENDIAN
-	unsigned int pad:24, ver:8;
+	unsigned int info:24, ver:8;
 #else
-	unsigned int ver:8, pad:24;
+	unsigned int ver:8, info:24;
 #endif
 };
 
@@ -349,14 +357,13 @@
 	struct cpuinfo_arc_bpu bpu;
 	struct bcr_identity core;
 	struct bcr_isa isa;
-	struct bcr_timer timers;
 	unsigned int vec_base;
 	struct cpuinfo_arc_ccm iccm, dccm;
 	struct {
 		unsigned int swap:1, norm:1, minmax:1, barrel:1, crc:1, pad1:3,
 			     fpu_sp:1, fpu_dp:1, pad2:6,
 			     debug:1, ap:1, smart:1, rtt:1, pad3:4,
-			     pad4:8;
+			     timer0:1, timer1:1, rtc:1, gfrc:1, pad4:4;
 	} extn;
 	struct bcr_mpy extn_mpy;
 	struct bcr_extn_xymem extn_xymem;
diff --git a/arch/arc/include/asm/irq.h b/arch/arc/include/asm/irq.h
index 4fd7d62..49014f0 100644
--- a/arch/arc/include/asm/irq.h
+++ b/arch/arc/include/asm/irq.h
@@ -16,11 +16,9 @@
 #ifdef CONFIG_ISA_ARCOMPACT
 #define TIMER0_IRQ      3
 #define TIMER1_IRQ      4
-#define IPI_IRQ		(NR_CPU_IRQS-1) /* dummy to enable SMP build for up hardware */
 #else
 #define TIMER0_IRQ      16
 #define TIMER1_IRQ      17
-#define IPI_IRQ         19
 #endif
 
 #include <linux/interrupt.h>
diff --git a/arch/arc/include/asm/irqflags-arcv2.h b/arch/arc/include/asm/irqflags-arcv2.h
index 258b0e5..37c2f75 100644
--- a/arch/arc/include/asm/irqflags-arcv2.h
+++ b/arch/arc/include/asm/irqflags-arcv2.h
@@ -22,6 +22,7 @@
 #define AUX_IRQ_CTRL		0x00E
 #define AUX_IRQ_ACT		0x043	/* Active Intr across all levels */
 #define AUX_IRQ_LVL_PEND	0x200	/* Pending Intr across all levels */
+#define AUX_IRQ_HINT		0x201	/* For generating Soft Interrupts */
 #define AUX_IRQ_PRIORITY	0x206
 #define ICAUSE			0x40a
 #define AUX_IRQ_SELECT		0x40b
@@ -30,8 +31,11 @@
 /* Was Intr taken in User Mode */
 #define AUX_IRQ_ACT_BIT_U	31
 
-/* 0 is highest level, but taken by FIRQs, if present in design */
-#define ARCV2_IRQ_DEF_PRIO		0
+/*
+ * User space should be interruptable even by lowest prio interrupt
+ * Safe even if actual interrupt priorities is fewer or even one
+ */
+#define ARCV2_IRQ_DEF_PRIO	15
 
 /* seed value for status register */
 #define ISA_INIT_STATUS_BITS	(STATUS_IE_MASK | STATUS_AD_MASK | \
@@ -112,6 +116,16 @@
 	return arch_irqs_disabled_flags(arch_local_save_flags());
 }
 
+static inline void arc_softirq_trigger(int irq)
+{
+	write_aux_reg(AUX_IRQ_HINT, irq);
+}
+
+static inline void arc_softirq_clear(int irq)
+{
+	write_aux_reg(AUX_IRQ_HINT, 0);
+}
+
 #else
 
 .macro IRQ_DISABLE  scratch
diff --git a/arch/arc/include/asm/mcip.h b/arch/arc/include/asm/mcip.h
index 46f4e53..847e3bb 100644
--- a/arch/arc/include/asm/mcip.h
+++ b/arch/arc/include/asm/mcip.h
@@ -39,8 +39,8 @@
 #define CMD_DEBUG_SET_MASK		0x34
 #define CMD_DEBUG_SET_SELECT		0x36
 
-#define CMD_GRTC_READ_LO		0x42
-#define CMD_GRTC_READ_HI		0x43
+#define CMD_GFRC_READ_LO		0x42
+#define CMD_GFRC_READ_HI		0x43
 
 #define CMD_IDU_ENABLE			0x71
 #define CMD_IDU_DISABLE			0x72
diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h
index 57af2f0..d426d42 100644
--- a/arch/arc/include/asm/pgtable.h
+++ b/arch/arc/include/asm/pgtable.h
@@ -179,37 +179,44 @@
 #define __S111  PAGE_U_X_W_R
 
 /****************************************************************
- * Page Table Lookup split
+ * 2 tier (PGD:PTE) software page walker
  *
- * We implement 2 tier paging and since this is all software, we are free
- * to customize the span of a PGD / PTE entry to suit us
- *
- *			32 bit virtual address
+ * [31]		    32 bit virtual address              [0]
  * -------------------------------------------------------
- * | BITS_FOR_PGD    |  BITS_FOR_PTE    |  BITS_IN_PAGE  |
+ * |               | <------------ PGDIR_SHIFT ----------> |
+ * |		   |					 |
+ * | BITS_FOR_PGD  |  BITS_FOR_PTE  | <-- PAGE_SHIFT --> |
  * -------------------------------------------------------
  *       |                  |                |
  *       |                  |                --> off in page frame
- *       |		    |
  *       |                  ---> index into Page Table
- *       |
  *       ----> index into Page Directory
+ *
+ * In a single page size configuration, only PAGE_SHIFT is fixed
+ * So both PGD and PTE sizing can be tweaked
+ *  e.g. 8K page (PAGE_SHIFT 13) can have
+ *  - PGDIR_SHIFT 21  -> 11:8:13 address split
+ *  - PGDIR_SHIFT 24  -> 8:11:13 address split
+ *
+ * If Super Page is configured, PGDIR_SHIFT becomes fixed too,
+ * so the sizing flexibility is gone.
  */
 
-#define BITS_IN_PAGE	PAGE_SHIFT
-
-/* Optimal Sizing of Pg Tbl - based on MMU page size */
-#if defined(CONFIG_ARC_PAGE_SIZE_8K)
-#define BITS_FOR_PTE	8		/* 11:8:13 */
-#elif defined(CONFIG_ARC_PAGE_SIZE_16K)
-#define BITS_FOR_PTE	8		/* 10:8:14 */
-#elif defined(CONFIG_ARC_PAGE_SIZE_4K)
-#define BITS_FOR_PTE	9		/* 11:9:12 */
+#if defined(CONFIG_ARC_HUGEPAGE_16M)
+#define PGDIR_SHIFT	24
+#elif defined(CONFIG_ARC_HUGEPAGE_2M)
+#define PGDIR_SHIFT	21
+#else
+/*
+ * Only Normal page support so "hackable" (see comment above)
+ * Default value provides 11:8:13 (8K), 11:9:12 (4K)
+ */
+#define PGDIR_SHIFT	21
 #endif
 
-#define BITS_FOR_PGD	(32 - BITS_FOR_PTE - BITS_IN_PAGE)
+#define BITS_FOR_PTE	(PGDIR_SHIFT - PAGE_SHIFT)
+#define BITS_FOR_PGD	(32 - PGDIR_SHIFT)
 
-#define PGDIR_SHIFT	(32 - BITS_FOR_PGD)
 #define PGDIR_SIZE	(1UL << PGDIR_SHIFT)	/* vaddr span, not PDG sz */
 #define PGDIR_MASK	(~(PGDIR_SIZE-1))
 
diff --git a/arch/arc/kernel/entry-arcv2.S b/arch/arc/kernel/entry-arcv2.S
index cbfec79..c126460 100644
--- a/arch/arc/kernel/entry-arcv2.S
+++ b/arch/arc/kernel/entry-arcv2.S
@@ -45,11 +45,12 @@
 VECTOR	handle_interrupt	; (16) Timer0
 VECTOR	handle_interrupt	; unused (Timer1)
 VECTOR	handle_interrupt	; unused (WDT)
-VECTOR	handle_interrupt	; (19) ICI (inter core interrupt)
-VECTOR	handle_interrupt
-VECTOR	handle_interrupt
-VECTOR	handle_interrupt
-VECTOR	handle_interrupt	; (23) End of fixed IRQs
+VECTOR	handle_interrupt	; (19) Inter core Interrupt (IPI)
+VECTOR	handle_interrupt	; (20) perf Interrupt
+VECTOR	handle_interrupt	; (21) Software Triggered Intr (Self IPI)
+VECTOR	handle_interrupt	; unused
+VECTOR	handle_interrupt	; (23) unused
+# End of fixed IRQs
 
 .rept CONFIG_ARC_NUMBER_OF_INTERRUPTS - 8
 	VECTOR	handle_interrupt
@@ -211,7 +212,11 @@
 ; (since IRQ NOT allowed in DS in ARCv2, this can only happen if orig
 ; entry was via Exception in DS which got preempted in kernel).
 ;
-; IRQ RTIE won't reliably restore DE bit and/or BTA, needs handling
+; IRQ RTIE won't reliably restore DE bit and/or BTA, needs workaround
+;
+; Solution is return from Intr w/o any delay slot quirks into a kernel trampoline
+; and from pure kernel mode return to delay slot which handles DS bit/BTA correctly
+
 .Lintr_ret_to_delay_slot:
 debug_marker_ds:
 
@@ -222,18 +227,23 @@
 	ld	r2, [sp, PT_ret]
 	ld	r3, [sp, PT_status32]
 
+	; STAT32 for Int return created from scratch
+	; (No delay dlot, disable Further intr in trampoline)
+
 	bic  	r0, r3, STATUS_U_MASK|STATUS_DE_MASK|STATUS_IE_MASK|STATUS_L_MASK
 	st	r0, [sp, PT_status32]
 
 	mov	r1, .Lintr_ret_to_delay_slot_2
 	st	r1, [sp, PT_ret]
 
+	; Orig exception PC/STAT32 safekept @orig_r0 and @event stack slots
 	st	r2, [sp, 0]
 	st	r3, [sp, 4]
 
 	b	.Lisr_ret_fast_path
 
 .Lintr_ret_to_delay_slot_2:
+	; Trampoline to restore orig exception PC/STAT32/BTA/AUX_USER_SP
 	sub	sp, sp, SZ_PT_REGS
 	st	r9, [sp, -4]
 
@@ -243,11 +253,19 @@
 	ld	r9, [sp, 4]
 	sr	r9, [erstatus]
 
+	; restore AUX_USER_SP if returning to U mode
+	bbit0	r9, STATUS_U_BIT, 1f
+	ld	r9, [sp, PT_sp]
+	sr	r9, [AUX_USER_SP]
+
+1:
 	ld	r9, [sp, 8]
 	sr	r9, [erbta]
 
 	ld	r9, [sp, -4]
 	add	sp, sp, SZ_PT_REGS
+
+	; return from pure kernel mode to delay slot
 	rtie
 
 END(ret_from_exception)
diff --git a/arch/arc/kernel/intc-arcv2.c b/arch/arc/kernel/intc-arcv2.c
index 0394f9f..9425263 100644
--- a/arch/arc/kernel/intc-arcv2.c
+++ b/arch/arc/kernel/intc-arcv2.c
@@ -14,6 +14,8 @@
 #include <linux/irqchip.h>
 #include <asm/irq.h>
 
+static int irq_prio;
+
 /*
  * Early Hardware specific Interrupt setup
  * -Called very early (start_kernel -> setup_arch -> setup_processor)
@@ -24,6 +26,14 @@
 {
 	unsigned int tmp;
 
+	struct irq_build {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+		unsigned int pad:3, firq:1, prio:4, exts:8, irqs:8, ver:8;
+#else
+		unsigned int ver:8, irqs:8, exts:8, prio:4, firq:1, pad:3;
+#endif
+	} irq_bcr;
+
 	struct aux_irq_ctrl {
 #ifdef CONFIG_CPU_BIG_ENDIAN
 		unsigned int res3:18, save_idx_regs:1, res2:1,
@@ -46,28 +56,25 @@
 
 	WRITE_AUX(AUX_IRQ_CTRL, ictrl);
 
-	/* setup status32, don't enable intr yet as kernel doesn't want */
-	tmp = read_aux_reg(0xa);
-	tmp |= ISA_INIT_STATUS_BITS;
-	tmp &= ~STATUS_IE_MASK;
-	asm volatile("flag %0	\n"::"r"(tmp));
-
 	/*
 	 * ARCv2 core intc provides multiple interrupt priorities (upto 16).
 	 * Typical builds though have only two levels (0-high, 1-low)
 	 * Linux by default uses lower prio 1 for most irqs, reserving 0 for
 	 * NMI style interrupts in future (say perf)
-	 *
-	 * Read the intc BCR to confirm that Linux default priority is avail
-	 * in h/w
-	 *
-	 * Note:
-	 *  IRQ_BCR[27..24] contains N-1 (for N priority levels) and prio level
-	 *  is 0 based.
 	 */
-	tmp = (read_aux_reg(ARC_REG_IRQ_BCR) >> 24 ) & 0xF;
-	if (ARCV2_IRQ_DEF_PRIO > tmp)
-		panic("Linux default irq prio incorrect\n");
+
+	READ_BCR(ARC_REG_IRQ_BCR, irq_bcr);
+
+	irq_prio = irq_bcr.prio;	/* Encoded as N-1 for N levels */
+	pr_info("archs-intc\t: %d priority levels (default %d)%s\n",
+		irq_prio + 1, irq_prio,
+		irq_bcr.firq ? " FIRQ (not used)":"");
+
+	/* setup status32, don't enable intr yet as kernel doesn't want */
+	tmp = read_aux_reg(0xa);
+	tmp |= STATUS_AD_MASK | (irq_prio << 1);
+	tmp &= ~STATUS_IE_MASK;
+	asm volatile("flag %0	\n"::"r"(tmp));
 }
 
 static void arcv2_irq_mask(struct irq_data *data)
@@ -86,7 +93,7 @@
 {
 	/* set default priority */
 	write_aux_reg(AUX_IRQ_SELECT, data->irq);
-	write_aux_reg(AUX_IRQ_PRIORITY, ARCV2_IRQ_DEF_PRIO);
+	write_aux_reg(AUX_IRQ_PRIORITY, irq_prio);
 
 	/*
 	 * hw auto enables (linux unmask) all by default
diff --git a/arch/arc/kernel/intc-compact.c b/arch/arc/kernel/intc-compact.c
index 06bcedf..224d1c3 100644
--- a/arch/arc/kernel/intc-compact.c
+++ b/arch/arc/kernel/intc-compact.c
@@ -81,9 +81,6 @@
 {
 	switch (irq) {
 	case TIMER0_IRQ:
-#ifdef CONFIG_SMP
-	case IPI_IRQ:
-#endif
 		irq_set_chip_and_handler(irq, &onchip_intc, handle_percpu_irq);
 		break;
 	default:
diff --git a/arch/arc/kernel/mcip.c b/arch/arc/kernel/mcip.c
index bd237ac..c41c364 100644
--- a/arch/arc/kernel/mcip.c
+++ b/arch/arc/kernel/mcip.c
@@ -11,9 +11,13 @@
 #include <linux/smp.h>
 #include <linux/irq.h>
 #include <linux/spinlock.h>
+#include <asm/irqflags-arcv2.h>
 #include <asm/mcip.h>
 #include <asm/setup.h>
 
+#define IPI_IRQ		19
+#define SOFTIRQ_IRQ	21
+
 static char smp_cpuinfo_buf[128];
 static int idu_detected;
 
@@ -22,6 +26,7 @@
 static void mcip_setup_per_cpu(int cpu)
 {
 	smp_ipi_irq_setup(cpu, IPI_IRQ);
+	smp_ipi_irq_setup(cpu, SOFTIRQ_IRQ);
 }
 
 static void mcip_ipi_send(int cpu)
@@ -29,46 +34,44 @@
 	unsigned long flags;
 	int ipi_was_pending;
 
+	/* ARConnect can only send IPI to others */
+	if (unlikely(cpu == raw_smp_processor_id())) {
+		arc_softirq_trigger(SOFTIRQ_IRQ);
+		return;
+	}
+
+	raw_spin_lock_irqsave(&mcip_lock, flags);
+
 	/*
-	 * NOTE: We must spin here if the other cpu hasn't yet
-	 * serviced a previous message. This can burn lots
-	 * of time, but we MUST follows this protocol or
-	 * ipi messages can be lost!!!
-	 * Also, we must release the lock in this loop because
-	 * the other side may get to this same loop and not
-	 * be able to ack -- thus causing deadlock.
+	 * If receiver already has a pending interrupt, elide sending this one.
+	 * Linux cross core calling works well with concurrent IPIs
+	 * coalesced into one
+	 * see arch/arc/kernel/smp.c: ipi_send_msg_one()
 	 */
+	__mcip_cmd(CMD_INTRPT_READ_STATUS, cpu);
+	ipi_was_pending = read_aux_reg(ARC_REG_MCIP_READBACK);
+	if (!ipi_was_pending)
+		__mcip_cmd(CMD_INTRPT_GENERATE_IRQ, cpu);
 
-	do {
-		raw_spin_lock_irqsave(&mcip_lock, flags);
-		__mcip_cmd(CMD_INTRPT_READ_STATUS, cpu);
-		ipi_was_pending = read_aux_reg(ARC_REG_MCIP_READBACK);
-		if (ipi_was_pending == 0)
-			break; /* break out but keep lock */
-		raw_spin_unlock_irqrestore(&mcip_lock, flags);
-	} while (1);
-
-	__mcip_cmd(CMD_INTRPT_GENERATE_IRQ, cpu);
 	raw_spin_unlock_irqrestore(&mcip_lock, flags);
-
-#ifdef CONFIG_ARC_IPI_DBG
-	if (ipi_was_pending)
-		pr_info("IPI ACK delayed from cpu %d\n", cpu);
-#endif
 }
 
 static void mcip_ipi_clear(int irq)
 {
 	unsigned int cpu, c;
 	unsigned long flags;
-	unsigned int __maybe_unused copy;
+
+	if (unlikely(irq == SOFTIRQ_IRQ)) {
+		arc_softirq_clear(irq);
+		return;
+	}
 
 	raw_spin_lock_irqsave(&mcip_lock, flags);
 
 	/* Who sent the IPI */
 	__mcip_cmd(CMD_INTRPT_CHECK_SOURCE, 0);
 
-	copy = cpu = read_aux_reg(ARC_REG_MCIP_READBACK);	/* 1,2,4,8... */
+	cpu = read_aux_reg(ARC_REG_MCIP_READBACK);	/* 1,2,4,8... */
 
 	/*
 	 * In rare case, multiple concurrent IPIs sent to same target can
@@ -82,12 +85,6 @@
 	} while (cpu);
 
 	raw_spin_unlock_irqrestore(&mcip_lock, flags);
-
-#ifdef CONFIG_ARC_IPI_DBG
-	if (c != __ffs(copy))
-		pr_info("IPIs from %x coalesced to %x\n",
-			copy, raw_smp_processor_id());
-#endif
 }
 
 static void mcip_probe_n_setup(void)
@@ -96,13 +93,13 @@
 #ifdef CONFIG_CPU_BIG_ENDIAN
 		unsigned int pad3:8,
 			     idu:1, llm:1, num_cores:6,
-			     iocoh:1,  grtc:1, dbg:1, pad2:1,
+			     iocoh:1,  gfrc:1, dbg:1, pad2:1,
 			     msg:1, sem:1, ipi:1, pad:1,
 			     ver:8;
 #else
 		unsigned int ver:8,
 			     pad:1, ipi:1, sem:1, msg:1,
-			     pad2:1, dbg:1, grtc:1, iocoh:1,
+			     pad2:1, dbg:1, gfrc:1, iocoh:1,
 			     num_cores:6, llm:1, idu:1,
 			     pad3:8;
 #endif
@@ -111,12 +108,13 @@
 	READ_BCR(ARC_REG_MCIP_BCR, mp);
 
 	sprintf(smp_cpuinfo_buf,
-		"Extn [SMP]\t: ARConnect (v%d): %d cores with %s%s%s%s\n",
+		"Extn [SMP]\t: ARConnect (v%d): %d cores with %s%s%s%s%s\n",
 		mp.ver, mp.num_cores,
 		IS_AVAIL1(mp.ipi, "IPI "),
 		IS_AVAIL1(mp.idu, "IDU "),
+		IS_AVAIL1(mp.llm, "LLM "),
 		IS_AVAIL1(mp.dbg, "DEBUG "),
-		IS_AVAIL1(mp.grtc, "GRTC"));
+		IS_AVAIL1(mp.gfrc, "GFRC"));
 
 	idu_detected = mp.idu;
 
@@ -125,8 +123,8 @@
 		__mcip_cmd_data(CMD_DEBUG_SET_MASK, 0xf, 0xf);
 	}
 
-	if (IS_ENABLED(CONFIG_ARC_HAS_GRTC) && !mp.grtc)
-		panic("kernel trying to use non-existent GRTC\n");
+	if (IS_ENABLED(CONFIG_ARC_HAS_GFRC) && !mp.gfrc)
+		panic("kernel trying to use non-existent GFRC\n");
 }
 
 struct plat_smp_ops plat_smp_ops = {
diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c
index e1b8744..cdc821d 100644
--- a/arch/arc/kernel/setup.c
+++ b/arch/arc/kernel/setup.c
@@ -42,9 +42,57 @@
 
 struct cpuinfo_arc cpuinfo_arc700[NR_CPUS];
 
+static void read_decode_ccm_bcr(struct cpuinfo_arc *cpu)
+{
+	if (is_isa_arcompact()) {
+		struct bcr_iccm_arcompact iccm;
+		struct bcr_dccm_arcompact dccm;
+
+		READ_BCR(ARC_REG_ICCM_BUILD, iccm);
+		if (iccm.ver) {
+			cpu->iccm.sz = 4096 << iccm.sz;	/* 8K to 512K */
+			cpu->iccm.base_addr = iccm.base << 16;
+		}
+
+		READ_BCR(ARC_REG_DCCM_BUILD, dccm);
+		if (dccm.ver) {
+			unsigned long base;
+			cpu->dccm.sz = 2048 << dccm.sz;	/* 2K to 256K */
+
+			base = read_aux_reg(ARC_REG_DCCM_BASE_BUILD);
+			cpu->dccm.base_addr = base & ~0xF;
+		}
+	} else {
+		struct bcr_iccm_arcv2 iccm;
+		struct bcr_dccm_arcv2 dccm;
+		unsigned long region;
+
+		READ_BCR(ARC_REG_ICCM_BUILD, iccm);
+		if (iccm.ver) {
+			cpu->iccm.sz = 256 << iccm.sz00;	/* 512B to 16M */
+			if (iccm.sz00 == 0xF && iccm.sz01 > 0)
+				cpu->iccm.sz <<= iccm.sz01;
+
+			region = read_aux_reg(ARC_REG_AUX_ICCM);
+			cpu->iccm.base_addr = region & 0xF0000000;
+		}
+
+		READ_BCR(ARC_REG_DCCM_BUILD, dccm);
+		if (dccm.ver) {
+			cpu->dccm.sz = 256 << dccm.sz0;
+			if (dccm.sz0 == 0xF && dccm.sz1 > 0)
+				cpu->dccm.sz <<= dccm.sz1;
+
+			region = read_aux_reg(ARC_REG_AUX_DCCM);
+			cpu->dccm.base_addr = region & 0xF0000000;
+		}
+	}
+}
+
 static void read_arc_build_cfg_regs(void)
 {
 	struct bcr_perip uncached_space;
+	struct bcr_timer timer;
 	struct bcr_generic bcr;
 	struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()];
 	unsigned long perip_space;
@@ -53,7 +101,11 @@
 	READ_BCR(AUX_IDENTITY, cpu->core);
 	READ_BCR(ARC_REG_ISA_CFG_BCR, cpu->isa);
 
-	READ_BCR(ARC_REG_TIMERS_BCR, cpu->timers);
+	READ_BCR(ARC_REG_TIMERS_BCR, timer);
+	cpu->extn.timer0 = timer.t0;
+	cpu->extn.timer1 = timer.t1;
+	cpu->extn.rtc = timer.rtc;
+
 	cpu->vec_base = read_aux_reg(AUX_INTR_VEC_BASE);
 
 	READ_BCR(ARC_REG_D_UNCACH_BCR, uncached_space);
@@ -71,36 +123,11 @@
 	cpu->extn.swap = read_aux_reg(ARC_REG_SWAP_BCR) ? 1 : 0;        /* 1,3 */
 	cpu->extn.crc = read_aux_reg(ARC_REG_CRC_BCR) ? 1 : 0;
 	cpu->extn.minmax = read_aux_reg(ARC_REG_MIXMAX_BCR) > 1 ? 1 : 0; /* 2 */
-
-	/* Note that we read the CCM BCRs independent of kernel config
-	 * This is to catch the cases where user doesn't know that
-	 * CCMs are present in hardware build
-	 */
-	{
-		struct bcr_iccm iccm;
-		struct bcr_dccm dccm;
-		struct bcr_dccm_base dccm_base;
-		unsigned int bcr_32bit_val;
-
-		bcr_32bit_val = read_aux_reg(ARC_REG_ICCM_BCR);
-		if (bcr_32bit_val) {
-			iccm = *((struct bcr_iccm *)&bcr_32bit_val);
-			cpu->iccm.base_addr = iccm.base << 16;
-			cpu->iccm.sz = 0x2000 << (iccm.sz - 1);
-		}
-
-		bcr_32bit_val = read_aux_reg(ARC_REG_DCCM_BCR);
-		if (bcr_32bit_val) {
-			dccm = *((struct bcr_dccm *)&bcr_32bit_val);
-			cpu->dccm.sz = 0x800 << (dccm.sz);
-
-			READ_BCR(ARC_REG_DCCMBASE_BCR, dccm_base);
-			cpu->dccm.base_addr = dccm_base.addr << 8;
-		}
-	}
-
 	READ_BCR(ARC_REG_XY_MEM_BCR, cpu->extn_xymem);
 
+	/* Read CCM BCRs for boot reporting even if not enabled in Kconfig */
+	read_decode_ccm_bcr(cpu);
+
 	read_decode_mmu_bcr();
 	read_decode_cache_bcr();
 
@@ -208,9 +235,9 @@
 		       (unsigned int)(arc_get_core_freq() / 10000) % 100);
 
 	n += scnprintf(buf + n, len - n, "Timers\t\t: %s%s%s%s\nISA Extn\t: ",
-		       IS_AVAIL1(cpu->timers.t0, "Timer0 "),
-		       IS_AVAIL1(cpu->timers.t1, "Timer1 "),
-		       IS_AVAIL2(cpu->timers.rtc, "64-bit RTC ",
+		       IS_AVAIL1(cpu->extn.timer0, "Timer0 "),
+		       IS_AVAIL1(cpu->extn.timer1, "Timer1 "),
+		       IS_AVAIL2(cpu->extn.rtc, "Local-64-bit-Ctr ",
 				 CONFIG_ARC_HAS_RTC));
 
 	n += i = scnprintf(buf + n, len - n, "%s%s%s%s%s",
@@ -232,8 +259,6 @@
 
 			n += scnprintf(buf + n, len - n, "mpy[opt %d] ", opt);
 		}
-		n += scnprintf(buf + n, len - n, "%s",
-			       IS_USED_CFG(CONFIG_ARC_HAS_HW_MPY));
 	}
 
 	n += scnprintf(buf + n, len - n, "%s%s%s%s%s%s%s%s\n",
@@ -293,13 +318,13 @@
 	struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()];
 	int fpu_enabled;
 
-	if (!cpu->timers.t0)
+	if (!cpu->extn.timer0)
 		panic("Timer0 is not present!\n");
 
-	if (!cpu->timers.t1)
+	if (!cpu->extn.timer1)
 		panic("Timer1 is not present!\n");
 
-	if (IS_ENABLED(CONFIG_ARC_HAS_RTC) && !cpu->timers.rtc)
+	if (IS_ENABLED(CONFIG_ARC_HAS_RTC) && !cpu->extn.rtc)
 		panic("RTC is not present\n");
 
 #ifdef CONFIG_ARC_HAS_DCCM
@@ -334,6 +359,7 @@
 		panic("FPU non-existent, disable CONFIG_ARC_FPU_SAVE_RESTORE\n");
 
 	if (is_isa_arcv2() && IS_ENABLED(CONFIG_SMP) && cpu->isa.atomic &&
+	    IS_ENABLED(CONFIG_ARC_HAS_LLSC) &&
 	    !IS_ENABLED(CONFIG_ARC_STAR_9000923308))
 		panic("llock/scond livelock workaround missing\n");
 }
diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c
index ef6e9e1..424e937 100644
--- a/arch/arc/kernel/smp.c
+++ b/arch/arc/kernel/smp.c
@@ -336,11 +336,8 @@
 		int rc;
 
 		rc = __do_IPI(msg);
-#ifdef CONFIG_ARC_IPI_DBG
-		/* IPI received but no valid @msg */
 		if (rc)
 			pr_info("IPI with bogus msg %ld in %ld\n", msg, copy);
-#endif
 		pending &= ~(1U << msg);
 	} while (pending);
 
diff --git a/arch/arc/kernel/time.c b/arch/arc/kernel/time.c
index dfad287..156d983 100644
--- a/arch/arc/kernel/time.c
+++ b/arch/arc/kernel/time.c
@@ -62,7 +62,7 @@
 
 /********** Clock Source Device *********/
 
-#ifdef CONFIG_ARC_HAS_GRTC
+#ifdef CONFIG_ARC_HAS_GFRC
 
 static int arc_counter_setup(void)
 {
@@ -83,10 +83,10 @@
 
 	local_irq_save(flags);
 
-	__mcip_cmd(CMD_GRTC_READ_LO, 0);
+	__mcip_cmd(CMD_GFRC_READ_LO, 0);
 	stamp.l = read_aux_reg(ARC_REG_MCIP_READBACK);
 
-	__mcip_cmd(CMD_GRTC_READ_HI, 0);
+	__mcip_cmd(CMD_GFRC_READ_HI, 0);
 	stamp.h = read_aux_reg(ARC_REG_MCIP_READBACK);
 
 	local_irq_restore(flags);
@@ -95,7 +95,7 @@
 }
 
 static struct clocksource arc_counter = {
-	.name   = "ARConnect GRTC",
+	.name   = "ARConnect GFRC",
 	.rating = 400,
 	.read   = arc_counter_read,
 	.mask   = CLOCKSOURCE_MASK(64),
diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile
index 4c23a68..43788b1 100644
--- a/arch/arm/boot/compressed/Makefile
+++ b/arch/arm/boot/compressed/Makefile
@@ -106,6 +106,15 @@
 KBUILD_CFLAGS = $(subst -pg, , $(ORIG_CFLAGS))
 endif
 
+# -fstack-protector-strong triggers protection checks in this code,
+# but it is being used too early to link to meaningful stack_chk logic.
+nossp_flags := $(call cc-option, -fno-stack-protector)
+CFLAGS_atags_to_fdt.o := $(nossp_flags)
+CFLAGS_fdt.o := $(nossp_flags)
+CFLAGS_fdt_ro.o := $(nossp_flags)
+CFLAGS_fdt_rw.o := $(nossp_flags)
+CFLAGS_fdt_wip.o := $(nossp_flags)
+
 ccflags-y := -fpic -mno-single-pic-base -fno-builtin -I$(obj)
 asflags-y := -DZIMAGE
 
@@ -186,5 +195,7 @@
 $(obj)/font.c: $(FONTC)
 	$(call cmd,shipped)
 
+AFLAGS_hyp-stub.o := -Wa,-march=armv7-a
+
 $(obj)/hyp-stub.S: $(srctree)/arch/$(SRCARCH)/kernel/hyp-stub.S
 	$(call cmd,shipped)
diff --git a/arch/arm/boot/dts/am335x-bone-common.dtsi b/arch/arm/boot/dts/am335x-bone-common.dtsi
index f3db13d..0cc150b 100644
--- a/arch/arm/boot/dts/am335x-bone-common.dtsi
+++ b/arch/arm/boot/dts/am335x-bone-common.dtsi
@@ -285,8 +285,10 @@
 	};
 };
 
+
+/include/ "tps65217.dtsi"
+
 &tps {
-	compatible = "ti,tps65217";
 	/*
 	 * Configure pmic to enter OFF-state instead of SLEEP-state ("RTC-only
 	 * mode") at poweroff.  Most BeagleBone versions do not support RTC-only
@@ -307,17 +309,12 @@
 	ti,pmic-shutdown-controller;
 
 	regulators {
-		#address-cells = <1>;
-		#size-cells = <0>;
-
 		dcdc1_reg: regulator@0 {
-			reg = <0>;
 			regulator-name = "vdds_dpr";
 			regulator-always-on;
 		};
 
 		dcdc2_reg: regulator@1 {
-			reg = <1>;
 			/* VDD_MPU voltage limits 0.95V - 1.26V with +/-4% tolerance */
 			regulator-name = "vdd_mpu";
 			regulator-min-microvolt = <925000>;
@@ -327,7 +324,6 @@
 		};
 
 		dcdc3_reg: regulator@2 {
-			reg = <2>;
 			/* VDD_CORE voltage limits 0.95V - 1.1V with +/-4% tolerance */
 			regulator-name = "vdd_core";
 			regulator-min-microvolt = <925000>;
@@ -337,25 +333,21 @@
 		};
 
 		ldo1_reg: regulator@3 {
-			reg = <3>;
 			regulator-name = "vio,vrtc,vdds";
 			regulator-always-on;
 		};
 
 		ldo2_reg: regulator@4 {
-			reg = <4>;
 			regulator-name = "vdd_3v3aux";
 			regulator-always-on;
 		};
 
 		ldo3_reg: regulator@5 {
-			reg = <5>;
 			regulator-name = "vdd_1v8";
 			regulator-always-on;
 		};
 
 		ldo4_reg: regulator@6 {
-			reg = <6>;
 			regulator-name = "vdd_3v3a";
 			regulator-always-on;
 		};
diff --git a/arch/arm/boot/dts/am335x-chilisom.dtsi b/arch/arm/boot/dts/am335x-chilisom.dtsi
index fda457b..857d989 100644
--- a/arch/arm/boot/dts/am335x-chilisom.dtsi
+++ b/arch/arm/boot/dts/am335x-chilisom.dtsi
@@ -128,21 +128,16 @@
 
 };
 
+/include/ "tps65217.dtsi"
+
 &tps {
-	compatible = "ti,tps65217";
-
 	regulators {
-		#address-cells = <1>;
-		#size-cells = <0>;
-
 		dcdc1_reg: regulator@0 {
-			reg = <0>;
 			regulator-name = "vdds_dpr";
 			regulator-always-on;
 		};
 
 		dcdc2_reg: regulator@1 {
-			reg = <1>;
 			/* VDD_MPU voltage limits 0.95V - 1.26V with +/-4% tolerance */
 			regulator-name = "vdd_mpu";
 			regulator-min-microvolt = <925000>;
@@ -152,7 +147,6 @@
 		};
 
 		dcdc3_reg: regulator@2 {
-			reg = <2>;
 			/* VDD_CORE voltage limits 0.95V - 1.1V with +/-4% tolerance */
 			regulator-name = "vdd_core";
 			regulator-min-microvolt = <925000>;
@@ -162,28 +156,24 @@
 		};
 
 		ldo1_reg: regulator@3 {
-			reg = <3>;
 			regulator-name = "vio,vrtc,vdds";
 			regulator-boot-on;
 			regulator-always-on;
 		};
 
 		ldo2_reg: regulator@4 {
-			reg = <4>;
 			regulator-name = "vdd_3v3aux";
 			regulator-boot-on;
 			regulator-always-on;
 		};
 
 		ldo3_reg: regulator@5 {
-			reg = <5>;
 			regulator-name = "vdd_1v8";
 			regulator-boot-on;
 			regulator-always-on;
 		};
 
 		ldo4_reg: regulator@6 {
-			reg = <6>;
 			regulator-name = "vdd_3v3d";
 			regulator-boot-on;
 			regulator-always-on;
diff --git a/arch/arm/boot/dts/am335x-nano.dts b/arch/arm/boot/dts/am335x-nano.dts
index 77559a1..f313999 100644
--- a/arch/arm/boot/dts/am335x-nano.dts
+++ b/arch/arm/boot/dts/am335x-nano.dts
@@ -375,15 +375,11 @@
 	wp-gpios = <&gpio3 18 0>;
 };
 
+#include "tps65217.dtsi"
+
 &tps {
-	compatible = "ti,tps65217";
-
 	regulators {
-		#address-cells = <1>;
-		#size-cells = <0>;
-
 		dcdc1_reg: regulator@0 {
-			reg = <0>;
 			/* +1.5V voltage with ±4% tolerance */
 			regulator-min-microvolt = <1450000>;
 			regulator-max-microvolt = <1550000>;
@@ -392,7 +388,6 @@
 		};
 
 		dcdc2_reg: regulator@1 {
-			reg = <1>;
 			/* VDD_MPU voltage limits 0.95V - 1.1V with ±4% tolerance */
 			regulator-name = "vdd_mpu";
 			regulator-min-microvolt = <915000>;
@@ -402,7 +397,6 @@
 		};
 
 		dcdc3_reg: regulator@2 {
-			reg = <2>;
 			/* VDD_CORE voltage limits 0.95V - 1.1V with ±4% tolerance */
 			regulator-name = "vdd_core";
 			regulator-min-microvolt = <915000>;
@@ -412,7 +406,6 @@
 		};
 
 		ldo1_reg: regulator@3 {
-			reg = <3>;
 			/* +1.8V voltage with ±4% tolerance */
 			regulator-min-microvolt = <1750000>;
 			regulator-max-microvolt = <1870000>;
@@ -421,7 +414,6 @@
 		};
 
 		ldo2_reg: regulator@4 {
-			reg = <4>;
 			/* +3.3V voltage with ±4% tolerance */
 			regulator-min-microvolt = <3175000>;
 			regulator-max-microvolt = <3430000>;
@@ -430,7 +422,6 @@
 		};
 
 		ldo3_reg: regulator@5 {
-			reg = <5>;
 			/* +1.8V voltage with ±4% tolerance */
 			regulator-min-microvolt = <1750000>;
 			regulator-max-microvolt = <1870000>;
@@ -439,7 +430,6 @@
 		};
 
 		ldo4_reg: regulator@6 {
-			reg = <6>;
 			/* +3.3V voltage with ±4% tolerance */
 			regulator-min-microvolt = <3175000>;
 			regulator-max-microvolt = <3430000>;
diff --git a/arch/arm/boot/dts/am335x-pepper.dts b/arch/arm/boot/dts/am335x-pepper.dts
index 471a3a7..8867aaa 100644
--- a/arch/arm/boot/dts/am335x-pepper.dts
+++ b/arch/arm/boot/dts/am335x-pepper.dts
@@ -420,9 +420,9 @@
 	vin-supply = <&vbat>;
 };
 
-&tps {
-	compatible = "ti,tps65217";
+/include/ "tps65217.dtsi"
 
+&tps {
 	backlight {
 		isel = <1>; /* ISET1 */
 		fdim = <200>; /* TPS65217_BL_FDIM_200HZ */
@@ -430,17 +430,12 @@
 	};
 
 	regulators {
-		#address-cells = <1>;
-		#size-cells = <0>;
-
 		dcdc1_reg: regulator@0 {
-			reg = <0>;
 			/* VDD_1V8 system supply */
 			regulator-always-on;
 		};
 
 		dcdc2_reg: regulator@1 {
-			reg = <1>;
 			/* VDD_CORE voltage limits 0.95V - 1.26V with +/-4% tolerance */
 			regulator-name = "vdd_core";
 			regulator-min-microvolt = <925000>;
@@ -450,7 +445,6 @@
 		};
 
 		dcdc3_reg: regulator@2 {
-			reg = <2>;
 			/* VDD_MPU voltage limits 0.95V - 1.1V with +/-4% tolerance */
 			regulator-name = "vdd_mpu";
 			regulator-min-microvolt = <925000>;
@@ -460,21 +454,18 @@
 		};
 
 		ldo1_reg: regulator@3 {
-			reg = <3>;
 			/* VRTC 1.8V always-on supply */
 			regulator-name = "vrtc,vdds";
 			regulator-always-on;
 		};
 
 		ldo2_reg: regulator@4 {
-			reg = <4>;
 			/* 3.3V rail */
 			regulator-name = "vdd_3v3aux";
 			regulator-always-on;
 		};
 
 		ldo3_reg: regulator@5 {
-			reg = <5>;
 			/* VDD_3V3A 3.3V rail */
 			regulator-name = "vdd_3v3a";
 			regulator-min-microvolt = <3300000>;
@@ -482,7 +473,6 @@
 		};
 
 		ldo4_reg: regulator@6 {
-			reg = <6>;
 			/* VDD_3V3B 3.3V rail */
 			regulator-name = "vdd_3v3b";
 			regulator-always-on;
diff --git a/arch/arm/boot/dts/am335x-shc.dts b/arch/arm/boot/dts/am335x-shc.dts
index 1b5b044..865de85 100644
--- a/arch/arm/boot/dts/am335x-shc.dts
+++ b/arch/arm/boot/dts/am335x-shc.dts
@@ -46,7 +46,7 @@
 			gpios = <&gpio1 29 GPIO_ACTIVE_HIGH>;
 			linux,code = <KEY_BACK>;
 			debounce-interval = <1000>;
-			gpio-key,wakeup;
+			wakeup-source;
 		};
 
 		front_button {
@@ -54,7 +54,7 @@
 			gpios = <&gpio1 25 GPIO_ACTIVE_HIGH>;
 			linux,code = <KEY_FRONT>;
 			debounce-interval = <1000>;
-			gpio-key,wakeup;
+			wakeup-source;
 		};
 	};
 
diff --git a/arch/arm/boot/dts/am335x-sl50.dts b/arch/arm/boot/dts/am335x-sl50.dts
index d38edfa..3303c28 100644
--- a/arch/arm/boot/dts/am335x-sl50.dts
+++ b/arch/arm/boot/dts/am335x-sl50.dts
@@ -375,19 +375,16 @@
 	pinctrl-0 = <&uart4_pins>;
 };
 
+#include "tps65217.dtsi"
+
 &tps {
-	compatible = "ti,tps65217";
 	ti,pmic-shutdown-controller;
 
 	interrupt-parent = <&intc>;
 	interrupts = <7>;	/* NNMI */
 
 	regulators {
-		#address-cells = <1>;
-		#size-cells = <0>;
-
 		dcdc1_reg: regulator@0 {
-			reg = <0>;
 			/* VDDS_DDR */
 			regulator-min-microvolt = <1500000>;
 			regulator-max-microvolt = <1500000>;
@@ -395,7 +392,6 @@
 		};
 
 		dcdc2_reg: regulator@1 {
-			reg = <1>;
 			/* VDD_MPU voltage limits 0.95V - 1.26V with +/-4% tolerance */
 			regulator-name = "vdd_mpu";
 			regulator-min-microvolt = <925000>;
@@ -405,7 +401,6 @@
 		};
 
 		dcdc3_reg: regulator@2 {
-			reg = <2>;
 			/* VDD_CORE voltage limits 0.95V - 1.1V with +/-4% tolerance */
 			regulator-name = "vdd_core";
 			regulator-min-microvolt = <925000>;
@@ -415,7 +410,6 @@
 		};
 
 		ldo1_reg: regulator@3 {
-			reg = <3>;
 			/* VRTC / VIO / VDDS*/
 			regulator-always-on;
 			regulator-min-microvolt = <1800000>;
@@ -423,7 +417,6 @@
 		};
 
 		ldo2_reg: regulator@4 {
-			reg = <4>;
 			/* VDD_3V3AUX */
 			regulator-always-on;
 			regulator-min-microvolt = <3300000>;
@@ -431,7 +424,6 @@
 		};
 
 		ldo3_reg: regulator@5 {
-			reg = <5>;
 			/* VDD_1V8 */
 			regulator-min-microvolt = <1800000>;
 			regulator-max-microvolt = <1800000>;
@@ -439,7 +431,6 @@
 		};
 
 		ldo4_reg: regulator@6 {
-			reg = <6>;
 			/* VDD_3V3A */
 			regulator-min-microvolt = <3300000>;
 			regulator-max-microvolt = <3300000>;
diff --git a/arch/arm/boot/dts/am33xx.dtsi b/arch/arm/boot/dts/am33xx.dtsi
index 04885f9..1fafaad 100644
--- a/arch/arm/boot/dts/am33xx.dtsi
+++ b/arch/arm/boot/dts/am33xx.dtsi
@@ -439,6 +439,7 @@
 			ti,mbox-num-users = <4>;
 			ti,mbox-num-fifos = <8>;
 			mbox_wkupm3: wkup_m3 {
+				ti,mbox-send-noirq;
 				ti,mbox-tx = <0 0 0>;
 				ti,mbox-rx = <0 0 3>;
 			};
diff --git a/arch/arm/boot/dts/am4372.dtsi b/arch/arm/boot/dts/am4372.dtsi
index df955ba..92068fb 100644
--- a/arch/arm/boot/dts/am4372.dtsi
+++ b/arch/arm/boot/dts/am4372.dtsi
@@ -73,7 +73,7 @@
 	global_timer: timer@48240200 {
 		compatible = "arm,cortex-a9-global-timer";
 		reg = <0x48240200 0x100>;
-		interrupts = <GIC_PPI 11 IRQ_TYPE_LEVEL_HIGH>;
+		interrupts = <GIC_PPI 11 IRQ_TYPE_EDGE_RISING>;
 		interrupt-parent = <&gic>;
 		clocks = <&mpu_periphclk>;
 	};
@@ -81,7 +81,7 @@
 	local_timer: timer@48240600 {
 		compatible = "arm,cortex-a9-twd-timer";
 		reg = <0x48240600 0x100>;
-		interrupts = <GIC_PPI 13 IRQ_TYPE_LEVEL_HIGH>;
+		interrupts = <GIC_PPI 13 IRQ_TYPE_EDGE_RISING>;
 		interrupt-parent = <&gic>;
 		clocks = <&mpu_periphclk>;
 	};
@@ -290,6 +290,7 @@
 			ti,mbox-num-users = <4>;
 			ti,mbox-num-fifos = <8>;
 			mbox_wkupm3: wkup_m3 {
+				ti,mbox-send-noirq;
 				ti,mbox-tx = <0 0 0>;
 				ti,mbox-rx = <0 0 3>;
 			};
diff --git a/arch/arm/boot/dts/am437x-gp-evm.dts b/arch/arm/boot/dts/am437x-gp-evm.dts
index 64d4332..ecd09ab 100644
--- a/arch/arm/boot/dts/am437x-gp-evm.dts
+++ b/arch/arm/boot/dts/am437x-gp-evm.dts
@@ -590,8 +590,6 @@
 		pinctrl-names = "default";
 		pinctrl-0 = <&pixcir_ts_pins>;
 		reg = <0x5c>;
-		interrupt-parent = <&gpio3>;
-		interrupts = <22 0>;
 
 		attb-gpio = <&gpio3 22 GPIO_ACTIVE_HIGH>;
 
@@ -599,7 +597,7 @@
 		 * 0x264 represents the offset of padconf register of
 		 * gpio3_22 from am43xx_pinmux base.
 		 */
-		interrupts-extended = <&gpio3 22 IRQ_TYPE_NONE>,
+		interrupts-extended = <&gpio3 22 IRQ_TYPE_EDGE_FALLING>,
 				      <&am43xx_pinmux 0x264>;
 		interrupt-names = "tsc", "wakeup";
 
diff --git a/arch/arm/boot/dts/am43x-epos-evm.dts b/arch/arm/boot/dts/am43x-epos-evm.dts
index 746fd2b..d580e2b7 100644
--- a/arch/arm/boot/dts/am43x-epos-evm.dts
+++ b/arch/arm/boot/dts/am43x-epos-evm.dts
@@ -491,7 +491,7 @@
 		pinctrl-0 = <&pixcir_ts_pins>;
 		reg = <0x5c>;
 		interrupt-parent = <&gpio1>;
-		interrupts = <17 0>;
+		interrupts = <17 IRQ_TYPE_EDGE_FALLING>;
 
 		attb-gpio = <&gpio1 17 GPIO_ACTIVE_HIGH>;
 
diff --git a/arch/arm/boot/dts/am57xx-beagle-x15.dts b/arch/arm/boot/dts/am57xx-beagle-x15.dts
index 36c0fa6..a0986c6 100644
--- a/arch/arm/boot/dts/am57xx-beagle-x15.dts
+++ b/arch/arm/boot/dts/am57xx-beagle-x15.dts
@@ -173,6 +173,8 @@
 
 		sound0_master: simple-audio-card,codec {
 			sound-dai = <&tlv320aic3104>;
+			assigned-clocks = <&clkoutmux2_clk_mux>;
+			assigned-clock-parents = <&sys_clk2_dclk_div>;
 			clocks = <&clkout2_clk>;
 		};
 	};
@@ -796,6 +798,8 @@
 	pinctrl-names = "default", "sleep";
 	pinctrl-0 = <&mcasp3_pins_default>;
 	pinctrl-1 = <&mcasp3_pins_sleep>;
+	assigned-clocks = <&mcasp3_ahclkx_mux>;
+	assigned-clock-parents = <&sys_clkin2>;
 	status = "okay";
 
 	op-mode = <0>;	/* MCASP_IIS_MODE */
diff --git a/arch/arm/boot/dts/am57xx-cl-som-am57x.dts b/arch/arm/boot/dts/am57xx-cl-som-am57x.dts
index c538826..1c06cb7 100644
--- a/arch/arm/boot/dts/am57xx-cl-som-am57x.dts
+++ b/arch/arm/boot/dts/am57xx-cl-som-am57x.dts
@@ -167,7 +167,7 @@
 			DRA7XX_CORE_IOPAD(0x35b8, PIN_INPUT_PULLDOWN | MUX_MODE3) /* vin2a_d20.rgmii1_rd3 */
 			DRA7XX_CORE_IOPAD(0x35bc, PIN_INPUT_PULLDOWN | MUX_MODE3) /* vin2a_d21.rgmii1_rd2 */
 			DRA7XX_CORE_IOPAD(0x35c0, PIN_INPUT_PULLDOWN | MUX_MODE3) /* vin2a_d22.rgmii1_rd1 */
-			DRA7XX_CORE_IOPAD(0x35c4, PIN_INPUT_PULLUP | MUX_MODE3)	/* vin2a_d23.rgmii1_rd0 */
+			DRA7XX_CORE_IOPAD(0x35c4, PIN_INPUT_PULLDOWN | MUX_MODE3) /* vin2a_d23.rgmii1_rd0 */
 		>;
 	};
 
@@ -492,14 +492,14 @@
 	pinctrl-names = "default";
 	pinctrl-0 = <&qspi1_pins>;
 
-	spi-max-frequency = <20000000>;
+	spi-max-frequency = <48000000>;
 
 	spi_flash: spi_flash@0 {
 		#address-cells = <1>;
 		#size-cells = <1>;
 		compatible = "spansion,m25p80", "jedec,spi-nor";
 		reg = <0>;				/* CS0 */
-		spi-max-frequency = <20000000>;
+		spi-max-frequency = <48000000>;
 
 		partition@0 {
 			label = "uboot";
@@ -545,7 +545,7 @@
 		ti,debounce-tol = /bits/ 16 <10>;
 		ti,debounce-rep = /bits/ 16 <1>;
 
-		linux,wakeup;
+		wakeup-source;
 	};
 };
 
@@ -559,13 +559,13 @@
 
 &cpsw_emac0 {
 	phy_id = <&davinci_mdio>, <0>;
-	phy-mode = "rgmii";
+	phy-mode = "rgmii-txid";
 	dual_emac_res_vlan = <0>;
 };
 
 &cpsw_emac1 {
 	phy_id = <&davinci_mdio>, <1>;
-	phy-mode = "rgmii";
+	phy-mode = "rgmii-txid";
 	dual_emac_res_vlan = <1>;
 };
 
@@ -588,7 +588,7 @@
 };
 
 &usb2 {
-	dr_mode = "peripheral";
+	dr_mode = "host";
 };
 
 &mcasp3 {
diff --git a/arch/arm/boot/dts/am57xx-sbc-am57x.dts b/arch/arm/boot/dts/am57xx-sbc-am57x.dts
index 77bb8e1..988e996 100644
--- a/arch/arm/boot/dts/am57xx-sbc-am57x.dts
+++ b/arch/arm/boot/dts/am57xx-sbc-am57x.dts
@@ -25,8 +25,8 @@
 &dra7_pmx_core {
 	uart3_pins_default: uart3_pins_default {
 		pinctrl-single,pins = <
-			DRA7XX_CORE_IOPAD(0x37f8, PIN_INPUT_SLEW | MUX_MODE2)	/* uart2_ctsn.uart3_rxd */
-			DRA7XX_CORE_IOPAD(0x37fc, PIN_INPUT_SLEW | MUX_MODE1)	/* uart2_rtsn.uart3_txd */
+			DRA7XX_CORE_IOPAD(0x3648, PIN_INPUT_SLEW | MUX_MODE0)	/* uart3_rxd */
+			DRA7XX_CORE_IOPAD(0x364c, PIN_INPUT_SLEW | MUX_MODE0)	/* uart3_txd */
 		>;
 	};
 
@@ -108,9 +108,9 @@
 	pinctrl-0 = <&i2c5_pins_default>;
 	clock-frequency = <400000>;
 
-	eeprom_base: atmel@50 {
+	eeprom_base: atmel@54 {
 		compatible = "atmel,24c08";
-		reg = <0x50>;
+		reg = <0x54>;
 		pagesize = <16>;
 	};
 
diff --git a/arch/arm/boot/dts/armada-xp-axpwifiap.dts b/arch/arm/boot/dts/armada-xp-axpwifiap.dts
index 23fc670..5c21b23 100644
--- a/arch/arm/boot/dts/armada-xp-axpwifiap.dts
+++ b/arch/arm/boot/dts/armada-xp-axpwifiap.dts
@@ -70,8 +70,8 @@
 	soc {
 		ranges = <MBUS_ID(0xf0, 0x01) 0 0 0xf1000000 0x100000
 			  MBUS_ID(0x01, 0x1d) 0 0 0xfff00000 0x100000
-			  MBUS_ID(0x09, 0x09) 0 0 0xf8100000 0x10000
-			  MBUS_ID(0x09, 0x05) 0 0 0xf8110000 0x10000>;
+			  MBUS_ID(0x09, 0x09) 0 0 0xf1100000 0x10000
+			  MBUS_ID(0x09, 0x05) 0 0 0xf1110000 0x10000>;
 
 		pcie-controller {
 			status = "okay";
diff --git a/arch/arm/boot/dts/armada-xp-db.dts b/arch/arm/boot/dts/armada-xp-db.dts
index f774101..ebe1d26 100644
--- a/arch/arm/boot/dts/armada-xp-db.dts
+++ b/arch/arm/boot/dts/armada-xp-db.dts
@@ -76,8 +76,8 @@
 		ranges = <MBUS_ID(0xf0, 0x01) 0 0 0xf1000000 0x100000
 			  MBUS_ID(0x01, 0x1d) 0 0 0xfff00000 0x100000
 			  MBUS_ID(0x01, 0x2f) 0 0 0xf0000000 0x1000000
-			  MBUS_ID(0x09, 0x09) 0 0 0xf8100000 0x10000
-			  MBUS_ID(0x09, 0x05) 0 0 0xf8110000 0x10000>;
+			  MBUS_ID(0x09, 0x09) 0 0 0xf1100000 0x10000
+			  MBUS_ID(0x09, 0x05) 0 0 0xf1110000 0x10000>;
 
 		devbus-bootcs {
 			status = "okay";
diff --git a/arch/arm/boot/dts/armada-xp-gp.dts b/arch/arm/boot/dts/armada-xp-gp.dts
index 4878d73..5730b87 100644
--- a/arch/arm/boot/dts/armada-xp-gp.dts
+++ b/arch/arm/boot/dts/armada-xp-gp.dts
@@ -95,8 +95,8 @@
 		ranges = <MBUS_ID(0xf0, 0x01) 0 0 0xf1000000 0x100000
 			  MBUS_ID(0x01, 0x1d) 0 0 0xfff00000 0x100000
 			  MBUS_ID(0x01, 0x2f) 0 0 0xf0000000 0x1000000
-			  MBUS_ID(0x09, 0x09) 0 0 0xf8100000 0x10000
-			  MBUS_ID(0x09, 0x05) 0 0 0xf8110000 0x10000>;
+			  MBUS_ID(0x09, 0x09) 0 0 0xf1100000 0x10000
+			  MBUS_ID(0x09, 0x05) 0 0 0xf1110000 0x10000>;
 
 		devbus-bootcs {
 			status = "okay";
diff --git a/arch/arm/boot/dts/armada-xp-lenovo-ix4-300d.dts b/arch/arm/boot/dts/armada-xp-lenovo-ix4-300d.dts
index 13cf69a8..8af463f 100644
--- a/arch/arm/boot/dts/armada-xp-lenovo-ix4-300d.dts
+++ b/arch/arm/boot/dts/armada-xp-lenovo-ix4-300d.dts
@@ -65,8 +65,8 @@
 	soc {
 		ranges = <MBUS_ID(0xf0, 0x01) 0 0 0xd0000000 0x100000
 			MBUS_ID(0x01, 0x1d) 0 0 0xfff00000 0x100000
-			MBUS_ID(0x09, 0x09) 0 0 0xf8100000 0x10000
-			MBUS_ID(0x09, 0x05) 0 0 0xf8110000 0x10000>;
+			MBUS_ID(0x09, 0x09) 0 0 0xf1100000 0x10000
+			MBUS_ID(0x09, 0x05) 0 0 0xf1110000 0x10000>;
 
 		pcie-controller {
 			status = "okay";
@@ -152,6 +152,7 @@
 				nand-on-flash-bbt;
 
 				partitions {
+					compatible = "fixed-partitions";
 					#address-cells = <1>;
 					#size-cells = <1>;
 
diff --git a/arch/arm/boot/dts/armada-xp-linksys-mamba.dts b/arch/arm/boot/dts/armada-xp-linksys-mamba.dts
index 6e9820e..b89e6cf 100644
--- a/arch/arm/boot/dts/armada-xp-linksys-mamba.dts
+++ b/arch/arm/boot/dts/armada-xp-linksys-mamba.dts
@@ -70,8 +70,8 @@
 	soc {
 		ranges = <MBUS_ID(0xf0, 0x01) 0 0 0xf1000000 0x100000
 			  MBUS_ID(0x01, 0x1d) 0 0 0xfff00000 0x100000
-			  MBUS_ID(0x09, 0x09) 0 0 0xf8100000 0x10000
-			  MBUS_ID(0x09, 0x05) 0 0 0xf8110000 0x10000>;
+			  MBUS_ID(0x09, 0x09) 0 0 0xf1100000 0x10000
+			  MBUS_ID(0x09, 0x05) 0 0 0xf1110000 0x10000>;
 
 		pcie-controller {
 			status = "okay";
diff --git a/arch/arm/boot/dts/armada-xp-matrix.dts b/arch/arm/boot/dts/armada-xp-matrix.dts
index 6ab3383..6522b04 100644
--- a/arch/arm/boot/dts/armada-xp-matrix.dts
+++ b/arch/arm/boot/dts/armada-xp-matrix.dts
@@ -68,8 +68,8 @@
 	soc {
 		ranges = <MBUS_ID(0xf0, 0x01) 0 0 0xf1000000 0x100000
 			  MBUS_ID(0x01, 0x1d) 0 0 0xfff00000 0x100000
-			  MBUS_ID(0x09, 0x09) 0 0 0xf8100000 0x10000
-			  MBUS_ID(0x09, 0x05) 0 0 0xf8110000 0x10000>;
+			  MBUS_ID(0x09, 0x09) 0 0 0xf1100000 0x10000
+			  MBUS_ID(0x09, 0x05) 0 0 0xf1110000 0x10000>;
 
 		internal-regs {
 			serial@12000 {
diff --git a/arch/arm/boot/dts/armada-xp-netgear-rn2120.dts b/arch/arm/boot/dts/armada-xp-netgear-rn2120.dts
index 62175a8..d19f44c 100644
--- a/arch/arm/boot/dts/armada-xp-netgear-rn2120.dts
+++ b/arch/arm/boot/dts/armada-xp-netgear-rn2120.dts
@@ -64,8 +64,8 @@
 	soc {
 		ranges = <MBUS_ID(0xf0, 0x01) 0 0 0xd0000000 0x100000
 			  MBUS_ID(0x01, 0x1d) 0 0 0xfff00000 0x100000
-			  MBUS_ID(0x09, 0x09) 0 0 0xf8100000 0x10000
-			  MBUS_ID(0x09, 0x05) 0 0 0xf8110000 0x10000>;
+			  MBUS_ID(0x09, 0x09) 0 0 0xf1100000 0x10000
+			  MBUS_ID(0x09, 0x05) 0 0 0xf1110000 0x10000>;
 
 		pcie-controller {
 			status = "okay";
diff --git a/arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts b/arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts
index a5db177..853bd39 100644
--- a/arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts
+++ b/arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts
@@ -65,9 +65,9 @@
 	soc {
 		ranges = <MBUS_ID(0xf0, 0x01) 0 0 0xd0000000 0x100000
 			  MBUS_ID(0x01, 0x1d) 0 0 0xfff00000 0x100000
-			  MBUS_ID(0x01, 0x2f) 0 0 0xf0000000 0x8000000
-			  MBUS_ID(0x09, 0x09) 0 0 0xf8100000 0x10000
-			  MBUS_ID(0x09, 0x05) 0 0 0xf8110000 0x10000>;
+			  MBUS_ID(0x01, 0x2f) 0 0 0xe8000000 0x8000000
+			  MBUS_ID(0x09, 0x09) 0 0 0xf1100000 0x10000
+			  MBUS_ID(0x09, 0x05) 0 0 0xf1110000 0x10000>;
 
 		devbus-bootcs {
 			status = "okay";
diff --git a/arch/arm/boot/dts/armada-xp-synology-ds414.dts b/arch/arm/boot/dts/armada-xp-synology-ds414.dts
index 2391b11..d17dab0 100644
--- a/arch/arm/boot/dts/armada-xp-synology-ds414.dts
+++ b/arch/arm/boot/dts/armada-xp-synology-ds414.dts
@@ -78,8 +78,8 @@
 	soc {
 		ranges = <MBUS_ID(0xf0, 0x01) 0 0 0xf1000000 0x100000
 			  MBUS_ID(0x01, 0x1d) 0 0 0xfff00000 0x100000
-			  MBUS_ID(0x09, 0x09) 0 0 0xf8100000 0x10000
-			  MBUS_ID(0x09, 0x05) 0 0 0xf8110000 0x10000>;
+			  MBUS_ID(0x09, 0x09) 0 0 0xf1100000 0x10000
+			  MBUS_ID(0x09, 0x05) 0 0 0xf1110000 0x10000>;
 
 		pcie-controller {
 			status = "okay";
diff --git a/arch/arm/boot/dts/at91-sama5d2_xplained.dts b/arch/arm/boot/dts/at91-sama5d2_xplained.dts
index 77ddff0..e683856 100644
--- a/arch/arm/boot/dts/at91-sama5d2_xplained.dts
+++ b/arch/arm/boot/dts/at91-sama5d2_xplained.dts
@@ -114,9 +114,15 @@
 
 			macb0: ethernet@f8008000 {
 				pinctrl-names = "default";
-				pinctrl-0 = <&pinctrl_macb0_default>;
+				pinctrl-0 = <&pinctrl_macb0_default &pinctrl_macb0_phy_irq>;
 				phy-mode = "rmii";
 				status = "okay";
+
+				ethernet-phy@1 {
+					reg = <0x1>;
+					interrupt-parent = <&pioA>;
+					interrupts = <73 IRQ_TYPE_LEVEL_LOW>;
+				};
 			};
 
 			pdmic@f8018000 {
@@ -300,6 +306,10 @@
 					bias-disable;
 				};
 
+				pinctrl_macb0_phy_irq: macb0_phy_irq {
+					pinmux = <PIN_PC9__GPIO>;
+				};
+
 				pinctrl_pdmic_default: pdmic_default {
 					pinmux = <PIN_PB26__PDMIC_DAT>,
 						<PIN_PB27__PDMIC_CLK>;
diff --git a/arch/arm/boot/dts/at91-sama5d4_xplained.dts b/arch/arm/boot/dts/at91-sama5d4_xplained.dts
index 131614f..569026e 100644
--- a/arch/arm/boot/dts/at91-sama5d4_xplained.dts
+++ b/arch/arm/boot/dts/at91-sama5d4_xplained.dts
@@ -86,10 +86,12 @@
 			macb0: ethernet@f8020000 {
 				phy-mode = "rmii";
 				status = "okay";
+				pinctrl-names = "default";
+				pinctrl-0 = <&pinctrl_macb0_rmii &pinctrl_macb0_phy_irq>;
 
 				phy0: ethernet-phy@1 {
 					interrupt-parent = <&pioE>;
-					interrupts = <1 IRQ_TYPE_EDGE_FALLING>;
+					interrupts = <1 IRQ_TYPE_LEVEL_LOW>;
 					reg = <1>;
 				};
 			};
@@ -152,6 +154,10 @@
 						atmel,pins =
 							<AT91_PIOE 8 AT91_PERIPH_GPIO AT91_PINCTRL_PULL_UP_DEGLITCH>;
 					};
+					pinctrl_macb0_phy_irq: macb0_phy_irq_0 {
+						atmel,pins =
+							<AT91_PIOE 1 AT91_PERIPH_GPIO AT91_PINCTRL_PULL_UP_DEGLITCH>;
+					};
 				};
 			};
 		};
diff --git a/arch/arm/boot/dts/at91-sama5d4ek.dts b/arch/arm/boot/dts/at91-sama5d4ek.dts
index 2d4a3310..4e98cda 100644
--- a/arch/arm/boot/dts/at91-sama5d4ek.dts
+++ b/arch/arm/boot/dts/at91-sama5d4ek.dts
@@ -160,8 +160,15 @@
 			};
 
 			macb0: ethernet@f8020000 {
+				pinctrl-0 = <&pinctrl_macb0_rmii &pinctrl_macb0_phy_irq>;
 				phy-mode = "rmii";
 				status = "okay";
+
+				ethernet-phy@1 {
+					reg = <0x1>;
+					interrupt-parent = <&pioE>;
+					interrupts = <1 IRQ_TYPE_LEVEL_LOW>;
+				};
 			};
 
 			mmc1: mmc@fc000000 {
@@ -193,6 +200,10 @@
 
 			pinctrl@fc06a000 {
 				board {
+					pinctrl_macb0_phy_irq: macb0_phy_irq {
+						atmel,pins =
+							<AT91_PIOE 1 AT91_PERIPH_GPIO AT91_PINCTRL_NONE>;
+					};
 					pinctrl_mmc0_cd: mmc0_cd {
 						atmel,pins =
 							<AT91_PIOE 5 AT91_PERIPH_GPIO AT91_PINCTRL_PULL_UP_DEGLITCH>;
diff --git a/arch/arm/boot/dts/at91sam9n12ek.dts b/arch/arm/boot/dts/at91sam9n12ek.dts
index ca4ddf8..626c67d 100644
--- a/arch/arm/boot/dts/at91sam9n12ek.dts
+++ b/arch/arm/boot/dts/at91sam9n12ek.dts
@@ -215,7 +215,7 @@
 	};
 
 	panel: panel {
-		compatible = "qd,qd43003c0-40", "simple-panel";
+		compatible = "qiaodian,qd43003c0-40", "simple-panel";
 		backlight = <&backlight>;
 		power-supply = <&panel_reg>;
 		#address-cells = <1>;
diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi
index c4d9175..f82aa44 100644
--- a/arch/arm/boot/dts/dra7.dtsi
+++ b/arch/arm/boot/dts/dra7.dtsi
@@ -1500,6 +1500,16 @@
 			       0x48485200 0x2E00>;
 			#address-cells = <1>;
 			#size-cells = <1>;
+
+			/*
+			 * Do not allow gating of cpsw clock as workaround
+			 * for errata i877. Keeping internal clock disabled
+			 * causes the device switching characteristics
+			 * to degrade over time and eventually fail to meet
+			 * the data manual delay time/skew specs.
+			 */
+			ti,no-idle;
+
 			/*
 			 * rx_thresh_pend
 			 * rx_pend
diff --git a/arch/arm/boot/dts/imx6qdl.dtsi b/arch/arm/boot/dts/imx6qdl.dtsi
index 4f6ae92..f74d3db 100644
--- a/arch/arm/boot/dts/imx6qdl.dtsi
+++ b/arch/arm/boot/dts/imx6qdl.dtsi
@@ -896,7 +896,6 @@
 				#size-cells = <1>;
 				reg = <0x2100000 0x10000>;
 				ranges = <0 0x2100000 0x10000>;
-				interrupt-parent = <&intc>;
 				clocks = <&clks IMX6QDL_CLK_CAAM_MEM>,
 					 <&clks IMX6QDL_CLK_CAAM_ACLK>,
 					 <&clks IMX6QDL_CLK_CAAM_IPG>,
diff --git a/arch/arm/boot/dts/kirkwood-ds112.dts b/arch/arm/boot/dts/kirkwood-ds112.dts
index bf4143c..b84af3d 100644
--- a/arch/arm/boot/dts/kirkwood-ds112.dts
+++ b/arch/arm/boot/dts/kirkwood-ds112.dts
@@ -14,7 +14,7 @@
 #include "kirkwood-synology.dtsi"
 
 / {
-	model = "Synology DS111";
+	model = "Synology DS112";
 	compatible = "synology,ds111", "marvell,kirkwood";
 
 	memory {
diff --git a/arch/arm/boot/dts/kirkwood-lswvl.dts b/arch/arm/boot/dts/kirkwood-lswvl.dts
index 09eed3c..36eec73 100644
--- a/arch/arm/boot/dts/kirkwood-lswvl.dts
+++ b/arch/arm/boot/dts/kirkwood-lswvl.dts
@@ -1,7 +1,8 @@
 /*
  * Device Tree file for Buffalo Linkstation LS-WVL/VL
  *
- * Copyright (C) 2015, rogershimizu@gmail.com
+ * Copyright (C) 2015, 2016
+ * Roger Shimizu <rogershimizu@gmail.com>
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
@@ -156,21 +157,21 @@
 		button@1 {
 			label = "Function Button";
 			linux,code = <KEY_OPTION>;
-			gpios = <&gpio0 45 GPIO_ACTIVE_LOW>;
+			gpios = <&gpio1 13 GPIO_ACTIVE_LOW>;
 		};
 
 		button@2 {
 			label = "Power-on Switch";
 			linux,code = <KEY_RESERVED>;
 			linux,input-type = <5>;
-			gpios = <&gpio0 46 GPIO_ACTIVE_LOW>;
+			gpios = <&gpio1 14 GPIO_ACTIVE_LOW>;
 		};
 
 		button@3 {
 			label = "Power-auto Switch";
 			linux,code = <KEY_ESC>;
 			linux,input-type = <5>;
-			gpios = <&gpio0 47 GPIO_ACTIVE_LOW>;
+			gpios = <&gpio1 15 GPIO_ACTIVE_LOW>;
 		};
 	};
 
@@ -185,38 +186,38 @@
 
 		led@1 {
 			label = "lswvl:red:alarm";
-			gpios = <&gpio0 36 GPIO_ACTIVE_LOW>;
+			gpios = <&gpio1 4 GPIO_ACTIVE_HIGH>;
 		};
 
 		led@2 {
 			label = "lswvl:red:func";
-			gpios = <&gpio0 37 GPIO_ACTIVE_LOW>;
+			gpios = <&gpio1 5 GPIO_ACTIVE_HIGH>;
 		};
 
 		led@3 {
 			label = "lswvl:amber:info";
-			gpios = <&gpio0 38 GPIO_ACTIVE_LOW>;
+			gpios = <&gpio1 6 GPIO_ACTIVE_HIGH>;
 		};
 
 		led@4 {
 			label = "lswvl:blue:func";
-			gpios = <&gpio0 39 GPIO_ACTIVE_LOW>;
+			gpios = <&gpio1 7 GPIO_ACTIVE_HIGH>;
 		};
 
 		led@5 {
 			label = "lswvl:blue:power";
-			gpios = <&gpio0 40 GPIO_ACTIVE_LOW>;
+			gpios = <&gpio1 8 GPIO_ACTIVE_LOW>;
 			default-state = "keep";
 		};
 
 		led@6 {
 			label = "lswvl:red:hdderr0";
-			gpios = <&gpio0 34 GPIO_ACTIVE_LOW>;
+			gpios = <&gpio1 2 GPIO_ACTIVE_HIGH>;
 		};
 
 		led@7 {
 			label = "lswvl:red:hdderr1";
-			gpios = <&gpio0 35 GPIO_ACTIVE_LOW>;
+			gpios = <&gpio1 3 GPIO_ACTIVE_HIGH>;
 		};
 	};
 
@@ -233,7 +234,7 @@
 				3250 1
 				5000 0>;
 
-		alarm-gpios = <&gpio0 43 GPIO_ACTIVE_HIGH>;
+		alarm-gpios = <&gpio1 11 GPIO_ACTIVE_HIGH>;
 	};
 
 	restart_poweroff {
diff --git a/arch/arm/boot/dts/kirkwood-lswxl.dts b/arch/arm/boot/dts/kirkwood-lswxl.dts
index f5db16a..b13ec20 100644
--- a/arch/arm/boot/dts/kirkwood-lswxl.dts
+++ b/arch/arm/boot/dts/kirkwood-lswxl.dts
@@ -1,7 +1,8 @@
 /*
  * Device Tree file for Buffalo Linkstation LS-WXL/WSXL
  *
- * Copyright (C) 2015, rogershimizu@gmail.com
+ * Copyright (C) 2015, 2016
+ * Roger Shimizu <rogershimizu@gmail.com>
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
@@ -156,21 +157,21 @@
 		button@1 {
 			label = "Function Button";
 			linux,code = <KEY_OPTION>;
-			gpios = <&gpio1 41 GPIO_ACTIVE_LOW>;
+			gpios = <&gpio1 9 GPIO_ACTIVE_LOW>;
 		};
 
 		button@2 {
 			label = "Power-on Switch";
 			linux,code = <KEY_RESERVED>;
 			linux,input-type = <5>;
-			gpios = <&gpio1 42 GPIO_ACTIVE_LOW>;
+			gpios = <&gpio1 10 GPIO_ACTIVE_LOW>;
 		};
 
 		button@3 {
 			label = "Power-auto Switch";
 			linux,code = <KEY_ESC>;
 			linux,input-type = <5>;
-			gpios = <&gpio1 43 GPIO_ACTIVE_LOW>;
+			gpios = <&gpio1 11 GPIO_ACTIVE_LOW>;
 		};
 	};
 
@@ -185,12 +186,12 @@
 
 		led@1 {
 			label = "lswxl:blue:func";
-			gpios = <&gpio1 36 GPIO_ACTIVE_LOW>;
+			gpios = <&gpio1 4 GPIO_ACTIVE_LOW>;
 		};
 
 		led@2 {
 			label = "lswxl:red:alarm";
-			gpios = <&gpio1 49 GPIO_ACTIVE_LOW>;
+			gpios = <&gpio1 17 GPIO_ACTIVE_LOW>;
 		};
 
 		led@3 {
@@ -200,23 +201,23 @@
 
 		led@4 {
 			label = "lswxl:blue:power";
-			gpios = <&gpio1 8 GPIO_ACTIVE_LOW>;
+			gpios = <&gpio1 7 GPIO_ACTIVE_HIGH>;
+			default-state = "keep";
 		};
 
 		led@5 {
 			label = "lswxl:red:func";
-			gpios = <&gpio1 5 GPIO_ACTIVE_LOW>;
-			default-state = "keep";
+			gpios = <&gpio1 2 GPIO_ACTIVE_HIGH>;
 		};
 
 		led@6 {
 			label = "lswxl:red:hdderr0";
-			gpios = <&gpio1 2 GPIO_ACTIVE_LOW>;
+			gpios = <&gpio0 8 GPIO_ACTIVE_HIGH>;
 		};
 
 		led@7 {
 			label = "lswxl:red:hdderr1";
-			gpios = <&gpio1 3 GPIO_ACTIVE_LOW>;
+			gpios = <&gpio1 14 GPIO_ACTIVE_HIGH>;
 		};
 	};
 
@@ -225,15 +226,15 @@
 		pinctrl-0 = <&pmx_fan_low &pmx_fan_high &pmx_fan_lock>;
 		pinctrl-names = "default";
 
-		gpios = <&gpio0 47 GPIO_ACTIVE_LOW
-			 &gpio0 48 GPIO_ACTIVE_LOW>;
+		gpios = <&gpio1 16 GPIO_ACTIVE_LOW
+			 &gpio1 15 GPIO_ACTIVE_LOW>;
 
 		gpio-fan,speed-map = <0 3
 				1500 2
 				3250 1
 				5000 0>;
 
-		alarm-gpios = <&gpio1 49 GPIO_ACTIVE_HIGH>;
+		alarm-gpios = <&gpio1 8 GPIO_ACTIVE_HIGH>;
 	};
 
 	restart_poweroff {
@@ -256,7 +257,7 @@
 			enable-active-high;
 			regulator-always-on;
 			regulator-boot-on;
-			gpio = <&gpio0 37 GPIO_ACTIVE_HIGH>;
+			gpio = <&gpio1 5 GPIO_ACTIVE_HIGH>;
 		};
 		hdd_power0: regulator@2 {
 			compatible = "regulator-fixed";
diff --git a/arch/arm/boot/dts/kirkwood-pogoplug-series-4.dts b/arch/arm/boot/dts/kirkwood-pogoplug-series-4.dts
index 1db6f2c..8082d64 100644
--- a/arch/arm/boot/dts/kirkwood-pogoplug-series-4.dts
+++ b/arch/arm/boot/dts/kirkwood-pogoplug-series-4.dts
@@ -131,6 +131,7 @@
 	chip-delay = <40>;
 	status = "okay";
 	partitions {
+		compatible = "fixed-partitions";
 		#address-cells = <1>;
 		#size-cells = <1>;
 
diff --git a/arch/arm/boot/dts/logicpd-torpedo-som.dtsi b/arch/arm/boot/dts/logicpd-torpedo-som.dtsi
index 7fed0bd..0080532 100644
--- a/arch/arm/boot/dts/logicpd-torpedo-som.dtsi
+++ b/arch/arm/boot/dts/logicpd-torpedo-som.dtsi
@@ -112,14 +112,6 @@
 	clock-frequency = <400000>;
 };
 
-&i2c2 {
-	clock-frequency = <400000>;
-};
-
-&i2c3 {
-	clock-frequency = <400000>;
-};
-
 /*
  * Only found on the wireless SOM. For the SOM without wireless, the pins for
  * MMC3 can be routed with jumpers to the second MMC slot on the devkit and
@@ -143,6 +135,7 @@
 		interrupt-parent = <&gpio5>;
 		interrupts = <24 IRQ_TYPE_LEVEL_HIGH>; /* gpio 152 */
 		ref-clock-frequency = <26000000>;
+		tcxo-clock-frequency = <26000000>;
 	};
 };
 
diff --git a/arch/arm/boot/dts/omap5-board-common.dtsi b/arch/arm/boot/dts/omap5-board-common.dtsi
index 888412c..902657d 100644
--- a/arch/arm/boot/dts/omap5-board-common.dtsi
+++ b/arch/arm/boot/dts/omap5-board-common.dtsi
@@ -130,6 +130,16 @@
 	};
 };
 
+&gpio8 {
+	/* TI trees use GPIO instead of msecure, see also muxing */
+	p234 {
+		gpio-hog;
+		gpios = <10 GPIO_ACTIVE_HIGH>;
+		output-high;
+		line-name = "gpio8_234/msecure";
+	};
+};
+
 &omap5_pmx_core {
 	pinctrl-names = "default";
 	pinctrl-0 = <
@@ -213,6 +223,13 @@
 		>;
 	};
 
+	/* TI trees use GPIO mode; msecure mode does not work reliably? */
+	palmas_msecure_pins: palmas_msecure_pins {
+		pinctrl-single,pins = <
+			OMAP5_IOPAD(0x180, PIN_OUTPUT | MUX_MODE6) /* gpio8_234 */
+		>;
+	};
+
 	usbhost_pins: pinmux_usbhost_pins {
 		pinctrl-single,pins = <
 			OMAP5_IOPAD(0x0c4, PIN_INPUT | MUX_MODE0) /* usbb2_hsic_strobe */
@@ -278,6 +295,12 @@
 			&usbhost_wkup_pins
 	>;
 
+	palmas_sys_nirq_pins: pinmux_palmas_sys_nirq_pins {
+		pinctrl-single,pins = <
+			OMAP5_IOPAD(0x068, PIN_INPUT_PULLUP | MUX_MODE0) /* sys_nirq1 */
+		>;
+	};
+
 	usbhost_wkup_pins: pinmux_usbhost_wkup_pins {
 		pinctrl-single,pins = <
 			OMAP5_IOPAD(0x05a, PIN_OUTPUT | MUX_MODE0) /* fref_clk1_out, USB hub clk */
@@ -345,6 +368,8 @@
 		interrupt-controller;
 		#interrupt-cells = <2>;
 		ti,system-power-controller;
+		pinctrl-names = "default";
+		pinctrl-0 = <&palmas_sys_nirq_pins &palmas_msecure_pins>;
 
 		extcon_usb3: palmas_usb {
 			compatible = "ti,palmas-usb-vid";
@@ -358,6 +383,14 @@
 			#clock-cells = <0>;
 		};
 
+		rtc {
+			compatible = "ti,palmas-rtc";
+			interrupt-parent = <&palmas>;
+			interrupts = <8 IRQ_TYPE_NONE>;
+			ti,backup-battery-chargeable;
+			ti,backup-battery-charge-high-current;
+		};
+
 		palmas_pmic {
 			compatible = "ti,palmas-pmic";
 			interrupt-parent = <&palmas>;
diff --git a/arch/arm/boot/dts/orion5x-linkstation-lswtgl.dts b/arch/arm/boot/dts/orion5x-linkstation-lswtgl.dts
index 3daec91..aae8a7a 100644
--- a/arch/arm/boot/dts/orion5x-linkstation-lswtgl.dts
+++ b/arch/arm/boot/dts/orion5x-linkstation-lswtgl.dts
@@ -1,7 +1,8 @@
 /*
  * Device Tree file for Buffalo Linkstation LS-WTGL
  *
- * Copyright (C) 2015, Roger Shimizu <rogershimizu@gmail.com>
+ * Copyright (C) 2015, 2016
+ * Roger Shimizu <rogershimizu@gmail.com>
  *
  * This file is dual-licensed: you can use it either under the terms
  * of the GPL or the X11 license, at your option. Note that this dual
@@ -69,8 +70,6 @@
 
 		internal-regs {
 			pinctrl: pinctrl@10000 {
-				pinctrl-0 = <&pmx_usb_power &pmx_power_hdd
-					&pmx_fan_low &pmx_fan_high &pmx_fan_lock>;
 				pinctrl-names = "default";
 
 				pmx_led_power: pmx-leds {
@@ -162,6 +161,7 @@
 		led@1 {
 			label = "lswtgl:blue:power";
 			gpios = <&gpio0 0 GPIO_ACTIVE_LOW>;
+			default-state = "keep";
 		};
 
 		led@2 {
@@ -188,7 +188,7 @@
 				3250 1
 				5000 0>;
 
-		alarm-gpios = <&gpio0 2 GPIO_ACTIVE_HIGH>;
+		alarm-gpios = <&gpio0 6 GPIO_ACTIVE_HIGH>;
 	};
 
 	restart_poweroff {
@@ -228,6 +228,37 @@
 	};
 };
 
+&devbus_bootcs {
+	status = "okay";
+	devbus,keep-config;
+
+	flash@0 {
+		compatible = "jedec-flash";
+		reg = <0 0x40000>;
+		bank-width = <1>;
+
+		partitions {
+			compatible = "fixed-partitions";
+			#address-cells = <1>;
+			#size-cells = <1>;
+
+			header@0 {
+				reg = <0 0x30000>;
+				read-only;
+			};
+
+			uboot@30000 {
+				reg = <0x30000 0xF000>;
+				read-only;
+			};
+
+			uboot_env@3F000 {
+				reg = <0x3F000 0x1000>;
+			};
+		};
+	};
+};
+
 &mdio {
 	status = "okay";
 
diff --git a/arch/arm/boot/dts/r8a7791-porter.dts b/arch/arm/boot/dts/r8a7791-porter.dts
index 6713b1e..01d239c 100644
--- a/arch/arm/boot/dts/r8a7791-porter.dts
+++ b/arch/arm/boot/dts/r8a7791-porter.dts
@@ -283,7 +283,6 @@
 	pinctrl-names = "default";
 
 	status = "okay";
-	renesas,enable-gpio = <&gpio5 31 GPIO_ACTIVE_HIGH>;
 };
 
 &usbphy {
diff --git a/arch/arm/boot/dts/sama5d2-pinfunc.h b/arch/arm/boot/dts/sama5d2-pinfunc.h
index 1afe246..b0c912fe 100644
--- a/arch/arm/boot/dts/sama5d2-pinfunc.h
+++ b/arch/arm/boot/dts/sama5d2-pinfunc.h
@@ -90,7 +90,7 @@
 #define PIN_PA14__I2SC1_MCK		PINMUX_PIN(PIN_PA14, 4, 2)
 #define PIN_PA14__FLEXCOM3_IO2		PINMUX_PIN(PIN_PA14, 5, 1)
 #define PIN_PA14__D9			PINMUX_PIN(PIN_PA14, 6, 2)
-#define PIN_PA15			14
+#define PIN_PA15			15
 #define PIN_PA15__GPIO			PINMUX_PIN(PIN_PA15, 0, 0)
 #define PIN_PA15__SPI0_MOSI		PINMUX_PIN(PIN_PA15, 1, 1)
 #define PIN_PA15__TF1			PINMUX_PIN(PIN_PA15, 2, 1)
diff --git a/arch/arm/boot/dts/sama5d4.dtsi b/arch/arm/boot/dts/sama5d4.dtsi
index b8032bc..db1151c 100644
--- a/arch/arm/boot/dts/sama5d4.dtsi
+++ b/arch/arm/boot/dts/sama5d4.dtsi
@@ -1342,7 +1342,7 @@
 			dbgu: serial@fc069000 {
 				compatible = "atmel,at91sam9260-dbgu", "atmel,at91sam9260-usart";
 				reg = <0xfc069000 0x200>;
-				interrupts = <2 IRQ_TYPE_LEVEL_HIGH 7>;
+				interrupts = <45 IRQ_TYPE_LEVEL_HIGH 7>;
 				pinctrl-names = "default";
 				pinctrl-0 = <&pinctrl_dbgu>;
 				clocks = <&dbgu_clk>;
diff --git a/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi b/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi
index d0c7438..27a333e 100644
--- a/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi
+++ b/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi
@@ -127,22 +127,14 @@
 			};
 			mmcsd_default_mode: mmcsd_default {
 				mmcsd_default_cfg1 {
-					/* MCCLK */
-					pins = "GPIO8_B10";
-					ste,output = <0>;
-				};
-				mmcsd_default_cfg2 {
-					/* MCCMDDIR, MCDAT0DIR, MCDAT31DIR, MCDATDIR2 */
-					pins = "GPIO10_C11", "GPIO15_A12",
-					"GPIO16_C13", "GPIO23_D15";
-					ste,output = <1>;
-				};
-				mmcsd_default_cfg3 {
-					/* MCCMD, MCDAT3-0, MCMSFBCLK */
-					pins = "GPIO9_A10", "GPIO11_B11",
-					"GPIO12_A11", "GPIO13_C12",
-					"GPIO14_B12", "GPIO24_C15";
-					ste,input = <1>;
+					/*
+					 * MCCLK, MCCMDDIR, MCDAT0DIR, MCDAT31DIR, MCDATDIR2
+					 * MCCMD, MCDAT3-0, MCMSFBCLK
+					 */
+					pins = "GPIO8_B10", "GPIO9_A10", "GPIO10_C11", "GPIO11_B11",
+					       "GPIO12_A11", "GPIO13_C12", "GPIO14_B12", "GPIO15_A12",
+					       "GPIO16_C13", "GPIO23_D15", "GPIO24_C15";
+					ste,output = <2>;
 				};
 			};
 		};
@@ -802,10 +794,21 @@
 			clock-names = "mclk", "apb_pclk";
 			interrupt-parent = <&vica>;
 			interrupts = <22>;
-			max-frequency = <48000000>;
+			max-frequency = <400000>;
 			bus-width = <4>;
 			cap-mmc-highspeed;
 			cap-sd-highspeed;
+			full-pwr-cycle;
+			/*
+			 * The STw4811 circuit used with the Nomadik strictly
+			 * requires that all of these signal direction pins be
+			 * routed and used for its 4-bit levelshifter.
+			 */
+			st,sig-dir-dat0;
+			st,sig-dir-dat2;
+			st,sig-dir-dat31;
+			st,sig-dir-cmd;
+			st,sig-pin-fbclk;
 			pinctrl-names = "default";
 			pinctrl-0 = <&mmcsd_default_mux>, <&mmcsd_default_mode>;
 			vmmc-supply = <&vmmc_regulator>;
diff --git a/arch/arm/boot/dts/tps65217.dtsi b/arch/arm/boot/dts/tps65217.dtsi
new file mode 100644
index 0000000..a632724
--- /dev/null
+++ b/arch/arm/boot/dts/tps65217.dtsi
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/*
+ * Integrated Power Management Chip
+ * http://www.ti.com/lit/ds/symlink/tps65217.pdf
+ */
+
+&tps {
+	compatible = "ti,tps65217";
+
+	regulators {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		dcdc1_reg: regulator@0 {
+			reg = <0>;
+			regulator-compatible = "dcdc1";
+		};
+
+		dcdc2_reg: regulator@1 {
+			reg = <1>;
+			regulator-compatible = "dcdc2";
+		};
+
+		dcdc3_reg: regulator@2 {
+			reg = <2>;
+			regulator-compatible = "dcdc3";
+		};
+
+		ldo1_reg: regulator@3 {
+			reg = <3>;
+			regulator-compatible = "ldo1";
+		};
+
+		ldo2_reg: regulator@4 {
+			reg = <4>;
+			regulator-compatible = "ldo2";
+		};
+
+		ldo3_reg: regulator@5 {
+			reg = <5>;
+			regulator-compatible = "ldo3";
+		};
+
+		ldo4_reg: regulator@6 {
+			reg = <6>;
+			regulator-compatible = "ldo4";
+		};
+	};
+};
diff --git a/arch/arm/common/icst.c b/arch/arm/common/icst.c
index 2dc6da70..d7ed252 100644
--- a/arch/arm/common/icst.c
+++ b/arch/arm/common/icst.c
@@ -16,7 +16,7 @@
  */
 #include <linux/module.h>
 #include <linux/kernel.h>
-
+#include <asm/div64.h>
 #include <asm/hardware/icst.h>
 
 /*
@@ -29,7 +29,11 @@
 
 unsigned long icst_hz(const struct icst_params *p, struct icst_vco vco)
 {
-	return p->ref * 2 * (vco.v + 8) / ((vco.r + 2) * p->s2div[vco.s]);
+	u64 dividend = p->ref * 2 * (u64)(vco.v + 8);
+	u32 divisor = (vco.r + 2) * p->s2div[vco.s];
+
+	do_div(dividend, divisor);
+	return (unsigned long)dividend;
 }
 
 EXPORT_SYMBOL(icst_hz);
@@ -58,6 +62,7 @@
 
 		if (f > p->vco_min && f <= p->vco_max)
 			break;
+		i++;
 	} while (i < 8);
 
 	if (i >= 8)
diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig
index 314f6be..8e8b2ac 100644
--- a/arch/arm/configs/multi_v7_defconfig
+++ b/arch/arm/configs/multi_v7_defconfig
@@ -426,6 +426,7 @@
 CONFIG_IMX2_WDT=y
 CONFIG_TEGRA_WATCHDOG=m
 CONFIG_MESON_WATCHDOG=y
+CONFIG_DW_WATCHDOG=y
 CONFIG_DIGICOLOR_WATCHDOG=y
 CONFIG_MFD_AS3711=y
 CONFIG_MFD_AS3722=y
diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig
index c5e1943..d18d6b4 100644
--- a/arch/arm/configs/omap2plus_defconfig
+++ b/arch/arm/configs/omap2plus_defconfig
@@ -50,6 +50,7 @@
 CONFIG_SOC_AM43XX=y
 CONFIG_SOC_DRA7XX=y
 CONFIG_ARM_THUMBEE=y
+CONFIG_ARM_KERNMEM_PERMS=y
 CONFIG_ARM_ERRATA_411920=y
 CONFIG_ARM_ERRATA_430973=y
 CONFIG_SMP=y
@@ -177,6 +178,7 @@
 CONFIG_AT803X_PHY=y
 CONFIG_SMSC_PHY=y
 CONFIG_USB_USBNET=m
+CONFIG_USB_NET_SMSC75XX=m
 CONFIG_USB_NET_SMSC95XX=m
 CONFIG_USB_ALI_M5632=y
 CONFIG_USB_AN2720=y
@@ -290,24 +292,23 @@
 CONFIG_FIRMWARE_EDID=y
 CONFIG_FB_MODE_HELPERS=y
 CONFIG_FB_TILEBLITTING=y
-CONFIG_OMAP2_DSS=m
-CONFIG_OMAP5_DSS_HDMI=y
-CONFIG_OMAP2_DSS_SDI=y
-CONFIG_OMAP2_DSS_DSI=y
+CONFIG_FB_OMAP5_DSS_HDMI=y
+CONFIG_FB_OMAP2_DSS_SDI=y
+CONFIG_FB_OMAP2_DSS_DSI=y
 CONFIG_FB_OMAP2=m
-CONFIG_DISPLAY_ENCODER_TFP410=m
-CONFIG_DISPLAY_ENCODER_TPD12S015=m
-CONFIG_DISPLAY_CONNECTOR_DVI=m
-CONFIG_DISPLAY_CONNECTOR_HDMI=m
-CONFIG_DISPLAY_CONNECTOR_ANALOG_TV=m
-CONFIG_DISPLAY_PANEL_DPI=m
-CONFIG_DISPLAY_PANEL_DSI_CM=m
-CONFIG_DISPLAY_PANEL_SONY_ACX565AKM=m
-CONFIG_DISPLAY_PANEL_LGPHILIPS_LB035Q02=m
-CONFIG_DISPLAY_PANEL_SHARP_LS037V7DW01=m
-CONFIG_DISPLAY_PANEL_TPO_TD028TTEC1=m
-CONFIG_DISPLAY_PANEL_TPO_TD043MTEA1=m
-CONFIG_DISPLAY_PANEL_NEC_NL8048HL11=m
+CONFIG_FB_OMAP2_ENCODER_TFP410=m
+CONFIG_FB_OMAP2_ENCODER_TPD12S015=m
+CONFIG_FB_OMAP2_CONNECTOR_DVI=m
+CONFIG_FB_OMAP2_CONNECTOR_HDMI=m
+CONFIG_FB_OMAP2_CONNECTOR_ANALOG_TV=m
+CONFIG_FB_OMAP2_PANEL_DPI=m
+CONFIG_FB_OMAP2_PANEL_DSI_CM=m
+CONFIG_FB_OMAP2_PANEL_SONY_ACX565AKM=m
+CONFIG_FB_OMAP2_PANEL_LGPHILIPS_LB035Q02=m
+CONFIG_FB_OMAP2_PANEL_SHARP_LS037V7DW01=m
+CONFIG_FB_OMAP2_PANEL_TPO_TD028TTEC1=m
+CONFIG_FB_OMAP2_PANEL_TPO_TD043MTEA1=m
+CONFIG_FB_OMAP2_PANEL_NEC_NL8048HL11=m
 CONFIG_BACKLIGHT_LCD_SUPPORT=y
 CONFIG_LCD_CLASS_DEVICE=y
 CONFIG_LCD_PLATFORM=y
@@ -354,6 +355,11 @@
 CONFIG_USB_INVENTRA_DMA=y
 CONFIG_USB_TI_CPPI41_DMA=y
 CONFIG_USB_DWC3=m
+CONFIG_USB_SERIAL=m
+CONFIG_USB_SERIAL_GENERIC=y
+CONFIG_USB_SERIAL_SIMPLE=m
+CONFIG_USB_SERIAL_FTDI_SIO=m
+CONFIG_USB_SERIAL_PL2303=m
 CONFIG_USB_TEST=m
 CONFIG_AM335X_PHY_USB=y
 CONFIG_USB_GADGET=m
@@ -387,6 +393,7 @@
 CONFIG_LEDS_CLASS=m
 CONFIG_LEDS_GPIO=m
 CONFIG_LEDS_PWM=m
+CONFIG_LEDS_PCA963X=m
 CONFIG_LEDS_TRIGGERS=y
 CONFIG_LEDS_TRIGGER_TIMER=m
 CONFIG_LEDS_TRIGGER_ONESHOT=m
@@ -449,6 +456,8 @@
 CONFIG_NLS_ISO8859_1=y
 CONFIG_PRINTK_TIME=y
 CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_INFO_SPLIT=y
+CONFIG_DEBUG_INFO_DWARF4=y
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_SCHEDSTATS=y
 CONFIG_TIMER_STATS=y
diff --git a/arch/arm/crypto/aes-ce-glue.c b/arch/arm/crypto/aes-ce-glue.c
index b445a5d..89a3a3e 100644
--- a/arch/arm/crypto/aes-ce-glue.c
+++ b/arch/arm/crypto/aes-ce-glue.c
@@ -364,7 +364,7 @@
 	.cra_blkcipher = {
 		.min_keysize	= AES_MIN_KEY_SIZE,
 		.max_keysize	= AES_MAX_KEY_SIZE,
-		.ivsize		= AES_BLOCK_SIZE,
+		.ivsize		= 0,
 		.setkey		= ce_aes_setkey,
 		.encrypt	= ecb_encrypt,
 		.decrypt	= ecb_decrypt,
@@ -441,7 +441,7 @@
 	.cra_ablkcipher = {
 		.min_keysize	= AES_MIN_KEY_SIZE,
 		.max_keysize	= AES_MAX_KEY_SIZE,
-		.ivsize		= AES_BLOCK_SIZE,
+		.ivsize		= 0,
 		.setkey		= ablk_set_key,
 		.encrypt	= ablk_encrypt,
 		.decrypt	= ablk_decrypt,
diff --git a/arch/arm/include/asm/arch_gicv3.h b/arch/arm/include/asm/arch_gicv3.h
index 7da5503..e08d151 100644
--- a/arch/arm/include/asm/arch_gicv3.h
+++ b/arch/arm/include/asm/arch_gicv3.h
@@ -117,6 +117,7 @@
 	u32 irqstat;
 
 	asm volatile("mrc " __stringify(ICC_IAR1) : "=r" (irqstat));
+	dsb(sy);
 	return irqstat;
 }
 
diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h
index d5525bf..9156fc3 100644
--- a/arch/arm/include/asm/cacheflush.h
+++ b/arch/arm/include/asm/cacheflush.h
@@ -491,7 +491,6 @@
 #endif
 
 #ifdef CONFIG_DEBUG_RODATA
-void mark_rodata_ro(void);
 void set_kernel_text_rw(void);
 void set_kernel_text_ro(void);
 #else
diff --git a/arch/arm/include/asm/xen/page-coherent.h b/arch/arm/include/asm/xen/page-coherent.h
index 0375c8c..9408a99 100644
--- a/arch/arm/include/asm/xen/page-coherent.h
+++ b/arch/arm/include/asm/xen/page-coherent.h
@@ -35,14 +35,21 @@
 	     dma_addr_t dev_addr, unsigned long offset, size_t size,
 	     enum dma_data_direction dir, struct dma_attrs *attrs)
 {
-	bool local = XEN_PFN_DOWN(dev_addr) == page_to_xen_pfn(page);
+	unsigned long page_pfn = page_to_xen_pfn(page);
+	unsigned long dev_pfn = XEN_PFN_DOWN(dev_addr);
+	unsigned long compound_pages =
+		(1<<compound_order(page)) * XEN_PFN_PER_PAGE;
+	bool local = (page_pfn <= dev_pfn) &&
+		(dev_pfn - page_pfn < compound_pages);
+
 	/*
-	 * Dom0 is mapped 1:1, while the Linux page can be spanned accross
-	 * multiple Xen page, it's not possible to have a mix of local and
-	 * foreign Xen page. So if the first xen_pfn == mfn the page is local
-	 * otherwise it's a foreign page grant-mapped in dom0. If the page is
-	 * local we can safely call the native dma_ops function, otherwise we
-	 * call the xen specific function.
+	 * Dom0 is mapped 1:1, while the Linux page can span across
+	 * multiple Xen pages, it's not possible for it to contain a
+	 * mix of local and foreign Xen pages. So if the first xen_pfn
+	 * == mfn the page is local otherwise it's a foreign page
+	 * grant-mapped in dom0. If the page is local we can safely
+	 * call the native dma_ops function, otherwise we call the xen
+	 * specific function.
 	 */
 	if (local)
 		__generic_dma_ops(hwdev)->map_page(hwdev, page, offset, size, dir, attrs);
diff --git a/arch/arm/include/uapi/asm/unistd.h b/arch/arm/include/uapi/asm/unistd.h
index ede692f..5dd2528 100644
--- a/arch/arm/include/uapi/asm/unistd.h
+++ b/arch/arm/include/uapi/asm/unistd.h
@@ -417,6 +417,7 @@
 #define __NR_userfaultfd		(__NR_SYSCALL_BASE+388)
 #define __NR_membarrier			(__NR_SYSCALL_BASE+389)
 #define __NR_mlock2			(__NR_SYSCALL_BASE+390)
+#define __NR_copy_file_range		(__NR_SYSCALL_BASE+391)
 
 /*
  * The following SWIs are ARM private.
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index 2c5f160..ad325a8 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -88,6 +88,7 @@
 obj-$(CONFIG_EARLY_PRINTK)	+= early_printk.o
 
 obj-$(CONFIG_ARM_VIRT_EXT)	+= hyp-stub.o
+AFLAGS_hyp-stub.o		:=-Wa,-march=armv7-a
 ifeq ($(CONFIG_ARM_PSCI),y)
 obj-$(CONFIG_SMP)		+= psci_smp.o
 endif
diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S
index ac368bb..dfc7cd6 100644
--- a/arch/arm/kernel/calls.S
+++ b/arch/arm/kernel/calls.S
@@ -400,6 +400,7 @@
 		CALL(sys_userfaultfd)
 		CALL(sys_membarrier)
 		CALL(sys_mlock2)
+		CALL(sys_copy_file_range)
 #ifndef syscalls_counted
 .equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls
 #define syscalls_counted
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 7d0cba6f..139791e 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -176,13 +176,13 @@
 		.name = "Kernel code",
 		.start = 0,
 		.end = 0,
-		.flags = IORESOURCE_MEM
+		.flags = IORESOURCE_SYSTEM_RAM
 	},
 	{
 		.name = "Kernel data",
 		.start = 0,
 		.end = 0,
-		.flags = IORESOURCE_MEM
+		.flags = IORESOURCE_SYSTEM_RAM
 	}
 };
 
@@ -851,7 +851,7 @@
 		res->name  = "System RAM";
 		res->start = __pfn_to_phys(memblock_region_memory_base_pfn(region));
 		res->end = __pfn_to_phys(memblock_region_memory_end_pfn(region)) - 1;
-		res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+		res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
 
 		request_resource(&iomem_resource, res);
 
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index dda1959..08e49c4 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -506,18 +506,18 @@
 	struct kvm_vcpu *vcpu;
 
 	kvm_for_each_vcpu(i, vcpu, kvm) {
-		wait_queue_head_t *wq = kvm_arch_vcpu_wq(vcpu);
+		struct swait_queue_head *wq = kvm_arch_vcpu_wq(vcpu);
 
 		vcpu->arch.pause = false;
-		wake_up_interruptible(wq);
+		swake_up(wq);
 	}
 }
 
 static void vcpu_sleep(struct kvm_vcpu *vcpu)
 {
-	wait_queue_head_t *wq = kvm_arch_vcpu_wq(vcpu);
+	struct swait_queue_head *wq = kvm_arch_vcpu_wq(vcpu);
 
-	wait_event_interruptible(*wq, ((!vcpu->arch.power_off) &&
+	swait_event_interruptible(*wq, ((!vcpu->arch.power_off) &&
 				       (!vcpu->arch.pause)));
 }
 
diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c
index 5fa69d7..99361f1 100644
--- a/arch/arm/kvm/guest.c
+++ b/arch/arm/kvm/guest.c
@@ -161,7 +161,7 @@
 	u64 val;
 
 	val = kvm_arm_timer_get_reg(vcpu, reg->id);
-	return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id));
+	return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)) ? -EFAULT : 0;
 }
 
 static unsigned long num_core_regs(void)
diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c
index 7f33b20..0f6600f 100644
--- a/arch/arm/kvm/mmio.c
+++ b/arch/arm/kvm/mmio.c
@@ -206,7 +206,8 @@
 	run->mmio.is_write	= is_write;
 	run->mmio.phys_addr	= fault_ipa;
 	run->mmio.len		= len;
-	memcpy(run->mmio.data, data_buf, len);
+	if (is_write)
+		memcpy(run->mmio.data, data_buf, len);
 
 	if (!ret) {
 		/* We handled the access successfully in the kernel. */
diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c
index a9b3b90..c2b1315 100644
--- a/arch/arm/kvm/psci.c
+++ b/arch/arm/kvm/psci.c
@@ -70,7 +70,7 @@
 {
 	struct kvm *kvm = source_vcpu->kvm;
 	struct kvm_vcpu *vcpu = NULL;
-	wait_queue_head_t *wq;
+	struct swait_queue_head *wq;
 	unsigned long cpu_id;
 	unsigned long context_id;
 	phys_addr_t target_pc;
@@ -119,7 +119,7 @@
 	smp_mb();		/* Make sure the above is visible */
 
 	wq = kvm_arch_vcpu_wq(vcpu);
-	wake_up_interruptible(wq);
+	swake_up(wq);
 
 	return PSCI_RET_SUCCESS;
 }
diff --git a/arch/arm/mach-lpc32xx/phy3250.c b/arch/arm/mach-lpc32xx/phy3250.c
index 77d6b1b..ee06fab 100644
--- a/arch/arm/mach-lpc32xx/phy3250.c
+++ b/arch/arm/mach-lpc32xx/phy3250.c
@@ -86,8 +86,8 @@
 {
 	dma_addr_t dma;
 
-	fb->fb.screen_base = dma_alloc_writecombine(&fb->dev->dev,
-		PANEL_SIZE, &dma, GFP_KERNEL);
+	fb->fb.screen_base = dma_alloc_wc(&fb->dev->dev, PANEL_SIZE, &dma,
+					  GFP_KERNEL);
 	if (!fb->fb.screen_base) {
 		printk(KERN_ERR "CLCD: unable to map framebuffer\n");
 		return -ENOMEM;
@@ -116,15 +116,14 @@
 
 static int lpc32xx_clcd_mmap(struct clcd_fb *fb, struct vm_area_struct *vma)
 {
-	return dma_mmap_writecombine(&fb->dev->dev, vma,
-		fb->fb.screen_base, fb->fb.fix.smem_start,
-		fb->fb.fix.smem_len);
+	return dma_mmap_wc(&fb->dev->dev, vma, fb->fb.screen_base,
+			   fb->fb.fix.smem_start, fb->fb.fix.smem_len);
 }
 
 static void lpc32xx_clcd_remove(struct clcd_fb *fb)
 {
-	dma_free_writecombine(&fb->dev->dev, fb->fb.fix.smem_len,
-		fb->fb.screen_base, fb->fb.fix.smem_start);
+	dma_free_wc(&fb->dev->dev, fb->fb.fix.smem_len, fb->fb.screen_base,
+		    fb->fb.fix.smem_start);
 }
 
 /*
diff --git a/arch/arm/mach-netx/fb.c b/arch/arm/mach-netx/fb.c
index d122ee6..8814ee5 100644
--- a/arch/arm/mach-netx/fb.c
+++ b/arch/arm/mach-netx/fb.c
@@ -42,8 +42,8 @@
 
 	fb->panel = netx_panel;
 
-	fb->fb.screen_base = dma_alloc_writecombine(&fb->dev->dev, 1024*1024,
-						    &dma, GFP_KERNEL);
+	fb->fb.screen_base = dma_alloc_wc(&fb->dev->dev, 1024 * 1024, &dma,
+					  GFP_KERNEL);
 	if (!fb->fb.screen_base) {
 		printk(KERN_ERR "CLCD: unable to map framebuffer\n");
 		return -ENOMEM;
@@ -57,16 +57,14 @@
 
 int netx_clcd_mmap(struct clcd_fb *fb, struct vm_area_struct *vma)
 {
-	return dma_mmap_writecombine(&fb->dev->dev, vma,
-				     fb->fb.screen_base,
-				     fb->fb.fix.smem_start,
-				     fb->fb.fix.smem_len);
+	return dma_mmap_wc(&fb->dev->dev, vma, fb->fb.screen_base,
+			   fb->fb.fix.smem_start, fb->fb.fix.smem_len);
 }
 
 void netx_clcd_remove(struct clcd_fb *fb)
 {
-	dma_free_writecombine(&fb->dev->dev, fb->fb.fix.smem_len,
-			      fb->fb.screen_base, fb->fb.fix.smem_start);
+	dma_free_wc(&fb->dev->dev, fb->fb.fix.smem_len, fb->fb.screen_base,
+		    fb->fb.fix.smem_start);
 }
 
 static AMBA_AHB_DEVICE(fb, "fb", 0, 0x00104000, { NETX_IRQ_LCD }, NULL);
diff --git a/arch/arm/mach-nspire/clcd.c b/arch/arm/mach-nspire/clcd.c
index abea126..ea0e5b2 100644
--- a/arch/arm/mach-nspire/clcd.c
+++ b/arch/arm/mach-nspire/clcd.c
@@ -90,8 +90,8 @@
 	panel_size = ((panel->mode.xres * panel->mode.yres) * panel->bpp) / 8;
 	panel_size = ALIGN(panel_size, PAGE_SIZE);
 
-	fb->fb.screen_base = dma_alloc_writecombine(&fb->dev->dev,
-		panel_size, &dma, GFP_KERNEL);
+	fb->fb.screen_base = dma_alloc_wc(&fb->dev->dev, panel_size, &dma,
+					  GFP_KERNEL);
 
 	if (!fb->fb.screen_base) {
 		pr_err("CLCD: unable to map framebuffer\n");
@@ -107,13 +107,12 @@
 
 int nspire_clcd_mmap(struct clcd_fb *fb, struct vm_area_struct *vma)
 {
-	return dma_mmap_writecombine(&fb->dev->dev, vma,
-		fb->fb.screen_base, fb->fb.fix.smem_start,
-		fb->fb.fix.smem_len);
+	return dma_mmap_wc(&fb->dev->dev, vma, fb->fb.screen_base,
+			   fb->fb.fix.smem_start, fb->fb.fix.smem_len);
 }
 
 void nspire_clcd_remove(struct clcd_fb *fb)
 {
-	dma_free_writecombine(&fb->dev->dev, fb->fb.fix.smem_len,
-		fb->fb.screen_base, fb->fb.fix.smem_start);
+	dma_free_wc(&fb->dev->dev, fb->fb.fix.smem_len, fb->fb.screen_base,
+		    fb->fb.fix.smem_start);
 }
diff --git a/arch/arm/mach-omap2/board-generic.c b/arch/arm/mach-omap2/board-generic.c
index 8098272..bab814d 100644
--- a/arch/arm/mach-omap2/board-generic.c
+++ b/arch/arm/mach-omap2/board-generic.c
@@ -18,6 +18,7 @@
 
 #include <asm/setup.h>
 #include <asm/mach/arch.h>
+#include <asm/system_info.h>
 
 #include "common.h"
 
@@ -77,12 +78,31 @@
 	NULL,
 };
 
+/* Set system_rev from atags */
+static void __init rx51_set_system_rev(const struct tag *tags)
+{
+	const struct tag *tag;
+
+	if (tags->hdr.tag != ATAG_CORE)
+		return;
+
+	for_each_tag(tag, tags) {
+		if (tag->hdr.tag == ATAG_REVISION) {
+			system_rev = tag->u.revision.rev;
+			break;
+		}
+	}
+}
+
 /* Legacy userspace on Nokia N900 needs ATAGS exported in /proc/atags,
  * save them while the data is still not overwritten
  */
 static void __init rx51_reserve(void)
 {
-	save_atags((const struct tag *)(PAGE_OFFSET + 0x100));
+	const struct tag *tags = (const struct tag *)(PAGE_OFFSET + 0x100);
+
+	save_atags(tags);
+	rx51_set_system_rev(tags);
 	omap_reserve();
 }
 
diff --git a/arch/arm/mach-omap2/devices.c b/arch/arm/mach-omap2/devices.c
index 9cda974..d7f1d69 100644
--- a/arch/arm/mach-omap2/devices.c
+++ b/arch/arm/mach-omap2/devices.c
@@ -18,7 +18,6 @@
 #include <linux/slab.h>
 #include <linux/of.h>
 #include <linux/pinctrl/machine.h>
-#include <linux/platform_data/mailbox-omap.h>
 
 #include <asm/mach-types.h>
 #include <asm/mach/map.h>
@@ -66,32 +65,6 @@
 }
 omap_postcore_initcall(omap3_l3_init);
 
-#if defined(CONFIG_OMAP2PLUS_MBOX) || defined(CONFIG_OMAP2PLUS_MBOX_MODULE)
-static inline void __init omap_init_mbox(void)
-{
-	struct omap_hwmod *oh;
-	struct platform_device *pdev;
-	struct omap_mbox_pdata *pdata;
-
-	oh = omap_hwmod_lookup("mailbox");
-	if (!oh) {
-		pr_err("%s: unable to find hwmod\n", __func__);
-		return;
-	}
-	if (!oh->dev_attr) {
-		pr_err("%s: hwmod doesn't have valid attrs\n", __func__);
-		return;
-	}
-
-	pdata = (struct omap_mbox_pdata *)oh->dev_attr;
-	pdev = omap_device_build("omap-mailbox", -1, oh, pdata, sizeof(*pdata));
-	WARN(IS_ERR(pdev), "%s: could not build device, err %ld\n",
-						__func__, PTR_ERR(pdev));
-}
-#else
-static inline void omap_init_mbox(void) { }
-#endif /* CONFIG_OMAP2PLUS_MBOX */
-
 static inline void omap_init_sti(void) {}
 
 #if defined(CONFIG_SPI_OMAP24XX) || defined(CONFIG_SPI_OMAP24XX_MODULE)
@@ -229,7 +202,6 @@
 		 * please keep these calls, and their implementations above,
 		 * in alphabetical order so they're easier to sort through.
 		 */
-		omap_init_mbox();
 		omap_init_mcspi();
 		omap_init_sham();
 		omap_init_aes();
diff --git a/arch/arm/mach-omap2/gpmc-onenand.c b/arch/arm/mach-omap2/gpmc-onenand.c
index 7b76ce0..8633c70 100644
--- a/arch/arm/mach-omap2/gpmc-onenand.c
+++ b/arch/arm/mach-omap2/gpmc-onenand.c
@@ -101,10 +101,8 @@
 
 static void set_onenand_cfg(void __iomem *onenand_base)
 {
-	u32 reg;
+	u32 reg = ONENAND_SYS_CFG1_RDY | ONENAND_SYS_CFG1_INT;
 
-	reg = readw(onenand_base + ONENAND_REG_SYS_CFG1);
-	reg &= ~((0x7 << ONENAND_SYS_CFG1_BRL_SHIFT) | (0x7 << 9));
 	reg |=	(latency << ONENAND_SYS_CFG1_BRL_SHIFT) |
 		ONENAND_SYS_CFG1_BL_16;
 	if (onenand_flags & ONENAND_FLAG_SYNCREAD)
@@ -123,6 +121,7 @@
 		reg |= ONENAND_SYS_CFG1_VHF;
 	else
 		reg &= ~ONENAND_SYS_CFG1_VHF;
+
 	writew(reg, onenand_base + ONENAND_REG_SYS_CFG1);
 }
 
@@ -289,6 +288,7 @@
 		}
 	}
 
+	onenand_async.sync_write = true;
 	omap2_onenand_calc_async_timings(&t);
 
 	ret = gpmc_cs_program_settings(gpmc_onenand_data->cs, &onenand_async);
diff --git a/arch/arm/mach-omap2/omap_device.c b/arch/arm/mach-omap2/omap_device.c
index 0437537..f7ff3b9 100644
--- a/arch/arm/mach-omap2/omap_device.c
+++ b/arch/arm/mach-omap2/omap_device.c
@@ -191,12 +191,22 @@
 {
 	struct platform_device *pdev = to_platform_device(dev);
 	struct omap_device *od;
+	int err;
 
 	switch (event) {
 	case BUS_NOTIFY_DEL_DEVICE:
 		if (pdev->archdata.od)
 			omap_device_delete(pdev->archdata.od);
 		break;
+	case BUS_NOTIFY_UNBOUND_DRIVER:
+		od = to_omap_device(pdev);
+		if (od && (od->_state == OMAP_DEVICE_STATE_ENABLED)) {
+			dev_info(dev, "enabled after unload, idling\n");
+			err = omap_device_idle(pdev);
+			if (err)
+				dev_err(dev, "failed to idle\n");
+		}
+		break;
 	case BUS_NOTIFY_ADD_DEVICE:
 		if (pdev->dev.of_node)
 			omap_device_build_from_dt(pdev);
@@ -602,8 +612,10 @@
 	int ret;
 
 	ret = omap_device_enable(pdev);
-	if (ret)
+	if (ret) {
+		dev_err(dev, "use pm_runtime_put_sync_suspend() in driver?\n");
 		return ret;
+	}
 
 	return pm_generic_runtime_resume(dev);
 }
diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c
index e9f65fe..b6d62e4 100644
--- a/arch/arm/mach-omap2/omap_hwmod.c
+++ b/arch/arm/mach-omap2/omap_hwmod.c
@@ -2200,6 +2200,11 @@
  */
 static int _idle(struct omap_hwmod *oh)
 {
+	if (oh->flags & HWMOD_NO_IDLE) {
+		oh->_int_flags |= _HWMOD_SKIP_ENABLE;
+		return 0;
+	}
+
 	pr_debug("omap_hwmod: %s: idling\n", oh->name);
 
 	if (oh->_state != _HWMOD_STATE_ENABLED) {
@@ -2504,6 +2509,8 @@
 			oh->flags |= HWMOD_INIT_NO_RESET;
 		if (of_find_property(np, "ti,no-idle-on-init", NULL))
 			oh->flags |= HWMOD_INIT_NO_IDLE;
+		if (of_find_property(np, "ti,no-idle", NULL))
+			oh->flags |= HWMOD_NO_IDLE;
 	}
 
 	oh->_state = _HWMOD_STATE_INITIALIZED;
@@ -2630,7 +2637,7 @@
 	 * XXX HWMOD_INIT_NO_IDLE does not belong in hwmod data -
 	 * it should be set by the core code as a runtime flag during startup
 	 */
-	if ((oh->flags & HWMOD_INIT_NO_IDLE) &&
+	if ((oh->flags & (HWMOD_INIT_NO_IDLE | HWMOD_NO_IDLE)) &&
 	    (postsetup_state == _HWMOD_STATE_IDLE)) {
 		oh->_int_flags |= _HWMOD_SKIP_ENABLE;
 		postsetup_state = _HWMOD_STATE_ENABLED;
diff --git a/arch/arm/mach-omap2/omap_hwmod.h b/arch/arm/mach-omap2/omap_hwmod.h
index 76bce11..7c7a311 100644
--- a/arch/arm/mach-omap2/omap_hwmod.h
+++ b/arch/arm/mach-omap2/omap_hwmod.h
@@ -525,6 +525,8 @@
  *     or idled.
  * HWMOD_OPT_CLKS_NEEDED: The optional clocks are needed for the module to
  *     operate and they need to be handled at the same time as the main_clk.
+ * HWMOD_NO_IDLE: Do not idle the hwmod at all. Useful to handle certain
+ *     IPs like CPSW on DRA7, where clocks to this module cannot be disabled.
  */
 #define HWMOD_SWSUP_SIDLE			(1 << 0)
 #define HWMOD_SWSUP_MSTANDBY			(1 << 1)
@@ -541,6 +543,7 @@
 #define HWMOD_SWSUP_SIDLE_ACT			(1 << 12)
 #define HWMOD_RECONFIG_IO_CHAIN			(1 << 13)
 #define HWMOD_OPT_CLKS_NEEDED			(1 << 14)
+#define HWMOD_NO_IDLE				(1 << 15)
 
 /*
  * omap_hwmod._int_flags definitions
diff --git a/arch/arm/mach-omap2/pdata-quirks.c b/arch/arm/mach-omap2/pdata-quirks.c
index e781e4f..a935d28 100644
--- a/arch/arm/mach-omap2/pdata-quirks.c
+++ b/arch/arm/mach-omap2/pdata-quirks.c
@@ -23,6 +23,8 @@
 #include <linux/platform_data/pinctrl-single.h>
 #include <linux/platform_data/iommu-omap.h>
 #include <linux/platform_data/wkup_m3.h>
+#include <linux/platform_data/pwm_omap_dmtimer.h>
+#include <plat/dmtimer.h>
 
 #include "common.h"
 #include "common-board-devices.h"
@@ -449,6 +451,24 @@
 	dev->platform_data = &twl_gpio_auxdata;
 }
 
+/* Dual mode timer PWM callbacks platdata */
+#if IS_ENABLED(CONFIG_OMAP_DM_TIMER)
+struct pwm_omap_dmtimer_pdata pwm_dmtimer_pdata = {
+	.request_by_node = omap_dm_timer_request_by_node,
+	.free = omap_dm_timer_free,
+	.enable = omap_dm_timer_enable,
+	.disable = omap_dm_timer_disable,
+	.get_fclk = omap_dm_timer_get_fclk,
+	.start = omap_dm_timer_start,
+	.stop = omap_dm_timer_stop,
+	.set_load = omap_dm_timer_set_load,
+	.set_match = omap_dm_timer_set_match,
+	.set_pwm = omap_dm_timer_set_pwm,
+	.set_prescaler = omap_dm_timer_set_prescaler,
+	.write_counter = omap_dm_timer_write_counter,
+};
+#endif
+
 /*
  * Few boards still need auxdata populated before we populate
  * the dev entries in of_platform_populate().
@@ -502,6 +522,9 @@
 	OF_DEV_AUXDATA("ti,am4372-wkup-m3", 0x44d00000, "44d00000.wkup_m3",
 		       &wkup_m3_data),
 #endif
+#if IS_ENABLED(CONFIG_OMAP_DM_TIMER)
+	OF_DEV_AUXDATA("ti,omap-dmtimer-pwm", 0, NULL, &pwm_dmtimer_pdata),
+#endif
 #if defined(CONFIG_ARCH_OMAP4) || defined(CONFIG_SOC_OMAP5)
 	OF_DEV_AUXDATA("ti,omap4-iommu", 0x4a066000, "4a066000.mmu",
 		       &omap4_iommu_pdata),
diff --git a/arch/arm/mach-omap2/sleep34xx.S b/arch/arm/mach-omap2/sleep34xx.S
index eafd120..1b9f052 100644
--- a/arch/arm/mach-omap2/sleep34xx.S
+++ b/arch/arm/mach-omap2/sleep34xx.S
@@ -86,13 +86,18 @@
 	stmfd	sp!, {lr}	@ save registers on stack
 	/* Setup so that we will disable and enable l2 */
 	mov	r1, #0x1
-	adrl	r2, l2dis_3630	@ may be too distant for plain adr
-	str	r1, [r2]
+	adrl	r3, l2dis_3630_offset	@ may be too distant for plain adr
+	ldr	r2, [r3]		@ value for offset
+	str	r1, [r2, r3]		@ write to l2dis_3630
 	ldmfd	sp!, {pc}	@ restore regs and return
 ENDPROC(enable_omap3630_toggle_l2_on_restore)
 
-	.text
-/* Function to call rom code to save secure ram context */
+/*
+ * Function to call rom code to save secure ram context. This gets
+ * relocated to SRAM, so it can be all in .data section. Otherwise
+ * we need to initialize api_params separately.
+ */
+	.data
 	.align	3
 ENTRY(save_secure_ram_context)
 	stmfd	sp!, {r4 - r11, lr}	@ save registers on stack
@@ -126,6 +131,8 @@
 ENTRY(save_secure_ram_context_sz)
 	.word	. - save_secure_ram_context
 
+	.text
+
 /*
  * ======================
  * == Idle entry point ==
@@ -289,12 +296,6 @@
 	bic	r5, r5, #0x40
 	str	r5, [r4]
 
-/*
- * PC-relative stores lead to undefined behaviour in Thumb-2: use a r7 as a
- * base instead.
- * Be careful not to clobber r7 when maintaing this code.
- */
-
 is_dll_in_lock_mode:
 	/* Is dll in lock mode? */
 	ldr	r4, sdrc_dlla_ctrl
@@ -302,11 +303,7 @@
 	tst	r5, #0x4
 	bne	exit_nonoff_modes	@ Return if locked
 	/* wait till dll locks */
-	adr	r7, kick_counter
 wait_dll_lock_timed:
-	ldr	r4, wait_dll_lock_counter
-	add	r4, r4, #1
-	str	r4, [r7, #wait_dll_lock_counter - kick_counter]
 	ldr	r4, sdrc_dlla_status
 	/* Wait 20uS for lock */
 	mov	r6, #8
@@ -330,9 +327,6 @@
 	orr	r6, r6, #(1<<3)		@ enable dll
 	str	r6, [r4]
 	dsb
-	ldr	r4, kick_counter
-	add	r4, r4, #1
-	str	r4, [r7]		@ kick_counter
 	b	wait_dll_lock_timed
 
 exit_nonoff_modes:
@@ -360,15 +354,6 @@
 	.word	SDRC_DLLA_STATUS_V
 sdrc_dlla_ctrl:
 	.word	SDRC_DLLA_CTRL_V
-	/*
-	 * When exporting to userspace while the counters are in SRAM,
-	 * these 2 words need to be at the end to facilitate retrival!
-	 */
-kick_counter:
-	.word	0
-wait_dll_lock_counter:
-	.word	0
-
 ENTRY(omap3_do_wfi_sz)
 	.word	. - omap3_do_wfi
 
@@ -437,7 +422,9 @@
 	cmp	r2, #0x0	@ Check if target power state was OFF or RET
 	bne	logic_l1_restore
 
-	ldr	r0, l2dis_3630
+	adr	r1, l2dis_3630_offset	@ address for offset
+	ldr	r0, [r1]		@ value for offset
+	ldr	r0, [r1, r0]		@ value at l2dis_3630
 	cmp	r0, #0x1	@ should we disable L2 on 3630?
 	bne	skipl2dis
 	mrc	p15, 0, r0, c1, c0, 1
@@ -449,12 +436,14 @@
 	and	r1, #0x700
 	cmp	r1, #0x300
 	beq	l2_inv_gp
+	adr	r0, l2_inv_api_params_offset
+	ldr	r3, [r0]
+	add	r3, r3, r0		@ r3 points to dummy parameters
 	mov	r0, #40			@ set service ID for PPA
 	mov	r12, r0			@ copy secure Service ID in r12
 	mov	r1, #0			@ set task id for ROM code in r1
 	mov	r2, #4			@ set some flags in r2, r6
 	mov	r6, #0xff
-	adr	r3, l2_inv_api_params	@ r3 points to dummy parameters
 	dsb				@ data write barrier
 	dmb				@ data memory barrier
 	smc	#1			@ call SMI monitor (smi #1)
@@ -488,8 +477,8 @@
 	b	logic_l1_restore
 
 	.align
-l2_inv_api_params:
-	.word	0x1, 0x00
+l2_inv_api_params_offset:
+	.long	l2_inv_api_params - .
 l2_inv_gp:
 	/* Execute smi to invalidate L2 cache */
 	mov r12, #0x1			@ set up to invalidate L2
@@ -506,7 +495,9 @@
 	mov	r12, #0x2
 	smc	#0			@ Call SMI monitor (smieq)
 logic_l1_restore:
-	ldr	r1, l2dis_3630
+	adr	r0, l2dis_3630_offset	@ adress for offset
+	ldr	r1, [r0]		@ value for offset
+	ldr	r1, [r0, r1]		@ value at l2dis_3630
 	cmp	r1, #0x1		@ Test if L2 re-enable needed on 3630
 	bne	skipl2reen
 	mrc	p15, 0, r1, c1, c0, 1
@@ -535,9 +526,17 @@
 	.word	CONTROL_STAT
 control_mem_rta:
 	.word	CONTROL_MEM_RTA_CTRL
+l2dis_3630_offset:
+	.long	l2dis_3630 - .
+
+	.data
 l2dis_3630:
 	.word	0
 
+	.data
+l2_inv_api_params:
+	.word	0x1, 0x00
+
 /*
  * Internal functions
  */
diff --git a/arch/arm/mach-omap2/sleep44xx.S b/arch/arm/mach-omap2/sleep44xx.S
index 9b09d85..c7a3b4a 100644
--- a/arch/arm/mach-omap2/sleep44xx.S
+++ b/arch/arm/mach-omap2/sleep44xx.S
@@ -29,12 +29,6 @@
 	dsb
 .endm
 
-ppa_zero_params:
-	.word		0x0
-
-ppa_por_params:
-	.word		1, 0
-
 #ifdef CONFIG_ARCH_OMAP4
 
 /*
@@ -266,7 +260,9 @@
 	beq	skip_ns_smp_enable
 ppa_actrl_retry:
 	mov     r0, #OMAP4_PPA_CPU_ACTRL_SMP_INDEX
-	adr	r3, ppa_zero_params		@ Pointer to parameters
+	adr	r1, ppa_zero_params_offset
+	ldr	r3, [r1]
+	add	r3, r3, r1			@ Pointer to ppa_zero_params
 	mov	r1, #0x0			@ Process ID
 	mov	r2, #0x4			@ Flag
 	mov	r6, #0xff
@@ -303,7 +299,9 @@
 	ldr     r0, =OMAP4_PPA_L2_POR_INDEX
 	ldr     r1, =OMAP44XX_SAR_RAM_BASE
 	ldr     r4, [r1, #L2X0_PREFETCH_CTRL_OFFSET]
-	adr     r3, ppa_por_params
+	adr     r1, ppa_por_params_offset
+	ldr	r3, [r1]
+	add	r3, r3, r1			@ Pointer to ppa_por_params
 	str     r4, [r3, #0x04]
 	mov	r1, #0x0			@ Process ID
 	mov	r2, #0x4			@ Flag
@@ -328,6 +326,8 @@
 #endif
 
 	b	cpu_resume			@ Jump to generic resume
+ppa_por_params_offset:
+	.long	ppa_por_params - .
 ENDPROC(omap4_cpu_resume)
 #endif	/* CONFIG_ARCH_OMAP4 */
 
@@ -380,4 +380,13 @@
 	nop
 
 	ldmfd	sp!, {pc}
+ppa_zero_params_offset:
+	.long	ppa_zero_params - .
 ENDPROC(omap_do_wfi)
+
+	.data
+ppa_zero_params:
+	.word		0
+
+ppa_por_params:
+	.word		1, 0
diff --git a/arch/arm/mach-realview/Kconfig b/arch/arm/mach-realview/Kconfig
index def40a0..70ab4a2 100644
--- a/arch/arm/mach-realview/Kconfig
+++ b/arch/arm/mach-realview/Kconfig
@@ -1,5 +1,6 @@
 menuconfig ARCH_REALVIEW
-	bool "ARM Ltd. RealView family" if ARCH_MULTI_V5 || ARCH_MULTI_V6 || ARCH_MULTI_V7
+	bool "ARM Ltd. RealView family"
+	depends on ARCH_MULTI_V5 || ARCH_MULTI_V6 || ARCH_MULTI_V7
 	select ARM_AMBA
 	select ARM_TIMER_SP804
 	select COMMON_CLK_VERSATILE
diff --git a/arch/arm/mach-realview/platsmp-dt.c b/arch/arm/mach-realview/platsmp-dt.c
index 6558539..6964e88 100644
--- a/arch/arm/mach-realview/platsmp-dt.c
+++ b/arch/arm/mach-realview/platsmp-dt.c
@@ -80,7 +80,7 @@
 		     virt_to_phys(versatile_secondary_startup));
 }
 
-struct smp_operations realview_dt_smp_ops __initdata = {
+static const struct smp_operations realview_dt_smp_ops __initconst = {
 	.smp_prepare_cpus	= realview_smp_prepare_cpus,
 	.smp_secondary_init	= versatile_secondary_init,
 	.smp_boot_secondary	= versatile_boot_secondary,
diff --git a/arch/arm/mach-shmobile/common.h b/arch/arm/mach-shmobile/common.h
index 9cb1121..b3a4ed5 100644
--- a/arch/arm/mach-shmobile/common.h
+++ b/arch/arm/mach-shmobile/common.h
@@ -4,7 +4,6 @@
 extern void shmobile_init_delay(void);
 extern void shmobile_boot_vector(void);
 extern unsigned long shmobile_boot_fn;
-extern unsigned long shmobile_boot_arg;
 extern unsigned long shmobile_boot_size;
 extern void shmobile_smp_boot(void);
 extern void shmobile_smp_sleep(void);
diff --git a/arch/arm/mach-shmobile/headsmp-scu.S b/arch/arm/mach-shmobile/headsmp-scu.S
index fa5248c..5e503d9 100644
--- a/arch/arm/mach-shmobile/headsmp-scu.S
+++ b/arch/arm/mach-shmobile/headsmp-scu.S
@@ -38,9 +38,3 @@
 
 	b	secondary_startup
 ENDPROC(shmobile_boot_scu)
-
-	.text
-	.align	2
-	.globl	shmobile_scu_base
-shmobile_scu_base:
-	.space	4
diff --git a/arch/arm/mach-shmobile/headsmp.S b/arch/arm/mach-shmobile/headsmp.S
index 330c1fc..32e0bf6 100644
--- a/arch/arm/mach-shmobile/headsmp.S
+++ b/arch/arm/mach-shmobile/headsmp.S
@@ -24,7 +24,6 @@
 	.arm
 	.align  12
 ENTRY(shmobile_boot_vector)
-	ldr     r0, 2f
 	ldr     r1, 1f
 	bx	r1
 
@@ -34,9 +33,6 @@
 	.globl	shmobile_boot_fn
 shmobile_boot_fn:
 1:	.space	4
-	.globl	shmobile_boot_arg
-shmobile_boot_arg:
-2:	.space	4
 	.globl	shmobile_boot_size
 shmobile_boot_size:
 	.long	. - shmobile_boot_vector
@@ -46,13 +42,15 @@
  */
 
 ENTRY(shmobile_smp_boot)
-						@ r0 = MPIDR_HWID_BITMASK
 	mrc	p15, 0, r1, c0, c0, 5		@ r1 = MPIDR
-	and	r0, r1, r0			@ r0 = cpu_logical_map() value
+	and	r0, r1, #0xffffff		@ MPIDR_HWID_BITMASK
+						@ r0 = cpu_logical_map() value
 	mov	r1, #0				@ r1 = CPU index
-	adr	r5, 1f				@ array of per-cpu mpidr values
-	adr	r6, 2f				@ array of per-cpu functions
-	adr	r7, 3f				@ array of per-cpu arguments
+	adr	r2, 1f
+	ldmia	r2, {r5, r6, r7}
+	add	r5, r5, r2			@ array of per-cpu mpidr values
+	add	r6, r6, r2			@ array of per-cpu functions
+	add	r7, r7, r2			@ array of per-cpu arguments
 
 shmobile_smp_boot_find_mpidr:
 	ldr	r8, [r5, r1, lsl #2]
@@ -80,12 +78,18 @@
 	b	shmobile_smp_boot
 ENDPROC(shmobile_smp_sleep)
 
+	.align	2
+1:	.long	shmobile_smp_mpidr - .
+	.long	shmobile_smp_fn - 1b
+	.long	shmobile_smp_arg - 1b
+
+	.bss
 	.globl	shmobile_smp_mpidr
 shmobile_smp_mpidr:
-1:	.space	NR_CPUS * 4
+	.space	NR_CPUS * 4
 	.globl	shmobile_smp_fn
 shmobile_smp_fn:
-2:	.space	NR_CPUS * 4
+	.space	NR_CPUS * 4
 	.globl	shmobile_smp_arg
 shmobile_smp_arg:
-3:	.space	NR_CPUS * 4
+	.space	NR_CPUS * 4
diff --git a/arch/arm/mach-shmobile/platsmp-apmu.c b/arch/arm/mach-shmobile/platsmp-apmu.c
index 911884f..aba75c8 100644
--- a/arch/arm/mach-shmobile/platsmp-apmu.c
+++ b/arch/arm/mach-shmobile/platsmp-apmu.c
@@ -123,7 +123,6 @@
 {
 	/* install boot code shared by all CPUs */
 	shmobile_boot_fn = virt_to_phys(shmobile_smp_boot);
-	shmobile_boot_arg = MPIDR_HWID_BITMASK;
 
 	/* perform per-cpu setup */
 	apmu_parse_cfg(apmu_init_cpu, apmu_config, num);
diff --git a/arch/arm/mach-shmobile/platsmp-scu.c b/arch/arm/mach-shmobile/platsmp-scu.c
index 6466311..081a097 100644
--- a/arch/arm/mach-shmobile/platsmp-scu.c
+++ b/arch/arm/mach-shmobile/platsmp-scu.c
@@ -17,6 +17,9 @@
 #include <asm/smp_scu.h>
 #include "common.h"
 
+
+void __iomem *shmobile_scu_base;
+
 static int shmobile_smp_scu_notifier_call(struct notifier_block *nfb,
 					  unsigned long action, void *hcpu)
 {
@@ -41,7 +44,6 @@
 {
 	/* install boot code shared by all CPUs */
 	shmobile_boot_fn = virt_to_phys(shmobile_smp_boot);
-	shmobile_boot_arg = MPIDR_HWID_BITMASK;
 
 	/* enable SCU and cache coherency on booting CPU */
 	scu_enable(shmobile_scu_base);
diff --git a/arch/arm/mach-shmobile/smp-r8a7779.c b/arch/arm/mach-shmobile/smp-r8a7779.c
index b854fe2..0b024a9 100644
--- a/arch/arm/mach-shmobile/smp-r8a7779.c
+++ b/arch/arm/mach-shmobile/smp-r8a7779.c
@@ -92,8 +92,6 @@
 {
 	/* Map the reset vector (in headsmp-scu.S, headsmp.S) */
 	__raw_writel(__pa(shmobile_boot_vector), AVECR);
-	shmobile_boot_fn = virt_to_phys(shmobile_boot_scu);
-	shmobile_boot_arg = (unsigned long)shmobile_scu_base;
 
 	/* setup r8a7779 specific SCU bits */
 	shmobile_scu_base = IOMEM(R8A7779_SCU_BASE);
diff --git a/arch/arm/mach-tango/Kconfig b/arch/arm/mach-tango/Kconfig
index d6a3714..ebe15b9 100644
--- a/arch/arm/mach-tango/Kconfig
+++ b/arch/arm/mach-tango/Kconfig
@@ -1,5 +1,6 @@
 config ARCH_TANGO
-	bool "Sigma Designs Tango4 (SMP87xx)" if ARCH_MULTI_V7
+	bool "Sigma Designs Tango4 (SMP87xx)"
+	depends on ARCH_MULTI_V7
 	# Cortex-A9 MPCore r3p0, PL310 r3p2
 	select ARCH_HAS_HOLES_MEMORYMODEL
 	select ARM_ERRATA_754322
diff --git a/arch/arm/mach-tango/platsmp.c b/arch/arm/mach-tango/platsmp.c
index a18d5a3..a21f55e 100644
--- a/arch/arm/mach-tango/platsmp.c
+++ b/arch/arm/mach-tango/platsmp.c
@@ -9,7 +9,7 @@
 	return 0;
 }
 
-static struct smp_operations tango_smp_ops __initdata = {
+static const struct smp_operations tango_smp_ops __initconst = {
 	.smp_boot_secondary	= tango_boot_secondary,
 };
 
diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c
index 4b4058d..66353ca 100644
--- a/arch/arm/mm/mmap.c
+++ b/arch/arm/mm/mmap.c
@@ -173,7 +173,7 @@
 {
 	unsigned long rnd;
 
-	rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_bits) - 1);
+	rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1);
 
 	return rnd << PAGE_SHIFT;
 }
diff --git a/arch/arm/mm/pageattr.c b/arch/arm/mm/pageattr.c
index cf30daf..d19b1ad 100644
--- a/arch/arm/mm/pageattr.c
+++ b/arch/arm/mm/pageattr.c
@@ -49,6 +49,9 @@
 		WARN_ON_ONCE(1);
 	}
 
+	if (!numpages)
+		return 0;
+
 	if (start < MODULES_VADDR || start >= MODULES_END)
 		return -EINVAL;
 
diff --git a/arch/arm/plat-samsung/pm-check.c b/arch/arm/plat-samsung/pm-check.c
index 04aff2c..70f2f69 100644
--- a/arch/arm/plat-samsung/pm-check.c
+++ b/arch/arm/plat-samsung/pm-check.c
@@ -53,8 +53,8 @@
 		if (ptr->child != NULL)
 			s3c_pm_run_res(ptr->child, fn, arg);
 
-		if ((ptr->flags & IORESOURCE_MEM) &&
-		    strcmp(ptr->name, "System RAM") == 0) {
+		if ((ptr->flags & IORESOURCE_SYSTEM_RAM)
+				== IORESOURCE_SYSTEM_RAM) {
 			S3C_PMDBG("Found system RAM at %08lx..%08lx\n",
 				  (unsigned long)ptr->start,
 				  (unsigned long)ptr->end);
diff --git a/arch/arm/vdso/vdso.S b/arch/arm/vdso/vdso.S
index b2b97e3..a62a7b6 100644
--- a/arch/arm/vdso/vdso.S
+++ b/arch/arm/vdso/vdso.S
@@ -23,9 +23,8 @@
 #include <linux/const.h>
 #include <asm/page.h>
 
-	__PAGE_ALIGNED_DATA
-
 	.globl vdso_start, vdso_end
+	.section .data..ro_after_init
 	.balign PAGE_SIZE
 vdso_start:
 	.incbin "arch/arm/vdso/vdso.so"
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index cd822d8..b5e3f6d 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -27,6 +27,8 @@
 endif
 
 KBUILD_CFLAGS	+= -mgeneral-regs-only $(lseinstr)
+KBUILD_CFLAGS	+= -fno-asynchronous-unwind-tables
+KBUILD_CFLAGS	+= $(call cc-option, -mpc-relative-literal-loads)
 KBUILD_AFLAGS	+= $(lseinstr)
 
 ifeq ($(CONFIG_CPU_BIG_ENDIAN), y)
@@ -86,7 +88,7 @@
 Image.%: vmlinux
 	$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
 
-zinstall install: vmlinux
+zinstall install:
 	$(Q)$(MAKE) $(build)=$(boot) $@
 
 %.dtb: scripts
diff --git a/arch/arm64/boot/Makefile b/arch/arm64/boot/Makefile
index abcbba2..305c552 100644
--- a/arch/arm64/boot/Makefile
+++ b/arch/arm64/boot/Makefile
@@ -34,10 +34,10 @@
 $(obj)/Image.lzo: $(obj)/Image FORCE
 	$(call if_changed,lzo)
 
-install: $(obj)/Image
+install:
 	$(CONFIG_SHELL) $(srctree)/$(src)/install.sh $(KERNELRELEASE) \
 	$(obj)/Image System.map "$(INSTALL_PATH)"
 
-zinstall: $(obj)/Image.gz
+zinstall:
 	$(CONFIG_SHELL) $(srctree)/$(src)/install.sh $(KERNELRELEASE) \
 	$(obj)/Image.gz System.map "$(INSTALL_PATH)"
diff --git a/arch/arm64/boot/dts/arm/juno-base.dtsi b/arch/arm64/boot/dts/arm/juno-base.dtsi
index dd5158e..e5b59ca 100644
--- a/arch/arm64/boot/dts/arm/juno-base.dtsi
+++ b/arch/arm64/boot/dts/arm/juno-base.dtsi
@@ -115,6 +115,7 @@
 			     <GIC_SPI 89 IRQ_TYPE_LEVEL_HIGH>,
 			     <GIC_SPI 90 IRQ_TYPE_LEVEL_HIGH>,
 			     <GIC_SPI 91 IRQ_TYPE_LEVEL_HIGH>,
+			     <GIC_SPI 92 IRQ_TYPE_LEVEL_HIGH>,
 			     <GIC_SPI 108 IRQ_TYPE_LEVEL_HIGH>,
 			     <GIC_SPI 109 IRQ_TYPE_LEVEL_HIGH>,
 			     <GIC_SPI 110 IRQ_TYPE_LEVEL_HIGH>,
diff --git a/arch/arm64/boot/dts/hisilicon/hip05_hns.dtsi b/arch/arm64/boot/dts/hisilicon/hip05_hns.dtsi
index da7b6e6..933cba3 100644
--- a/arch/arm64/boot/dts/hisilicon/hip05_hns.dtsi
+++ b/arch/arm64/boot/dts/hisilicon/hip05_hns.dtsi
@@ -23,9 +23,8 @@
 		};
 	};
 
-	dsa: dsa@c7000000 {
+	dsaf0: dsa@c7000000 {
 		compatible = "hisilicon,hns-dsaf-v1";
-		dsa_name = "dsaf0";
 		mode = "6port-16rss";
 		interrupt-parent = <&mbigen_dsa>;
 
@@ -127,7 +126,7 @@
 
 	eth0: ethernet@0{
 		compatible = "hisilicon,hns-nic-v1";
-		ae-name = "dsaf0";
+		ae-handle = <&dsaf0>;
 		port-id = <0>;
 		local-mac-address = [00 00 00 01 00 58];
 		status = "disabled";
@@ -135,14 +134,14 @@
 	};
 	eth1: ethernet@1{
 		compatible = "hisilicon,hns-nic-v1";
-		ae-name = "dsaf0";
+		ae-handle = <&dsaf0>;
 		port-id = <1>;
 		status = "disabled";
 		dma-coherent;
 	};
 	eth2: ethernet@2{
 		compatible = "hisilicon,hns-nic-v1";
-		ae-name = "dsaf0";
+		ae-handle = <&dsaf0>;
 		port-id = <2>;
 		local-mac-address = [00 00 00 01 00 5a];
 		status = "disabled";
@@ -150,7 +149,7 @@
 	};
 	eth3: ethernet@3{
 		compatible = "hisilicon,hns-nic-v1";
-		ae-name = "dsaf0";
+		ae-handle = <&dsaf0>;
 		port-id = <3>;
 		local-mac-address = [00 00 00 01 00 5b];
 		status = "disabled";
@@ -158,7 +157,7 @@
 	};
 	eth4: ethernet@4{
 		compatible = "hisilicon,hns-nic-v1";
-		ae-name = "dsaf0";
+		ae-handle = <&dsaf0>;
 		port-id = <4>;
 		local-mac-address = [00 00 00 01 00 5c];
 		status = "disabled";
@@ -166,7 +165,7 @@
 	};
 	eth5: ethernet@5{
 		compatible = "hisilicon,hns-nic-v1";
-		ae-name = "dsaf0";
+		ae-handle = <&dsaf0>;
 		port-id = <5>;
 		local-mac-address = [00 00 00 01 00 5d];
 		status = "disabled";
@@ -174,7 +173,7 @@
 	};
 	eth6: ethernet@6{
 		compatible = "hisilicon,hns-nic-v1";
-		ae-name = "dsaf0";
+		ae-handle = <&dsaf0>;
 		port-id = <6>;
 		local-mac-address = [00 00 00 01 00 5e];
 		status = "disabled";
@@ -182,7 +181,7 @@
 	};
 	eth7: ethernet@7{
 		compatible = "hisilicon,hns-nic-v1";
-		ae-name = "dsaf0";
+		ae-handle = <&dsaf0>;
 		port-id = <7>;
 		local-mac-address = [00 00 00 01 00 5f];
 		status = "disabled";
diff --git a/arch/arm64/boot/dts/nvidia/tegra132-norrin.dts b/arch/arm64/boot/dts/nvidia/tegra132-norrin.dts
index 7dfe1c0..62f33fc 100644
--- a/arch/arm64/boot/dts/nvidia/tegra132-norrin.dts
+++ b/arch/arm64/boot/dts/nvidia/tegra132-norrin.dts
@@ -12,6 +12,8 @@
 		rtc1 = "/rtc@0,7000e000";
 	};
 
+	chosen { };
+
 	memory {
 		device_type = "memory";
 		reg = <0x0 0x80000000 0x0 0x80000000>;
diff --git a/arch/arm64/boot/install.sh b/arch/arm64/boot/install.sh
index 12ed78a..d91e1f0 100644
--- a/arch/arm64/boot/install.sh
+++ b/arch/arm64/boot/install.sh
@@ -20,6 +20,20 @@
 #   $4 - default install path (blank if root directory)
 #
 
+verify () {
+	if [ ! -f "$1" ]; then
+		echo ""                                                   1>&2
+		echo " *** Missing file: $1"                              1>&2
+		echo ' *** You need to run "make" before "make install".' 1>&2
+		echo ""                                                   1>&2
+		exit 1
+	fi
+}
+
+# Make sure the files actually exist
+verify "$2"
+verify "$3"
+
 # User may have a custom install script
 if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi
 if [ -x /sbin/${INSTALLKERNEL} ]; then exec /sbin/${INSTALLKERNEL} "$@"; fi
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index 18ca9fb..86581f7 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -16,7 +16,6 @@
 CONFIG_LOG_BUF_SHIFT=14
 CONFIG_MEMCG=y
 CONFIG_MEMCG_SWAP=y
-CONFIG_MEMCG_KMEM=y
 CONFIG_CGROUP_HUGETLB=y
 # CONFIG_UTS_NS is not set
 # CONFIG_IPC_NS is not set
@@ -37,15 +36,13 @@
 CONFIG_ARCH_LAYERSCAPE=y
 CONFIG_ARCH_HISI=y
 CONFIG_ARCH_MEDIATEK=y
+CONFIG_ARCH_QCOM=y
 CONFIG_ARCH_ROCKCHIP=y
 CONFIG_ARCH_SEATTLE=y
 CONFIG_ARCH_RENESAS=y
 CONFIG_ARCH_R8A7795=y
 CONFIG_ARCH_STRATIX10=y
 CONFIG_ARCH_TEGRA=y
-CONFIG_ARCH_TEGRA_132_SOC=y
-CONFIG_ARCH_TEGRA_210_SOC=y
-CONFIG_ARCH_QCOM=y
 CONFIG_ARCH_SPRD=y
 CONFIG_ARCH_THUNDER=y
 CONFIG_ARCH_UNIPHIER=y
@@ -54,14 +51,19 @@
 CONFIG_ARCH_ZYNQMP=y
 CONFIG_PCI=y
 CONFIG_PCI_MSI=y
+CONFIG_PCI_IOV=y
+CONFIG_PCI_RCAR_GEN2_PCIE=y
 CONFIG_PCI_HOST_GENERIC=y
 CONFIG_PCI_XGENE=y
-CONFIG_SMP=y
+CONFIG_PCI_LAYERSCAPE=y
+CONFIG_PCI_HISI=y
+CONFIG_PCIE_QCOM=y
 CONFIG_SCHED_MC=y
 CONFIG_PREEMPT=y
 CONFIG_KSM=y
 CONFIG_TRANSPARENT_HUGEPAGE=y
 CONFIG_CMA=y
+CONFIG_XEN=y
 CONFIG_CMDLINE="console=ttyAMA0"
 # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
 CONFIG_COMPAT=y
@@ -100,7 +102,11 @@
 CONFIG_NETDEVICES=y
 CONFIG_TUN=y
 CONFIG_VIRTIO_NET=y
+CONFIG_AMD_XGBE=y
 CONFIG_NET_XGENE=y
+CONFIG_E1000E=y
+CONFIG_IGB=y
+CONFIG_IGBVF=y
 CONFIG_SKY2=y
 CONFIG_RAVB=y
 CONFIG_SMC91X=y
@@ -117,25 +123,23 @@
 CONFIG_SERIAL_8250_DW=y
 CONFIG_SERIAL_8250_MT6577=y
 CONFIG_SERIAL_8250_UNIPHIER=y
+CONFIG_SERIAL_OF_PLATFORM=y
 CONFIG_SERIAL_AMBA_PL011=y
 CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
 CONFIG_SERIAL_SAMSUNG=y
-CONFIG_SERIAL_SAMSUNG_UARTS_4=y
-CONFIG_SERIAL_SAMSUNG_UARTS=4
 CONFIG_SERIAL_SAMSUNG_CONSOLE=y
+CONFIG_SERIAL_TEGRA=y
 CONFIG_SERIAL_SH_SCI=y
 CONFIG_SERIAL_SH_SCI_NR_UARTS=11
 CONFIG_SERIAL_SH_SCI_CONSOLE=y
-CONFIG_SERIAL_TEGRA=y
 CONFIG_SERIAL_MSM=y
 CONFIG_SERIAL_MSM_CONSOLE=y
-CONFIG_SERIAL_OF_PLATFORM=y
 CONFIG_SERIAL_XILINX_PS_UART=y
 CONFIG_SERIAL_XILINX_PS_UART_CONSOLE=y
 CONFIG_VIRTIO_CONSOLE=y
 # CONFIG_HW_RANDOM is not set
-CONFIG_I2C=y
 CONFIG_I2C_QUP=y
+CONFIG_I2C_UNIPHIER_F=y
 CONFIG_I2C_RCAR=y
 CONFIG_SPI=y
 CONFIG_SPI_PL022=y
@@ -176,8 +180,6 @@
 CONFIG_MMC_SDHCI_TEGRA=y
 CONFIG_MMC_SPI=y
 CONFIG_MMC_DW=y
-CONFIG_MMC_DW_IDMAC=y
-CONFIG_MMC_DW_PLTFM=y
 CONFIG_MMC_DW_EXYNOS=y
 CONFIG_NEW_LEDS=y
 CONFIG_LEDS_CLASS=y
@@ -187,28 +189,33 @@
 CONFIG_LEDS_TRIGGER_CPU=y
 CONFIG_RTC_CLASS=y
 CONFIG_RTC_DRV_EFI=y
+CONFIG_RTC_DRV_PL031=y
 CONFIG_RTC_DRV_XGENE=y
 CONFIG_DMADEVICES=y
-CONFIG_RCAR_DMAC=y
 CONFIG_QCOM_BAM_DMA=y
 CONFIG_TEGRA20_APB_DMA=y
+CONFIG_RCAR_DMAC=y
+CONFIG_VFIO=y
+CONFIG_VFIO_PCI=y
 CONFIG_VIRTIO_PCI=y
 CONFIG_VIRTIO_BALLOON=y
 CONFIG_VIRTIO_MMIO=y
+CONFIG_XEN_GNTDEV=y
+CONFIG_XEN_GRANT_DEV_ALLOC=y
 CONFIG_COMMON_CLK_CS2000_CP=y
 CONFIG_COMMON_CLK_QCOM=y
 CONFIG_MSM_GCC_8916=y
 CONFIG_HWSPINLOCK_QCOM=y
-# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_ARM_SMMU=y
 CONFIG_QCOM_SMEM=y
 CONFIG_QCOM_SMD=y
 CONFIG_QCOM_SMD_RPM=y
+CONFIG_ARCH_TEGRA_132_SOC=y
+CONFIG_ARCH_TEGRA_210_SOC=y
+CONFIG_HISILICON_IRQ_MBIGEN=y
 CONFIG_PHY_XGENE=y
 CONFIG_EXT2_FS=y
 CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-# CONFIG_EXT3_FS_XATTR is not set
-CONFIG_EXT4_FS=y
 CONFIG_FANOTIFY=y
 CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y
 CONFIG_QUOTA=y
@@ -239,6 +246,7 @@
 # CONFIG_FTRACE is not set
 CONFIG_MEMTEST=y
 CONFIG_SECURITY=y
+CONFIG_CRYPTO_ECHAINIV=y
 CONFIG_CRYPTO_ANSI_CPRNG=y
 CONFIG_ARM64_CRYPTO=y
 CONFIG_CRYPTO_SHA1_ARM64_CE=y
diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c
index 05d9e16..7a3d22a 100644
--- a/arch/arm64/crypto/aes-glue.c
+++ b/arch/arm64/crypto/aes-glue.c
@@ -294,7 +294,7 @@
 	.cra_blkcipher = {
 		.min_keysize	= AES_MIN_KEY_SIZE,
 		.max_keysize	= AES_MAX_KEY_SIZE,
-		.ivsize		= AES_BLOCK_SIZE,
+		.ivsize		= 0,
 		.setkey		= aes_setkey,
 		.encrypt	= ecb_encrypt,
 		.decrypt	= ecb_decrypt,
@@ -371,7 +371,7 @@
 	.cra_ablkcipher = {
 		.min_keysize	= AES_MIN_KEY_SIZE,
 		.max_keysize	= AES_MAX_KEY_SIZE,
-		.ivsize		= AES_BLOCK_SIZE,
+		.ivsize		= 0,
 		.setkey		= ablk_set_key,
 		.encrypt	= ablk_encrypt,
 		.decrypt	= ablk_decrypt,
diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h
index 2731d3b..8ec88e5 100644
--- a/arch/arm64/include/asm/arch_gicv3.h
+++ b/arch/arm64/include/asm/arch_gicv3.h
@@ -103,6 +103,7 @@
 	u64 irqstat;
 
 	asm volatile("mrs_s %0, " __stringify(ICC_IAR1_EL1) : "=r" (irqstat));
+	dsb(sy);
 	return irqstat;
 }
 
diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
index 7fc294c..22dda61 100644
--- a/arch/arm64/include/asm/cacheflush.h
+++ b/arch/arm64/include/asm/cacheflush.h
@@ -156,8 +156,4 @@
 int set_memory_x(unsigned long addr, int numpages);
 int set_memory_nx(unsigned long addr, int numpages);
 
-#ifdef CONFIG_DEBUG_RODATA
-void mark_rodata_ro(void);
-#endif
-
 #endif
diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h
index 007a69f..5f3ab8c 100644
--- a/arch/arm64/include/asm/futex.h
+++ b/arch/arm64/include/asm/futex.h
@@ -121,6 +121,7 @@
 		return -EFAULT;
 
 	asm volatile("// futex_atomic_cmpxchg_inatomic\n"
+ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN, CONFIG_ARM64_PAN)
 "	prfm	pstl1strm, %2\n"
 "1:	ldxr	%w1, %2\n"
 "	sub	%w3, %w1, %w4\n"
@@ -137,6 +138,7 @@
 "	.align	3\n"
 "	.quad	1b, 4b, 2b, 4b\n"
 "	.popsection\n"
+ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN, CONFIG_ARM64_PAN)
 	: "+r" (ret), "=&r" (val), "+Q" (*uaddr), "=&r" (tmp)
 	: "r" (oldval), "r" (newval), "Ir" (-EFAULT)
 	: "memory");
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 738a95f..d201d4b 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -107,8 +107,6 @@
 #define TCR_EL2_MASK	(TCR_EL2_TG0 | TCR_EL2_SH0 | \
 			 TCR_EL2_ORGN0 | TCR_EL2_IRGN0 | TCR_EL2_T0SZ)
 
-#define TCR_EL2_FLAGS	(TCR_EL2_RES1 | TCR_EL2_PS_40B)
-
 /* VTCR_EL2 Registers bits */
 #define VTCR_EL2_RES1		(1 << 31)
 #define VTCR_EL2_PS_MASK	(7 << 16)
@@ -182,6 +180,7 @@
 #define CPTR_EL2_TCPAC	(1 << 31)
 #define CPTR_EL2_TTA	(1 << 20)
 #define CPTR_EL2_TFP	(1 << CPTR_EL2_TFP_SHIFT)
+#define CPTR_EL2_DEFAULT	0x000033ff
 
 /* Hyp Debug Configuration Register bits */
 #define MDCR_EL2_TDRA		(1 << 11)
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 3066328..779a587 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -127,10 +127,14 @@
 
 static inline bool vcpu_mode_priv(const struct kvm_vcpu *vcpu)
 {
-	u32 mode = *vcpu_cpsr(vcpu) & PSR_MODE_MASK;
+	u32 mode;
 
-	if (vcpu_mode_is_32bit(vcpu))
+	if (vcpu_mode_is_32bit(vcpu)) {
+		mode = *vcpu_cpsr(vcpu) & COMPAT_PSR_MODE_MASK;
 		return mode > COMPAT_PSR_MODE_USR;
+	}
+
+	mode = *vcpu_cpsr(vcpu) & PSR_MODE_MASK;
 
 	return mode != PSR_MODE_EL0t;
 }
diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h
index 9b2f5a9..ae615b9 100644
--- a/arch/arm64/include/asm/page.h
+++ b/arch/arm64/include/asm/page.h
@@ -39,6 +39,7 @@
 
 #ifndef __ASSEMBLY__
 
+#include <linux/personality.h> /* for READ_IMPLIES_EXEC */
 #include <asm/pgtable-types.h>
 
 extern void __cpu_clear_user_page(void *p, unsigned long user);
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 2d545d7a..819aff5 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -34,7 +34,7 @@
 /*
  * VMALLOC and SPARSEMEM_VMEMMAP ranges.
  *
- * VMEMAP_SIZE: allows the whole VA space to be covered by a struct page array
+ * VMEMAP_SIZE: allows the whole linear region to be covered by a struct page array
  *	(rounded up to PUD_SIZE).
  * VMALLOC_START: beginning of the kernel VA space
  * VMALLOC_END: extends to the available space below vmmemmap, PCI I/O space,
@@ -51,7 +51,9 @@
 
 #define VMALLOC_END		(PAGE_OFFSET - PUD_SIZE - VMEMMAP_SIZE - SZ_64K)
 
-#define vmemmap			((struct page *)(VMALLOC_END + SZ_64K))
+#define VMEMMAP_START		(VMALLOC_END + SZ_64K)
+#define vmemmap			((struct page *)VMEMMAP_START - \
+				 SECTION_ALIGN_DOWN(memstart_addr >> PAGE_SHIFT))
 
 #define FIRST_USER_ADDRESS	0UL
 
@@ -67,11 +69,11 @@
 #define PROT_DEFAULT		(PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
 #define PROT_SECT_DEFAULT	(PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
 
-#define PROT_DEVICE_nGnRnE	(PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_DEVICE_nGnRnE))
-#define PROT_DEVICE_nGnRE	(PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_DEVICE_nGnRE))
-#define PROT_NORMAL_NC		(PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_NORMAL_NC))
-#define PROT_NORMAL_WT		(PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_NORMAL_WT))
-#define PROT_NORMAL		(PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_NORMAL))
+#define PROT_DEVICE_nGnRnE	(PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRnE))
+#define PROT_DEVICE_nGnRE	(PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRE))
+#define PROT_NORMAL_NC		(PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_NC))
+#define PROT_NORMAL_WT		(PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_WT))
+#define PROT_NORMAL		(PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL))
 
 #define PROT_SECT_DEVICE_nGnRE	(PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_DEVICE_nGnRE))
 #define PROT_SECT_NORMAL	(PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL))
@@ -81,7 +83,7 @@
 
 #define PAGE_KERNEL		__pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE)
 #define PAGE_KERNEL_RO		__pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_RDONLY)
-#define PAGE_KERNEL_ROX	__pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_RDONLY)
+#define PAGE_KERNEL_ROX		__pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_RDONLY)
 #define PAGE_KERNEL_EXEC	__pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE)
 #define PAGE_KERNEL_EXEC_CONT	__pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_CONT)
 
@@ -153,6 +155,7 @@
 #define pte_write(pte)		(!!(pte_val(pte) & PTE_WRITE))
 #define pte_exec(pte)		(!(pte_val(pte) & PTE_UXN))
 #define pte_cont(pte)		(!!(pte_val(pte) & PTE_CONT))
+#define pte_user(pte)		(!!(pte_val(pte) & PTE_USER))
 
 #ifdef CONFIG_ARM64_HW_AFDBM
 #define pte_hw_dirty(pte)	(pte_write(pte) && !(pte_val(pte) & PTE_RDONLY))
@@ -163,8 +166,6 @@
 #define pte_dirty(pte)		(pte_sw_dirty(pte) || pte_hw_dirty(pte))
 
 #define pte_valid(pte)		(!!(pte_val(pte) & PTE_VALID))
-#define pte_valid_user(pte) \
-	((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER))
 #define pte_valid_not_user(pte) \
 	((pte_val(pte) & (PTE_VALID | PTE_USER)) == PTE_VALID)
 #define pte_valid_young(pte) \
@@ -278,13 +279,13 @@
 static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
 			      pte_t *ptep, pte_t pte)
 {
-	if (pte_valid_user(pte)) {
-		if (!pte_special(pte) && pte_exec(pte))
-			__sync_icache_dcache(pte, addr);
+	if (pte_valid(pte)) {
 		if (pte_sw_dirty(pte) && pte_write(pte))
 			pte_val(pte) &= ~PTE_RDONLY;
 		else
 			pte_val(pte) |= PTE_RDONLY;
+		if (pte_user(pte) && pte_exec(pte) && !pte_special(pte))
+			__sync_icache_dcache(pte, addr);
 	}
 
 	/*
diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c
index 8aee3ae..c536c9e 100644
--- a/arch/arm64/kernel/debug-monitors.c
+++ b/arch/arm64/kernel/debug-monitors.c
@@ -226,11 +226,28 @@
 	return retval;
 }
 
+static void send_user_sigtrap(int si_code)
+{
+	struct pt_regs *regs = current_pt_regs();
+	siginfo_t info = {
+		.si_signo	= SIGTRAP,
+		.si_errno	= 0,
+		.si_code	= si_code,
+		.si_addr	= (void __user *)instruction_pointer(regs),
+	};
+
+	if (WARN_ON(!user_mode(regs)))
+		return;
+
+	if (interrupts_enabled(regs))
+		local_irq_enable();
+
+	force_sig_info(SIGTRAP, &info, current);
+}
+
 static int single_step_handler(unsigned long addr, unsigned int esr,
 			       struct pt_regs *regs)
 {
-	siginfo_t info;
-
 	/*
 	 * If we are stepping a pending breakpoint, call the hw_breakpoint
 	 * handler first.
@@ -239,11 +256,7 @@
 		return 0;
 
 	if (user_mode(regs)) {
-		info.si_signo = SIGTRAP;
-		info.si_errno = 0;
-		info.si_code  = TRAP_HWBKPT;
-		info.si_addr  = (void __user *)instruction_pointer(regs);
-		force_sig_info(SIGTRAP, &info, current);
+		send_user_sigtrap(TRAP_HWBKPT);
 
 		/*
 		 * ptrace will disable single step unless explicitly
@@ -307,17 +320,8 @@
 static int brk_handler(unsigned long addr, unsigned int esr,
 		       struct pt_regs *regs)
 {
-	siginfo_t info;
-
 	if (user_mode(regs)) {
-		info = (siginfo_t) {
-			.si_signo = SIGTRAP,
-			.si_errno = 0,
-			.si_code  = TRAP_BRKPT,
-			.si_addr  = (void __user *)instruction_pointer(regs),
-		};
-
-		force_sig_info(SIGTRAP, &info, current);
+		send_user_sigtrap(TRAP_BRKPT);
 	} else if (call_break_hook(regs, esr) != DBG_HOOK_HANDLED) {
 		pr_warning("Unexpected kernel BRK exception at EL1\n");
 		return -EFAULT;
@@ -328,7 +332,6 @@
 
 int aarch32_break_handler(struct pt_regs *regs)
 {
-	siginfo_t info;
 	u32 arm_instr;
 	u16 thumb_instr;
 	bool bp = false;
@@ -359,14 +362,7 @@
 	if (!bp)
 		return -EFAULT;
 
-	info = (siginfo_t) {
-		.si_signo = SIGTRAP,
-		.si_errno = 0,
-		.si_code  = TRAP_BRKPT,
-		.si_addr  = pc,
-	};
-
-	force_sig_info(SIGTRAP, &info, current);
+	send_user_sigtrap(TRAP_BRKPT);
 	return 0;
 }
 
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index ffe9c2b..917d981 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -514,9 +514,14 @@
 #endif
 
 	/* EL2 debug */
+	mrs	x0, id_aa64dfr0_el1		// Check ID_AA64DFR0_EL1 PMUVer
+	sbfx	x0, x0, #8, #4
+	cmp	x0, #1
+	b.lt	4f				// Skip if no PMU present
 	mrs	x0, pmcr_el0			// Disable debug access traps
 	ubfx	x0, x0, #11, #5			// to EL2 and allow access to
 	msr	mdcr_el2, x0			// all PMU counters from EL1
+4:
 
 	/* Stage-2 translation */
 	msr	vttbr_el2, xzr
diff --git a/arch/arm64/kernel/image.h b/arch/arm64/kernel/image.h
index bc2abb8..352f7ab 100644
--- a/arch/arm64/kernel/image.h
+++ b/arch/arm64/kernel/image.h
@@ -65,6 +65,16 @@
 #ifdef CONFIG_EFI
 
 /*
+ * Prevent the symbol aliases below from being emitted into the kallsyms
+ * table, by forcing them to be absolute symbols (which are conveniently
+ * ignored by scripts/kallsyms) rather than section relative symbols.
+ * The distinction is only relevant for partial linking, and only for symbols
+ * that are defined within a section declaration (which is not the case for
+ * the definitions below) so the resulting values will be identical.
+ */
+#define KALLSYMS_HIDE(sym)	ABSOLUTE(sym)
+
+/*
  * The EFI stub has its own symbol namespace prefixed by __efistub_, to
  * isolate it from the kernel proper. The following symbols are legally
  * accessed by the stub, so provide some aliases to make them accessible.
@@ -73,25 +83,26 @@
  * linked at. The routines below are all implemented in assembler in a
  * position independent manner
  */
-__efistub_memcmp		= __pi_memcmp;
-__efistub_memchr		= __pi_memchr;
-__efistub_memcpy		= __pi_memcpy;
-__efistub_memmove		= __pi_memmove;
-__efistub_memset		= __pi_memset;
-__efistub_strlen		= __pi_strlen;
-__efistub_strcmp		= __pi_strcmp;
-__efistub_strncmp		= __pi_strncmp;
-__efistub___flush_dcache_area	= __pi___flush_dcache_area;
+__efistub_memcmp		= KALLSYMS_HIDE(__pi_memcmp);
+__efistub_memchr		= KALLSYMS_HIDE(__pi_memchr);
+__efistub_memcpy		= KALLSYMS_HIDE(__pi_memcpy);
+__efistub_memmove		= KALLSYMS_HIDE(__pi_memmove);
+__efistub_memset		= KALLSYMS_HIDE(__pi_memset);
+__efistub_strlen		= KALLSYMS_HIDE(__pi_strlen);
+__efistub_strnlen		= KALLSYMS_HIDE(__pi_strnlen);
+__efistub_strcmp		= KALLSYMS_HIDE(__pi_strcmp);
+__efistub_strncmp		= KALLSYMS_HIDE(__pi_strncmp);
+__efistub___flush_dcache_area	= KALLSYMS_HIDE(__pi___flush_dcache_area);
 
 #ifdef CONFIG_KASAN
-__efistub___memcpy		= __pi_memcpy;
-__efistub___memmove		= __pi_memmove;
-__efistub___memset		= __pi_memset;
+__efistub___memcpy		= KALLSYMS_HIDE(__pi_memcpy);
+__efistub___memmove		= KALLSYMS_HIDE(__pi_memmove);
+__efistub___memset		= KALLSYMS_HIDE(__pi_memset);
 #endif
 
-__efistub__text			= _text;
-__efistub__end			= _end;
-__efistub__edata		= _edata;
+__efistub__text			= KALLSYMS_HIDE(_text);
+__efistub__end			= KALLSYMS_HIDE(_end);
+__efistub__edata		= KALLSYMS_HIDE(_edata);
 
 #endif
 
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 8119479..450987d 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -73,13 +73,13 @@
 		.name = "Kernel code",
 		.start = 0,
 		.end = 0,
-		.flags = IORESOURCE_MEM
+		.flags = IORESOURCE_SYSTEM_RAM
 	},
 	{
 		.name = "Kernel data",
 		.start = 0,
 		.end = 0,
-		.flags = IORESOURCE_MEM
+		.flags = IORESOURCE_SYSTEM_RAM
 	}
 };
 
@@ -210,7 +210,7 @@
 		res->name  = "System RAM";
 		res->start = __pfn_to_phys(memblock_region_memory_base_pfn(region));
 		res->end = __pfn_to_phys(memblock_region_memory_end_pfn(region)) - 1;
-		res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+		res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
 
 		request_resource(&iomem_resource, res);
 
diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S
index e33fe33..fd10eb6 100644
--- a/arch/arm64/kernel/sleep.S
+++ b/arch/arm64/kernel/sleep.S
@@ -145,6 +145,10 @@
 ENDPROC(cpu_resume_mmu)
 	.popsection
 cpu_resume_after_mmu:
+#ifdef CONFIG_KASAN
+	mov	x0, sp
+	bl	kasan_unpoison_remaining_stack
+#endif
 	mov	x0, #0			// return zero on success
 	ldp	x19, x20, [sp, #16]
 	ldp	x21, x22, [sp, #32]
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index 4fad978..d9751a4 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -44,14 +44,13 @@
 	unsigned long irq_stack_ptr;
 
 	/*
-	 * Use raw_smp_processor_id() to avoid false-positives from
-	 * CONFIG_DEBUG_PREEMPT. get_wchan() calls unwind_frame() on sleeping
-	 * task stacks, we can be pre-empted in this case, so
-	 * {raw_,}smp_processor_id() may give us the wrong value. Sleeping
-	 * tasks can't ever be on an interrupt stack, so regardless of cpu,
-	 * the checks will always fail.
+	 * Switching between stacks is valid when tracing current and in
+	 * non-preemptible context.
 	 */
-	irq_stack_ptr = IRQ_STACK_PTR(raw_smp_processor_id());
+	if (tsk == current && !preemptible())
+		irq_stack_ptr = IRQ_STACK_PTR(smp_processor_id());
+	else
+		irq_stack_ptr = 0;
 
 	low  = frame->sp;
 	/* irq stacks are not THREAD_SIZE aligned */
@@ -64,8 +63,8 @@
 		return -EINVAL;
 
 	frame->sp = fp + 0x10;
-	frame->fp = *(unsigned long *)(fp);
-	frame->pc = *(unsigned long *)(fp + 8);
+	frame->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp));
+	frame->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp + 8));
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 	if (tsk && tsk->ret_stack &&
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index cbedd72..c539208 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -146,9 +146,18 @@
 static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
 {
 	struct stackframe frame;
-	unsigned long irq_stack_ptr = IRQ_STACK_PTR(smp_processor_id());
+	unsigned long irq_stack_ptr;
 	int skip;
 
+	/*
+	 * Switching between stacks is valid when tracing current and in
+	 * non-preemptible context.
+	 */
+	if (tsk == current && !preemptible())
+		irq_stack_ptr = IRQ_STACK_PTR(smp_processor_id());
+	else
+		irq_stack_ptr = 0;
+
 	pr_debug("%s(regs = %p tsk = %p)\n", __func__, regs, tsk);
 
 	if (!tsk)
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index fcb7788..9e54ad7 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -194,7 +194,7 @@
 	u64 val;
 
 	val = kvm_arm_timer_get_reg(vcpu, reg->id);
-	return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id));
+	return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)) ? -EFAULT : 0;
 }
 
 /**
diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S
index 3e568dc..d073b5a 100644
--- a/arch/arm64/kvm/hyp-init.S
+++ b/arch/arm64/kvm/hyp-init.S
@@ -64,7 +64,7 @@
 	mrs	x4, tcr_el1
 	ldr	x5, =TCR_EL2_MASK
 	and	x4, x4, x5
-	ldr	x5, =TCR_EL2_FLAGS
+	mov	x5, #TCR_EL2_RES1
 	orr	x4, x4, x5
 
 #ifndef CONFIG_ARM64_VA_BITS_48
@@ -85,14 +85,16 @@
 	ldr_l	x5, idmap_t0sz
 	bfi	x4, x5, TCR_T0SZ_OFFSET, TCR_TxSZ_WIDTH
 #endif
+	/*
+	 * Read the PARange bits from ID_AA64MMFR0_EL1 and set the PS bits in
+	 * TCR_EL2 and VTCR_EL2.
+	 */
+	mrs	x5, ID_AA64MMFR0_EL1
+	bfi	x4, x5, #16, #3
+
 	msr	tcr_el2, x4
 
 	ldr	x4, =VTCR_EL2_FLAGS
-	/*
-	 * Read the PARange bits from ID_AA64MMFR0_EL1 and set the PS bits in
-	 * VTCR_EL2.
-	 */
-	mrs	x5, ID_AA64MMFR0_EL1
 	bfi	x4, x5, #16, #3
 	/*
 	 * Read the VMIDBits bits from ID_AA64MMFR1_EL1 and set the VS bit in
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index ca8f5a5..f0e7bdf 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -36,7 +36,11 @@
 	write_sysreg(val, hcr_el2);
 	/* Trap on AArch32 cp15 c15 accesses (EL1 or EL0) */
 	write_sysreg(1 << 15, hstr_el2);
-	write_sysreg(CPTR_EL2_TTA | CPTR_EL2_TFP, cptr_el2);
+
+	val = CPTR_EL2_DEFAULT;
+	val |= CPTR_EL2_TTA | CPTR_EL2_TFP;
+	write_sysreg(val, cptr_el2);
+
 	write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2);
 }
 
@@ -45,7 +49,7 @@
 	write_sysreg(HCR_RW, hcr_el2);
 	write_sysreg(0, hstr_el2);
 	write_sysreg(read_sysreg(mdcr_el2) & MDCR_EL2_HPMN_MASK, mdcr_el2);
-	write_sysreg(0, cptr_el2);
+	write_sysreg(CPTR_EL2_DEFAULT, cptr_el2);
 }
 
 static void __hyp_text __activate_vm(struct kvm_vcpu *vcpu)
diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c
index 9142e08..5dd2a26 100644
--- a/arch/arm64/kvm/hyp/vgic-v3-sr.c
+++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c
@@ -149,16 +149,6 @@
 
 	switch (nr_pri_bits) {
 	case 7:
-		 write_gicreg(cpu_if->vgic_ap1r[3], ICH_AP1R3_EL2);
-		 write_gicreg(cpu_if->vgic_ap1r[2], ICH_AP1R2_EL2);
-	case 6:
-		 write_gicreg(cpu_if->vgic_ap1r[1], ICH_AP1R1_EL2);
-	default:
-		 write_gicreg(cpu_if->vgic_ap1r[0], ICH_AP1R0_EL2);
-	}	 	                           
-		 	                           
-	switch (nr_pri_bits) {
-	case 7:
 		 write_gicreg(cpu_if->vgic_ap0r[3], ICH_AP0R3_EL2);
 		 write_gicreg(cpu_if->vgic_ap0r[2], ICH_AP0R2_EL2);
 	case 6:
@@ -167,6 +157,16 @@
 		 write_gicreg(cpu_if->vgic_ap0r[0], ICH_AP0R0_EL2);
 	}
 
+	switch (nr_pri_bits) {
+	case 7:
+		 write_gicreg(cpu_if->vgic_ap1r[3], ICH_AP1R3_EL2);
+		 write_gicreg(cpu_if->vgic_ap1r[2], ICH_AP1R2_EL2);
+	case 6:
+		 write_gicreg(cpu_if->vgic_ap1r[1], ICH_AP1R1_EL2);
+	default:
+		 write_gicreg(cpu_if->vgic_ap1r[0], ICH_AP1R0_EL2);
+	}
+
 	switch (max_lr_idx) {
 	case 15:
 		write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(15)], ICH_LR15_EL2);
diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c
index 648112e..4d1ac81 100644
--- a/arch/arm64/kvm/inject_fault.c
+++ b/arch/arm64/kvm/inject_fault.c
@@ -27,7 +27,11 @@
 
 #define PSTATE_FAULT_BITS_64 	(PSR_MODE_EL1h | PSR_A_BIT | PSR_F_BIT | \
 				 PSR_I_BIT | PSR_D_BIT)
-#define EL1_EXCEPT_SYNC_OFFSET	0x200
+
+#define CURRENT_EL_SP_EL0_VECTOR	0x0
+#define CURRENT_EL_SP_ELx_VECTOR	0x200
+#define LOWER_EL_AArch64_VECTOR		0x400
+#define LOWER_EL_AArch32_VECTOR		0x600
 
 static void prepare_fault32(struct kvm_vcpu *vcpu, u32 mode, u32 vect_offset)
 {
@@ -97,6 +101,34 @@
 		*fsr = 0x14;
 }
 
+enum exception_type {
+	except_type_sync	= 0,
+	except_type_irq		= 0x80,
+	except_type_fiq		= 0x100,
+	except_type_serror	= 0x180,
+};
+
+static u64 get_except_vector(struct kvm_vcpu *vcpu, enum exception_type type)
+{
+	u64 exc_offset;
+
+	switch (*vcpu_cpsr(vcpu) & (PSR_MODE_MASK | PSR_MODE32_BIT)) {
+	case PSR_MODE_EL1t:
+		exc_offset = CURRENT_EL_SP_EL0_VECTOR;
+		break;
+	case PSR_MODE_EL1h:
+		exc_offset = CURRENT_EL_SP_ELx_VECTOR;
+		break;
+	case PSR_MODE_EL0t:
+		exc_offset = LOWER_EL_AArch64_VECTOR;
+		break;
+	default:
+		exc_offset = LOWER_EL_AArch32_VECTOR;
+	}
+
+	return vcpu_sys_reg(vcpu, VBAR_EL1) + exc_offset + type;
+}
+
 static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr)
 {
 	unsigned long cpsr = *vcpu_cpsr(vcpu);
@@ -108,8 +140,8 @@
 	*vcpu_spsr(vcpu) = cpsr;
 	*vcpu_elr_el1(vcpu) = *vcpu_pc(vcpu);
 
+	*vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync);
 	*vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64;
-	*vcpu_pc(vcpu) = vcpu_sys_reg(vcpu, VBAR_EL1) + EL1_EXCEPT_SYNC_OFFSET;
 
 	vcpu_sys_reg(vcpu, FAR_EL1) = addr;
 
@@ -143,8 +175,8 @@
 	*vcpu_spsr(vcpu) = cpsr;
 	*vcpu_elr_el1(vcpu) = *vcpu_pc(vcpu);
 
+	*vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync);
 	*vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64;
-	*vcpu_pc(vcpu) = vcpu_sys_reg(vcpu, VBAR_EL1) + EL1_EXCEPT_SYNC_OFFSET;
 
 	/*
 	 * Build an unknown exception, depending on the instruction
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index eec3598..2e90371 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -1007,10 +1007,9 @@
 		if (likely(r->access(vcpu, params, r))) {
 			/* Skip instruction, since it was emulated */
 			kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
+			/* Handled */
+			return 0;
 		}
-
-		/* Handled */
-		return 0;
 	}
 
 	/* Not handled */
@@ -1043,7 +1042,7 @@
 }
 
 /**
- * kvm_handle_cp_64 -- handles a mrrc/mcrr trap on a guest CP15 access
+ * kvm_handle_cp_64 -- handles a mrrc/mcrr trap on a guest CP14/CP15 access
  * @vcpu: The VCPU pointer
  * @run:  The kvm_run struct
  */
@@ -1095,7 +1094,7 @@
 }
 
 /**
- * kvm_handle_cp15_32 -- handles a mrc/mcr trap on a guest CP15 access
+ * kvm_handle_cp_32 -- handles a mrc/mcr trap on a guest CP14/CP15 access
  * @vcpu: The VCPU pointer
  * @run:  The kvm_run struct
  */
diff --git a/arch/arm64/lib/strnlen.S b/arch/arm64/lib/strnlen.S
index 2ca6657..eae38da 100644
--- a/arch/arm64/lib/strnlen.S
+++ b/arch/arm64/lib/strnlen.S
@@ -168,4 +168,4 @@
 .Lhit_limit:
 	mov	len, limit
 	ret
-ENDPROC(strnlen)
+ENDPIPROC(strnlen)
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index 331c4ca..a6e757c 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -933,6 +933,10 @@
 		ret = register_iommu_dma_ops_notifier(&platform_bus_type);
 	if (!ret)
 		ret = register_iommu_dma_ops_notifier(&amba_bustype);
+
+	/* handle devices queued before this arch_initcall */
+	if (!ret)
+		__iommu_attach_notifier(NULL, BUS_NOTIFY_ADD_DEVICE, NULL);
 	return ret;
 }
 arch_initcall(__iommu_dma_init);
diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c
index 5a22a11..0adbebb 100644
--- a/arch/arm64/mm/dump.c
+++ b/arch/arm64/mm/dump.c
@@ -46,7 +46,7 @@
 	PCI_START_NR,
 	PCI_END_NR,
 	MODULES_START_NR,
-	MODUELS_END_NR,
+	MODULES_END_NR,
 	KERNEL_SPACE_NR,
 };
 
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 92ddac1..abe2a95 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -371,6 +371,13 @@
 	return 0;
 }
 
+static int do_alignment_fault(unsigned long addr, unsigned int esr,
+			      struct pt_regs *regs)
+{
+	do_bad_area(addr, esr, regs);
+	return 0;
+}
+
 /*
  * This abort handler always returns "fault".
  */
@@ -418,7 +425,7 @@
 	{ do_bad,		SIGBUS,  0,		"synchronous parity error (translation table walk)" },
 	{ do_bad,		SIGBUS,  0,		"synchronous parity error (translation table walk)" },
 	{ do_bad,		SIGBUS,  0,		"unknown 32"			},
-	{ do_bad,		SIGBUS,  BUS_ADRALN,	"alignment fault"		},
+	{ do_alignment_fault,	SIGBUS,  BUS_ADRALN,	"alignment fault"		},
 	{ do_bad,		SIGBUS,  0,		"unknown 34"			},
 	{ do_bad,		SIGBUS,  0,		"unknown 35"			},
 	{ do_bad,		SIGBUS,  0,		"unknown 36"			},
diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
index 82d607c..da30529 100644
--- a/arch/arm64/mm/hugetlbpage.c
+++ b/arch/arm64/mm/hugetlbpage.c
@@ -306,10 +306,6 @@
 		hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT);
 	} else if (ps == PUD_SIZE) {
 		hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
-	} else if (ps == (PAGE_SIZE * CONT_PTES)) {
-		hugetlb_add_hstate(CONT_PTE_SHIFT);
-	} else if (ps == (PMD_SIZE * CONT_PMDS)) {
-		hugetlb_add_hstate((PMD_SHIFT + CONT_PMD_SHIFT) - PAGE_SHIFT);
 	} else {
 		pr_err("hugepagesz: Unsupported page size %lu K\n", ps >> 10);
 		return 0;
@@ -317,13 +313,3 @@
 	return 1;
 }
 __setup("hugepagesz=", setup_hugepagesz);
-
-#ifdef CONFIG_ARM64_64K_PAGES
-static __init int add_default_hugepagesz(void)
-{
-	if (size_to_hstate(CONT_PTES * PAGE_SIZE) == NULL)
-		hugetlb_add_hstate(CONT_PMD_SHIFT);
-	return 0;
-}
-arch_initcall(add_default_hugepagesz);
-#endif
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index f3b061e..7802f21 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -319,8 +319,8 @@
 #endif
 		  MLG(VMALLOC_START, VMALLOC_END),
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
-		  MLG((unsigned long)vmemmap,
-		      (unsigned long)vmemmap + VMEMMAP_SIZE),
+		  MLG(VMEMMAP_START,
+		      VMEMMAP_START + VMEMMAP_SIZE),
 		  MLM((unsigned long)virt_to_page(PAGE_OFFSET),
 		      (unsigned long)virt_to_page(high_memory)),
 #endif
diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c
index cf038c7..cab7a5b 100644
--- a/arch/arm64/mm/kasan_init.c
+++ b/arch/arm64/mm/kasan_init.c
@@ -120,6 +120,7 @@
 void __init kasan_init(void)
 {
 	struct memblock_region *reg;
+	int i;
 
 	/*
 	 * We are going to perform proper setup of shadow memory.
@@ -155,6 +156,14 @@
 				pfn_to_nid(virt_to_pfn(start)));
 	}
 
+	/*
+	 * KAsan may reuse the contents of kasan_zero_pte directly, so we
+	 * should make sure that it maps the zero page read-only.
+	 */
+	for (i = 0; i < PTRS_PER_PTE; i++)
+		set_pte(&kasan_zero_pte[i],
+			pfn_pte(virt_to_pfn(kasan_zero_page), PAGE_KERNEL_RO));
+
 	memset(kasan_zero_page, 0, PAGE_SIZE);
 	cpu_set_ttbr1(__pa(swapper_pg_dir));
 	flush_tlb_all();
diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c
index 4c893b5..232f787 100644
--- a/arch/arm64/mm/mmap.c
+++ b/arch/arm64/mm/mmap.c
@@ -53,10 +53,10 @@
 
 #ifdef CONFIG_COMPAT
 	if (test_thread_flag(TIF_32BIT))
-		rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_compat_bits) - 1);
+		rnd = get_random_long() & ((1UL << mmap_rnd_compat_bits) - 1);
 	else
 #endif
-		rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_bits) - 1);
+		rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1);
 	return rnd << PAGE_SHIFT;
 }
 
diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c
index 3571c73..0795c3a 100644
--- a/arch/arm64/mm/pageattr.c
+++ b/arch/arm64/mm/pageattr.c
@@ -14,6 +14,7 @@
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/sched.h>
+#include <linux/vmalloc.h>
 
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
@@ -44,6 +45,7 @@
 	unsigned long end = start + size;
 	int ret;
 	struct page_change_data data;
+	struct vm_struct *area;
 
 	if (!PAGE_ALIGNED(addr)) {
 		start &= PAGE_MASK;
@@ -51,11 +53,27 @@
 		WARN_ON_ONCE(1);
 	}
 
-	if (start < MODULES_VADDR || start >= MODULES_END)
+	/*
+	 * Kernel VA mappings are always live, and splitting live section
+	 * mappings into page mappings may cause TLB conflicts. This means
+	 * we have to ensure that changing the permission bits of the range
+	 * we are operating on does not result in such splitting.
+	 *
+	 * Let's restrict ourselves to mappings created by vmalloc (or vmap).
+	 * Those are guaranteed to consist entirely of page mappings, and
+	 * splitting is never needed.
+	 *
+	 * So check whether the [addr, addr + size) interval is entirely
+	 * covered by precisely one VM area that has the VM_ALLOC flag set.
+	 */
+	area = find_vm_area((void *)addr);
+	if (!area ||
+	    end > (unsigned long)area->addr + area->size ||
+	    !(area->flags & VM_ALLOC))
 		return -EINVAL;
 
-	if (end < MODULES_VADDR || end >= MODULES_END)
-		return -EINVAL;
+	if (!numpages)
+		return 0;
 
 	data.set_mask = set_mask;
 	data.clear_mask = clear_mask;
diff --git a/arch/arm64/mm/proc-macros.S b/arch/arm64/mm/proc-macros.S
index 146bd99..e6a30e1 100644
--- a/arch/arm64/mm/proc-macros.S
+++ b/arch/arm64/mm/proc-macros.S
@@ -84,3 +84,15 @@
 	b.lo	9998b
 	dsb	\domain
 	.endm
+
+/*
+ * reset_pmuserenr_el0 - reset PMUSERENR_EL0 if PMUv3 present
+ */
+	.macro	reset_pmuserenr_el0, tmpreg
+	mrs	\tmpreg, id_aa64dfr0_el1	// Check ID_AA64DFR0_EL1 PMUVer
+	sbfx	\tmpreg, \tmpreg, #8, #4
+	cmp	\tmpreg, #1			// Skip if no PMU present
+	b.lt	9000f
+	msr	pmuserenr_el0, xzr		// Disable PMU access from EL0
+9000:
+	.endm
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index a3d867e..c164d2c 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -117,7 +117,7 @@
 	 */
 	ubfx	x11, x11, #1, #1
 	msr	oslar_el1, x11
-	msr	pmuserenr_el0, xzr		// Disable PMU access from EL0
+	reset_pmuserenr_el0 x0			// Disable PMU access from EL0
 	mov	x0, x12
 	dsb	nsh		// Make sure local tlb invalidation completed
 	isb
@@ -154,7 +154,7 @@
 	msr	cpacr_el1, x0			// Enable FP/ASIMD
 	mov	x0, #1 << 12			// Reset mdscr_el1 and disable
 	msr	mdscr_el1, x0			// access to the DCC from EL0
-	msr	pmuserenr_el0, xzr		// Disable PMU access from EL0
+	reset_pmuserenr_el0 x0			// Disable PMU access from EL0
 	/*
 	 * Memory region attributes for LPAE:
 	 *
diff --git a/arch/avr32/kernel/setup.c b/arch/avr32/kernel/setup.c
index 209ae5a..e692889 100644
--- a/arch/avr32/kernel/setup.c
+++ b/arch/avr32/kernel/setup.c
@@ -49,13 +49,13 @@
 	.name	= "Kernel data",
 	.start	= 0,
 	.end	= 0,
-	.flags	= IORESOURCE_MEM,
+	.flags	= IORESOURCE_SYSTEM_RAM,
 };
 static struct resource __initdata kernel_code = {
 	.name	= "Kernel code",
 	.start	= 0,
 	.end	= 0,
-	.flags	= IORESOURCE_MEM,
+	.flags	= IORESOURCE_SYSTEM_RAM,
 	.sibling = &kernel_data,
 };
 
@@ -134,7 +134,7 @@
 	new->start = start;
 	new->end = end;
 	new->name = "System RAM";
-	new->flags = IORESOURCE_MEM;
+	new->flags = IORESOURCE_SYSTEM_RAM;
 
 	*pprev = new;
 }
diff --git a/arch/c6x/kernel/setup.c b/arch/c6x/kernel/setup.c
index 72e17f7..786e36e 100644
--- a/arch/c6x/kernel/setup.c
+++ b/arch/c6x/kernel/setup.c
@@ -281,8 +281,6 @@
 	 */
 	set_ist(_vectors_start);
 
-	lockdep_init();
-
 	/*
 	 * dtb is passed in from bootloader.
 	 * fdt is linked in blob.
diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c
index caae3f4..300dac3 100644
--- a/arch/ia64/kernel/efi.c
+++ b/arch/ia64/kernel/efi.c
@@ -1178,7 +1178,7 @@
 	efi_memory_desc_t *md;
 	u64 efi_desc_size;
 	char *name;
-	unsigned long flags;
+	unsigned long flags, desc;
 
 	efi_map_start = __va(ia64_boot_param->efi_memmap);
 	efi_map_end   = efi_map_start + ia64_boot_param->efi_memmap_size;
@@ -1193,6 +1193,8 @@
 			continue;
 
 		flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+		desc = IORES_DESC_NONE;
+
 		switch (md->type) {
 
 			case EFI_MEMORY_MAPPED_IO:
@@ -1207,14 +1209,17 @@
 				if (md->attribute & EFI_MEMORY_WP) {
 					name = "System ROM";
 					flags |= IORESOURCE_READONLY;
-				} else if (md->attribute == EFI_MEMORY_UC)
+				} else if (md->attribute == EFI_MEMORY_UC) {
 					name = "Uncached RAM";
-				else
+				} else {
 					name = "System RAM";
+					flags |= IORESOURCE_SYSRAM;
+				}
 				break;
 
 			case EFI_ACPI_MEMORY_NVS:
 				name = "ACPI Non-volatile Storage";
+				desc = IORES_DESC_ACPI_NV_STORAGE;
 				break;
 
 			case EFI_UNUSABLE_MEMORY:
@@ -1224,6 +1229,7 @@
 
 			case EFI_PERSISTENT_MEMORY:
 				name = "Persistent Memory";
+				desc = IORES_DESC_PERSISTENT_MEMORY;
 				break;
 
 			case EFI_RESERVED_TYPE:
@@ -1246,6 +1252,7 @@
 		res->start = md->phys_addr;
 		res->end = md->phys_addr + efi_md_size(md) - 1;
 		res->flags = flags;
+		res->desc = desc;
 
 		if (insert_resource(&iomem_resource, res) < 0)
 			kfree(res);
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index 4f118b0..2029a38 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -80,17 +80,17 @@
 
 static struct resource data_resource = {
 	.name	= "Kernel data",
-	.flags	= IORESOURCE_BUSY | IORESOURCE_MEM
+	.flags	= IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM
 };
 
 static struct resource code_resource = {
 	.name	= "Kernel code",
-	.flags	= IORESOURCE_BUSY | IORESOURCE_MEM
+	.flags	= IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM
 };
 
 static struct resource bss_resource = {
 	.name	= "Kernel bss",
-	.flags	= IORESOURCE_BUSY | IORESOURCE_MEM
+	.flags	= IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM
 };
 
 unsigned long ia64_max_cacheline_size;
diff --git a/arch/m32r/Kconfig b/arch/m32r/Kconfig
index 836ac5a..2841c0a 100644
--- a/arch/m32r/Kconfig
+++ b/arch/m32r/Kconfig
@@ -276,6 +276,7 @@
 
 config SMP
 	bool "Symmetric multi-processing support"
+	depends on MMU
 	---help---
 	  This enables support for systems with more than one CPU. If you have
 	  a system with only one CPU, say N. If you have a system with more
diff --git a/arch/m32r/kernel/setup.c b/arch/m32r/kernel/setup.c
index a5ecef7..136c69f 100644
--- a/arch/m32r/kernel/setup.c
+++ b/arch/m32r/kernel/setup.c
@@ -70,14 +70,14 @@
 	.name   = "Kernel data",
 	.start  = 0,
 	.end    = 0,
-	.flags  = IORESOURCE_BUSY | IORESOURCE_MEM
+	.flags  = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM
 };
 
 static struct resource code_resource = {
 	.name   = "Kernel code",
 	.start  = 0,
 	.end    = 0,
-	.flags  = IORESOURCE_BUSY | IORESOURCE_MEM
+	.flags  = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM
 };
 
 unsigned long memory_start;
diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig
index fc96e81..d1fc479 100644
--- a/arch/m68k/configs/amiga_defconfig
+++ b/arch/m68k/configs/amiga_defconfig
@@ -108,6 +108,8 @@
 CONFIG_NFT_QUEUE=m
 CONFIG_NFT_REJECT=m
 CONFIG_NFT_COMPAT=m
+CONFIG_NFT_DUP_NETDEV=m
+CONFIG_NFT_FWD_NETDEV=m
 CONFIG_NETFILTER_XT_SET=m
 CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m
 CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
@@ -266,6 +268,12 @@
 CONFIG_BRIDGE=m
 CONFIG_ATALK=m
 CONFIG_6LOWPAN=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_HOP=m
+CONFIG_6LOWPAN_GHC_UDP=m
+CONFIG_6LOWPAN_GHC_ICMPV6=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_DEST=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_FRAG=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_ROUTE=m
 CONFIG_DNS_RESOLVER=y
 CONFIG_BATMAN_ADV=m
 CONFIG_BATMAN_ADV_DAT=y
@@ -366,6 +374,7 @@
 # CONFIG_NET_VENDOR_INTEL is not set
 # CONFIG_NET_VENDOR_MARVELL is not set
 # CONFIG_NET_VENDOR_MICREL is not set
+# CONFIG_NET_VENDOR_NETRONOME is not set
 CONFIG_HYDRA=y
 CONFIG_APNE=y
 CONFIG_ZORRO8390=y
diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig
index 05c904f..9bfe8be 100644
--- a/arch/m68k/configs/apollo_defconfig
+++ b/arch/m68k/configs/apollo_defconfig
@@ -106,6 +106,8 @@
 CONFIG_NFT_QUEUE=m
 CONFIG_NFT_REJECT=m
 CONFIG_NFT_COMPAT=m
+CONFIG_NFT_DUP_NETDEV=m
+CONFIG_NFT_FWD_NETDEV=m
 CONFIG_NETFILTER_XT_SET=m
 CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m
 CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
@@ -264,6 +266,12 @@
 CONFIG_BRIDGE=m
 CONFIG_ATALK=m
 CONFIG_6LOWPAN=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_HOP=m
+CONFIG_6LOWPAN_GHC_UDP=m
+CONFIG_6LOWPAN_GHC_ICMPV6=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_DEST=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_FRAG=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_ROUTE=m
 CONFIG_DNS_RESOLVER=y
 CONFIG_BATMAN_ADV=m
 CONFIG_BATMAN_ADV_DAT=y
@@ -344,6 +352,7 @@
 # CONFIG_NET_VENDOR_MARVELL is not set
 # CONFIG_NET_VENDOR_MICREL is not set
 # CONFIG_NET_VENDOR_NATSEMI is not set
+# CONFIG_NET_VENDOR_NETRONOME is not set
 # CONFIG_NET_VENDOR_QUALCOMM is not set
 # CONFIG_NET_VENDOR_RENESAS is not set
 # CONFIG_NET_VENDOR_ROCKER is not set
diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig
index d572b73..ebdcfae 100644
--- a/arch/m68k/configs/atari_defconfig
+++ b/arch/m68k/configs/atari_defconfig
@@ -106,6 +106,8 @@
 CONFIG_NFT_QUEUE=m
 CONFIG_NFT_REJECT=m
 CONFIG_NFT_COMPAT=m
+CONFIG_NFT_DUP_NETDEV=m
+CONFIG_NFT_FWD_NETDEV=m
 CONFIG_NETFILTER_XT_SET=m
 CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m
 CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
@@ -264,6 +266,12 @@
 CONFIG_BRIDGE=m
 CONFIG_ATALK=m
 CONFIG_6LOWPAN=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_HOP=m
+CONFIG_6LOWPAN_GHC_UDP=m
+CONFIG_6LOWPAN_GHC_ICMPV6=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_DEST=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_FRAG=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_ROUTE=m
 CONFIG_DNS_RESOLVER=y
 CONFIG_BATMAN_ADV=m
 CONFIG_BATMAN_ADV_DAT=y
@@ -353,6 +361,7 @@
 # CONFIG_NET_VENDOR_INTEL is not set
 # CONFIG_NET_VENDOR_MARVELL is not set
 # CONFIG_NET_VENDOR_MICREL is not set
+# CONFIG_NET_VENDOR_NETRONOME is not set
 CONFIG_NE2000=y
 # CONFIG_NET_VENDOR_QUALCOMM is not set
 # CONFIG_NET_VENDOR_RENESAS is not set
diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig
index 11a30c6..8acc65e 100644
--- a/arch/m68k/configs/bvme6000_defconfig
+++ b/arch/m68k/configs/bvme6000_defconfig
@@ -104,6 +104,8 @@
 CONFIG_NFT_QUEUE=m
 CONFIG_NFT_REJECT=m
 CONFIG_NFT_COMPAT=m
+CONFIG_NFT_DUP_NETDEV=m
+CONFIG_NFT_FWD_NETDEV=m
 CONFIG_NETFILTER_XT_SET=m
 CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m
 CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
@@ -262,6 +264,12 @@
 CONFIG_BRIDGE=m
 CONFIG_ATALK=m
 CONFIG_6LOWPAN=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_HOP=m
+CONFIG_6LOWPAN_GHC_UDP=m
+CONFIG_6LOWPAN_GHC_ICMPV6=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_DEST=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_FRAG=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_ROUTE=m
 CONFIG_DNS_RESOLVER=y
 CONFIG_BATMAN_ADV=m
 CONFIG_BATMAN_ADV_DAT=y
@@ -343,6 +351,7 @@
 # CONFIG_NET_VENDOR_MARVELL is not set
 # CONFIG_NET_VENDOR_MICREL is not set
 # CONFIG_NET_VENDOR_NATSEMI is not set
+# CONFIG_NET_VENDOR_NETRONOME is not set
 # CONFIG_NET_VENDOR_QUALCOMM is not set
 # CONFIG_NET_VENDOR_RENESAS is not set
 # CONFIG_NET_VENDOR_ROCKER is not set
diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig
index 6630a51..0c6a3d5 100644
--- a/arch/m68k/configs/hp300_defconfig
+++ b/arch/m68k/configs/hp300_defconfig
@@ -106,6 +106,8 @@
 CONFIG_NFT_QUEUE=m
 CONFIG_NFT_REJECT=m
 CONFIG_NFT_COMPAT=m
+CONFIG_NFT_DUP_NETDEV=m
+CONFIG_NFT_FWD_NETDEV=m
 CONFIG_NETFILTER_XT_SET=m
 CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m
 CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
@@ -264,6 +266,12 @@
 CONFIG_BRIDGE=m
 CONFIG_ATALK=m
 CONFIG_6LOWPAN=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_HOP=m
+CONFIG_6LOWPAN_GHC_UDP=m
+CONFIG_6LOWPAN_GHC_ICMPV6=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_DEST=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_FRAG=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_ROUTE=m
 CONFIG_DNS_RESOLVER=y
 CONFIG_BATMAN_ADV=m
 CONFIG_BATMAN_ADV_DAT=y
@@ -345,6 +353,7 @@
 # CONFIG_NET_VENDOR_MARVELL is not set
 # CONFIG_NET_VENDOR_MICREL is not set
 # CONFIG_NET_VENDOR_NATSEMI is not set
+# CONFIG_NET_VENDOR_NETRONOME is not set
 # CONFIG_NET_VENDOR_QUALCOMM is not set
 # CONFIG_NET_VENDOR_RENESAS is not set
 # CONFIG_NET_VENDOR_ROCKER is not set
diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig
index 1d90b71..12a8a6c 100644
--- a/arch/m68k/configs/mac_defconfig
+++ b/arch/m68k/configs/mac_defconfig
@@ -105,6 +105,8 @@
 CONFIG_NFT_QUEUE=m
 CONFIG_NFT_REJECT=m
 CONFIG_NFT_COMPAT=m
+CONFIG_NFT_DUP_NETDEV=m
+CONFIG_NFT_FWD_NETDEV=m
 CONFIG_NETFILTER_XT_SET=m
 CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m
 CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
@@ -266,6 +268,12 @@
 CONFIG_IPDDP=m
 CONFIG_IPDDP_ENCAP=y
 CONFIG_6LOWPAN=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_HOP=m
+CONFIG_6LOWPAN_GHC_UDP=m
+CONFIG_6LOWPAN_GHC_ICMPV6=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_DEST=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_FRAG=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_ROUTE=m
 CONFIG_DNS_RESOLVER=y
 CONFIG_BATMAN_ADV=m
 CONFIG_BATMAN_ADV_DAT=y
@@ -362,6 +370,7 @@
 # CONFIG_NET_VENDOR_MARVELL is not set
 # CONFIG_NET_VENDOR_MICREL is not set
 CONFIG_MACSONIC=y
+# CONFIG_NET_VENDOR_NETRONOME is not set
 CONFIG_MAC8390=y
 # CONFIG_NET_VENDOR_QUALCOMM is not set
 # CONFIG_NET_VENDOR_RENESAS is not set
diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig
index 1fd21c1..64ff2dc 100644
--- a/arch/m68k/configs/multi_defconfig
+++ b/arch/m68k/configs/multi_defconfig
@@ -115,6 +115,8 @@
 CONFIG_NFT_QUEUE=m
 CONFIG_NFT_REJECT=m
 CONFIG_NFT_COMPAT=m
+CONFIG_NFT_DUP_NETDEV=m
+CONFIG_NFT_FWD_NETDEV=m
 CONFIG_NETFILTER_XT_SET=m
 CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m
 CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
@@ -276,6 +278,12 @@
 CONFIG_IPDDP=m
 CONFIG_IPDDP_ENCAP=y
 CONFIG_6LOWPAN=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_HOP=m
+CONFIG_6LOWPAN_GHC_UDP=m
+CONFIG_6LOWPAN_GHC_ICMPV6=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_DEST=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_FRAG=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_ROUTE=m
 CONFIG_DNS_RESOLVER=y
 CONFIG_BATMAN_ADV=m
 CONFIG_BATMAN_ADV_DAT=y
@@ -404,6 +412,7 @@
 # CONFIG_NET_VENDOR_MARVELL is not set
 # CONFIG_NET_VENDOR_MICREL is not set
 CONFIG_MACSONIC=y
+# CONFIG_NET_VENDOR_NETRONOME is not set
 CONFIG_HYDRA=y
 CONFIG_MAC8390=y
 CONFIG_NE2000=y
diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig
index 74e10f7..07fc6ab 100644
--- a/arch/m68k/configs/mvme147_defconfig
+++ b/arch/m68k/configs/mvme147_defconfig
@@ -103,6 +103,8 @@
 CONFIG_NFT_QUEUE=m
 CONFIG_NFT_REJECT=m
 CONFIG_NFT_COMPAT=m
+CONFIG_NFT_DUP_NETDEV=m
+CONFIG_NFT_FWD_NETDEV=m
 CONFIG_NETFILTER_XT_SET=m
 CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m
 CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
@@ -261,6 +263,12 @@
 CONFIG_BRIDGE=m
 CONFIG_ATALK=m
 CONFIG_6LOWPAN=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_HOP=m
+CONFIG_6LOWPAN_GHC_UDP=m
+CONFIG_6LOWPAN_GHC_ICMPV6=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_DEST=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_FRAG=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_ROUTE=m
 CONFIG_DNS_RESOLVER=y
 CONFIG_BATMAN_ADV=m
 CONFIG_BATMAN_ADV_DAT=y
@@ -343,6 +351,7 @@
 # CONFIG_NET_VENDOR_MARVELL is not set
 # CONFIG_NET_VENDOR_MICREL is not set
 # CONFIG_NET_VENDOR_NATSEMI is not set
+# CONFIG_NET_VENDOR_NETRONOME is not set
 # CONFIG_NET_VENDOR_QUALCOMM is not set
 # CONFIG_NET_VENDOR_RENESAS is not set
 # CONFIG_NET_VENDOR_ROCKER is not set
diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig
index 7034e71..69903de 100644
--- a/arch/m68k/configs/mvme16x_defconfig
+++ b/arch/m68k/configs/mvme16x_defconfig
@@ -104,6 +104,8 @@
 CONFIG_NFT_QUEUE=m
 CONFIG_NFT_REJECT=m
 CONFIG_NFT_COMPAT=m
+CONFIG_NFT_DUP_NETDEV=m
+CONFIG_NFT_FWD_NETDEV=m
 CONFIG_NETFILTER_XT_SET=m
 CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m
 CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
@@ -262,6 +264,12 @@
 CONFIG_BRIDGE=m
 CONFIG_ATALK=m
 CONFIG_6LOWPAN=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_HOP=m
+CONFIG_6LOWPAN_GHC_UDP=m
+CONFIG_6LOWPAN_GHC_ICMPV6=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_DEST=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_FRAG=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_ROUTE=m
 CONFIG_DNS_RESOLVER=y
 CONFIG_BATMAN_ADV=m
 CONFIG_BATMAN_ADV_DAT=y
@@ -343,6 +351,7 @@
 # CONFIG_NET_VENDOR_MARVELL is not set
 # CONFIG_NET_VENDOR_MICREL is not set
 # CONFIG_NET_VENDOR_NATSEMI is not set
+# CONFIG_NET_VENDOR_NETRONOME is not set
 # CONFIG_NET_VENDOR_QUALCOMM is not set
 # CONFIG_NET_VENDOR_RENESAS is not set
 # CONFIG_NET_VENDOR_ROCKER is not set
diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig
index f7deb5f..bd84016 100644
--- a/arch/m68k/configs/q40_defconfig
+++ b/arch/m68k/configs/q40_defconfig
@@ -104,6 +104,8 @@
 CONFIG_NFT_QUEUE=m
 CONFIG_NFT_REJECT=m
 CONFIG_NFT_COMPAT=m
+CONFIG_NFT_DUP_NETDEV=m
+CONFIG_NFT_FWD_NETDEV=m
 CONFIG_NETFILTER_XT_SET=m
 CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m
 CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
@@ -262,6 +264,12 @@
 CONFIG_BRIDGE=m
 CONFIG_ATALK=m
 CONFIG_6LOWPAN=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_HOP=m
+CONFIG_6LOWPAN_GHC_UDP=m
+CONFIG_6LOWPAN_GHC_ICMPV6=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_DEST=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_FRAG=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_ROUTE=m
 CONFIG_DNS_RESOLVER=y
 CONFIG_BATMAN_ADV=m
 CONFIG_BATMAN_ADV_DAT=y
@@ -352,6 +360,7 @@
 # CONFIG_NET_VENDOR_INTEL is not set
 # CONFIG_NET_VENDOR_MARVELL is not set
 # CONFIG_NET_VENDOR_MICREL is not set
+# CONFIG_NET_VENDOR_NETRONOME is not set
 CONFIG_NE2000=y
 # CONFIG_NET_VENDOR_QUALCOMM is not set
 # CONFIG_NET_VENDOR_RENESAS is not set
diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig
index 0ce79eb..5f9fb3a 100644
--- a/arch/m68k/configs/sun3_defconfig
+++ b/arch/m68k/configs/sun3_defconfig
@@ -101,6 +101,8 @@
 CONFIG_NFT_QUEUE=m
 CONFIG_NFT_REJECT=m
 CONFIG_NFT_COMPAT=m
+CONFIG_NFT_DUP_NETDEV=m
+CONFIG_NFT_FWD_NETDEV=m
 CONFIG_NETFILTER_XT_SET=m
 CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m
 CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
@@ -259,6 +261,12 @@
 CONFIG_BRIDGE=m
 CONFIG_ATALK=m
 CONFIG_6LOWPAN=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_HOP=m
+CONFIG_6LOWPAN_GHC_UDP=m
+CONFIG_6LOWPAN_GHC_ICMPV6=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_DEST=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_FRAG=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_ROUTE=m
 CONFIG_DNS_RESOLVER=y
 CONFIG_BATMAN_ADV=m
 CONFIG_BATMAN_ADV_DAT=y
@@ -340,6 +348,7 @@
 # CONFIG_NET_VENDOR_MARVELL is not set
 # CONFIG_NET_VENDOR_MICREL is not set
 # CONFIG_NET_VENDOR_NATSEMI is not set
+# CONFIG_NET_VENDOR_NETRONOME is not set
 # CONFIG_NET_VENDOR_QUALCOMM is not set
 # CONFIG_NET_VENDOR_RENESAS is not set
 # CONFIG_NET_VENDOR_ROCKER is not set
diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig
index 4cb787e..5d1c674 100644
--- a/arch/m68k/configs/sun3x_defconfig
+++ b/arch/m68k/configs/sun3x_defconfig
@@ -101,6 +101,8 @@
 CONFIG_NFT_QUEUE=m
 CONFIG_NFT_REJECT=m
 CONFIG_NFT_COMPAT=m
+CONFIG_NFT_DUP_NETDEV=m
+CONFIG_NFT_FWD_NETDEV=m
 CONFIG_NETFILTER_XT_SET=m
 CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m
 CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
@@ -259,6 +261,12 @@
 CONFIG_BRIDGE=m
 CONFIG_ATALK=m
 CONFIG_6LOWPAN=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_HOP=m
+CONFIG_6LOWPAN_GHC_UDP=m
+CONFIG_6LOWPAN_GHC_ICMPV6=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_DEST=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_FRAG=m
+CONFIG_6LOWPAN_GHC_EXT_HDR_ROUTE=m
 CONFIG_DNS_RESOLVER=y
 CONFIG_BATMAN_ADV=m
 CONFIG_BATMAN_ADV_DAT=y
@@ -341,6 +349,7 @@
 # CONFIG_NET_VENDOR_MARVELL is not set
 # CONFIG_NET_VENDOR_MICREL is not set
 # CONFIG_NET_VENDOR_NATSEMI is not set
+# CONFIG_NET_VENDOR_NETRONOME is not set
 # CONFIG_NET_VENDOR_QUALCOMM is not set
 # CONFIG_NET_VENDOR_RENESAS is not set
 # CONFIG_NET_VENDOR_ROCKER is not set
diff --git a/arch/m68k/include/asm/unistd.h b/arch/m68k/include/asm/unistd.h
index f9d96bf..bafaff6 100644
--- a/arch/m68k/include/asm/unistd.h
+++ b/arch/m68k/include/asm/unistd.h
@@ -4,7 +4,7 @@
 #include <uapi/asm/unistd.h>
 
 
-#define NR_syscalls		376
+#define NR_syscalls		377
 
 #define __ARCH_WANT_OLD_READDIR
 #define __ARCH_WANT_OLD_STAT
diff --git a/arch/m68k/include/uapi/asm/unistd.h b/arch/m68k/include/uapi/asm/unistd.h
index 36cf129..0ca7296 100644
--- a/arch/m68k/include/uapi/asm/unistd.h
+++ b/arch/m68k/include/uapi/asm/unistd.h
@@ -381,5 +381,6 @@
 #define __NR_userfaultfd	373
 #define __NR_membarrier		374
 #define __NR_mlock2		375
+#define __NR_copy_file_range	376
 
 #endif /* _UAPI_ASM_M68K_UNISTD_H_ */
diff --git a/arch/m68k/kernel/syscalltable.S b/arch/m68k/kernel/syscalltable.S
index 282cd90..8bb9426 100644
--- a/arch/m68k/kernel/syscalltable.S
+++ b/arch/m68k/kernel/syscalltable.S
@@ -396,3 +396,4 @@
 	.long sys_userfaultfd
 	.long sys_membarrier
 	.long sys_mlock2		/* 375 */
+	.long sys_copy_file_range
diff --git a/arch/microblaze/kernel/setup.c b/arch/microblaze/kernel/setup.c
index 89a2a939..f31ebb5 100644
--- a/arch/microblaze/kernel/setup.c
+++ b/arch/microblaze/kernel/setup.c
@@ -130,8 +130,6 @@
 	memset(__bss_start, 0, __bss_stop-__bss_start);
 	memset(_ssbss, 0, _esbss-_ssbss);
 
-	lockdep_init();
-
 /* initialize device tree for usage in early_printk */
 	early_init_devtree(_fdt_start);
 
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 57a945e..d3da79d 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -2085,7 +2085,7 @@
 
 config PAGE_SIZE_64KB
 	bool "64kB"
-	depends on !CPU_R3000 && !CPU_TX39XX
+	depends on !CPU_R3000 && !CPU_TX39XX && !CPU_R6000
 	help
 	  Using 64kB page size will result in higher performance kernel at
 	  the price of higher memory consumption.  This option is available on
@@ -2169,7 +2169,7 @@
 	select CPU_MIPSR2_IRQ_VI
 	select CPU_MIPSR2_IRQ_EI
 	select SYNC_R4K
-	select MIPS_GIC_IPI
+	select MIPS_GIC_IPI if MIPS_GIC
 	select MIPS_MT
 	select SMP
 	select SMP_UP
@@ -2267,7 +2267,7 @@
 config MIPS_CMP
 	bool "MIPS CMP framework support (DEPRECATED)"
 	depends on SYS_SUPPORTS_MIPS_CMP && !CPU_MIPSR6
-	select MIPS_GIC_IPI
+	select MIPS_GIC_IPI if MIPS_GIC
 	select SMP
 	select SYNC_R4K
 	select SYS_SUPPORTS_SMP
@@ -2287,7 +2287,7 @@
 	select MIPS_CM
 	select MIPS_CPC
 	select MIPS_CPS_PM if HOTPLUG_CPU
-	select MIPS_GIC_IPI
+	select MIPS_GIC_IPI if MIPS_GIC
 	select SMP
 	select SYNC_R4K if (CEVT_R4K || CSRC_R4K)
 	select SYS_SUPPORTS_HOTPLUG_CPU
@@ -2306,6 +2306,7 @@
 	bool
 
 config MIPS_GIC_IPI
+	depends on MIPS_GIC
 	bool
 
 config MIPS_CM
diff --git a/arch/mips/bcm63xx/nvram.c b/arch/mips/bcm63xx/nvram.c
index 5f2bc1e..05757ae 100644
--- a/arch/mips/bcm63xx/nvram.c
+++ b/arch/mips/bcm63xx/nvram.c
@@ -19,6 +19,8 @@
 
 #include <bcm63xx_nvram.h>
 
+#define BCM63XX_DEFAULT_PSI_SIZE	64
+
 static struct bcm963xx_nvram nvram;
 static int mac_addr_used;
 
@@ -85,3 +87,12 @@
 	return 0;
 }
 EXPORT_SYMBOL(bcm63xx_nvram_get_mac_address);
+
+int bcm63xx_nvram_get_psi_size(void)
+{
+	if (nvram.psi_size > 0)
+		return nvram.psi_size;
+
+	return BCM63XX_DEFAULT_PSI_SIZE;
+}
+EXPORT_SYMBOL(bcm63xx_nvram_get_psi_size);
diff --git a/arch/mips/boot/compressed/uart-16550.c b/arch/mips/boot/compressed/uart-16550.c
index 408799a..f752114 100644
--- a/arch/mips/boot/compressed/uart-16550.c
+++ b/arch/mips/boot/compressed/uart-16550.c
@@ -17,7 +17,7 @@
 #define PORT(offset) (CKSEG1ADDR(AR7_REGS_UART0) + (4 * offset))
 #endif
 
-#ifdef CONFIG_MACH_JZ4740
+#if defined(CONFIG_MACH_JZ4740) || defined(CONFIG_MACH_JZ4780)
 #include <asm/mach-jz4740/base.h>
 #define PORT(offset) (CKSEG1ADDR(JZ4740_UART0_BASE_ADDR) + (4 * offset))
 #endif
diff --git a/arch/mips/boot/dts/brcm/bcm6328.dtsi b/arch/mips/boot/dts/brcm/bcm6328.dtsi
index 459b9b2..d61b161 100644
--- a/arch/mips/boot/dts/brcm/bcm6328.dtsi
+++ b/arch/mips/boot/dts/brcm/bcm6328.dtsi
@@ -74,6 +74,7 @@
 		timer: timer@10000040 {
 			compatible = "syscon";
 			reg = <0x10000040 0x2c>;
+			little-endian;
 		};
 
 		reboot {
diff --git a/arch/mips/boot/dts/brcm/bcm7125.dtsi b/arch/mips/boot/dts/brcm/bcm7125.dtsi
index 4fc7ece..1a7efa8 100644
--- a/arch/mips/boot/dts/brcm/bcm7125.dtsi
+++ b/arch/mips/boot/dts/brcm/bcm7125.dtsi
@@ -98,6 +98,7 @@
 		sun_top_ctrl: syscon@404000 {
 			compatible = "brcm,bcm7125-sun-top-ctrl", "syscon";
 			reg = <0x404000 0x60c>;
+			little-endian;
 		};
 
 		reboot {
diff --git a/arch/mips/boot/dts/brcm/bcm7346.dtsi b/arch/mips/boot/dts/brcm/bcm7346.dtsi
index a3039bb..d4bf52c 100644
--- a/arch/mips/boot/dts/brcm/bcm7346.dtsi
+++ b/arch/mips/boot/dts/brcm/bcm7346.dtsi
@@ -118,6 +118,7 @@
 		sun_top_ctrl: syscon@404000 {
 			compatible = "brcm,bcm7346-sun-top-ctrl", "syscon";
 			reg = <0x404000 0x51c>;
+			little-endian;
 		};
 
 		reboot {
diff --git a/arch/mips/boot/dts/brcm/bcm7358.dtsi b/arch/mips/boot/dts/brcm/bcm7358.dtsi
index 4274ff4..8e25016 100644
--- a/arch/mips/boot/dts/brcm/bcm7358.dtsi
+++ b/arch/mips/boot/dts/brcm/bcm7358.dtsi
@@ -112,6 +112,7 @@
 		sun_top_ctrl: syscon@404000 {
 			compatible = "brcm,bcm7358-sun-top-ctrl", "syscon";
 			reg = <0x404000 0x51c>;
+			little-endian;
 		};
 
 		reboot {
diff --git a/arch/mips/boot/dts/brcm/bcm7360.dtsi b/arch/mips/boot/dts/brcm/bcm7360.dtsi
index 0dcc9163..7e5f760 100644
--- a/arch/mips/boot/dts/brcm/bcm7360.dtsi
+++ b/arch/mips/boot/dts/brcm/bcm7360.dtsi
@@ -112,6 +112,7 @@
 		sun_top_ctrl: syscon@404000 {
 			compatible = "brcm,bcm7360-sun-top-ctrl", "syscon";
 			reg = <0x404000 0x51c>;
+			little-endian;
 		};
 
 		reboot {
diff --git a/arch/mips/boot/dts/brcm/bcm7362.dtsi b/arch/mips/boot/dts/brcm/bcm7362.dtsi
index 2f3f9fc..c739ea7 100644
--- a/arch/mips/boot/dts/brcm/bcm7362.dtsi
+++ b/arch/mips/boot/dts/brcm/bcm7362.dtsi
@@ -118,6 +118,7 @@
 		sun_top_ctrl: syscon@404000 {
 			compatible = "brcm,bcm7362-sun-top-ctrl", "syscon";
 			reg = <0x404000 0x51c>;
+			little-endian;
 		};
 
 		reboot {
diff --git a/arch/mips/boot/dts/brcm/bcm7420.dtsi b/arch/mips/boot/dts/brcm/bcm7420.dtsi
index bee221b3..5f55d0a 100644
--- a/arch/mips/boot/dts/brcm/bcm7420.dtsi
+++ b/arch/mips/boot/dts/brcm/bcm7420.dtsi
@@ -99,6 +99,7 @@
 		sun_top_ctrl: syscon@404000 {
 			compatible = "brcm,bcm7420-sun-top-ctrl", "syscon";
 			reg = <0x404000 0x60c>;
+			little-endian;
 		};
 
 		reboot {
diff --git a/arch/mips/boot/dts/brcm/bcm7425.dtsi b/arch/mips/boot/dts/brcm/bcm7425.dtsi
index 571f30f..e24d41a 100644
--- a/arch/mips/boot/dts/brcm/bcm7425.dtsi
+++ b/arch/mips/boot/dts/brcm/bcm7425.dtsi
@@ -100,6 +100,7 @@
 		sun_top_ctrl: syscon@404000 {
 			compatible = "brcm,bcm7425-sun-top-ctrl", "syscon";
 			reg = <0x404000 0x51c>;
+			little-endian;
 		};
 
 		reboot {
diff --git a/arch/mips/boot/dts/brcm/bcm7435.dtsi b/arch/mips/boot/dts/brcm/bcm7435.dtsi
index 614ee21..8b9432c 100644
--- a/arch/mips/boot/dts/brcm/bcm7435.dtsi
+++ b/arch/mips/boot/dts/brcm/bcm7435.dtsi
@@ -114,6 +114,7 @@
 		sun_top_ctrl: syscon@404000 {
 			compatible = "brcm,bcm7425-sun-top-ctrl", "syscon";
 			reg = <0x404000 0x51c>;
+			little-endian;
 		};
 
 		reboot {
diff --git a/arch/mips/include/asm/elf.h b/arch/mips/include/asm/elf.h
index cefb7a5..e090fc3 100644
--- a/arch/mips/include/asm/elf.h
+++ b/arch/mips/include/asm/elf.h
@@ -227,7 +227,7 @@
 	int __res = 1;							\
 	struct elfhdr *__h = (hdr);					\
 									\
-	if (__h->e_machine != EM_MIPS)					\
+	if (!mips_elf_check_machine(__h))				\
 		__res = 0;						\
 	if (__h->e_ident[EI_CLASS] != ELFCLASS32)			\
 		__res = 0;						\
@@ -258,7 +258,7 @@
 	int __res = 1;							\
 	struct elfhdr *__h = (hdr);					\
 									\
-	if (__h->e_machine != EM_MIPS)					\
+	if (!mips_elf_check_machine(__h))				\
 		__res = 0;						\
 	if (__h->e_ident[EI_CLASS] != ELFCLASS64)			\
 		__res = 0;						\
@@ -285,6 +285,11 @@
 
 #endif /* !defined(ELF_ARCH) */
 
+#define mips_elf_check_machine(x) ((x)->e_machine == EM_MIPS)
+
+#define vmcore_elf32_check_arch mips_elf_check_machine
+#define vmcore_elf64_check_arch mips_elf_check_machine
+
 struct mips_abi;
 
 extern struct mips_abi mips_abi;
diff --git a/arch/mips/include/asm/fpu.h b/arch/mips/include/asm/fpu.h
index 9cbf383..f06f97b 100644
--- a/arch/mips/include/asm/fpu.h
+++ b/arch/mips/include/asm/fpu.h
@@ -179,6 +179,10 @@
 		if (save)
 			_save_fp(tsk);
 		__disable_fpu();
+	} else {
+		/* FPU should not have been left enabled with no owner */
+		WARN(read_c0_status() & ST0_CU1,
+		     "Orphaned FPU left enabled");
 	}
 	KSTK_STATUS(tsk) &= ~ST0_CU1;
 	clear_tsk_thread_flag(tsk, TIF_USEDFPU);
diff --git a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_nvram.h b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_nvram.h
index 4e0b6bc..348df49 100644
--- a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_nvram.h
+++ b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_nvram.h
@@ -30,4 +30,6 @@
  */
 int bcm63xx_nvram_get_mac_address(u8 *mac);
 
+int bcm63xx_nvram_get_psi_size(void);
+
 #endif /* BCM63XX_NVRAM_H */
diff --git a/arch/mips/include/asm/octeon/octeon-feature.h b/arch/mips/include/asm/octeon/octeon-feature.h
index 8ebd3f57..3ed10a8 100644
--- a/arch/mips/include/asm/octeon/octeon-feature.h
+++ b/arch/mips/include/asm/octeon/octeon-feature.h
@@ -128,7 +128,8 @@
 	case OCTEON_FEATURE_PCIE:
 		return OCTEON_IS_MODEL(OCTEON_CN56XX)
 			|| OCTEON_IS_MODEL(OCTEON_CN52XX)
-			|| OCTEON_IS_MODEL(OCTEON_CN6XXX);
+			|| OCTEON_IS_MODEL(OCTEON_CN6XXX)
+			|| OCTEON_IS_MODEL(OCTEON_CN7XXX);
 
 	case OCTEON_FEATURE_SRIO:
 		return OCTEON_IS_MODEL(OCTEON_CN63XX)
diff --git a/arch/mips/include/asm/processor.h b/arch/mips/include/asm/processor.h
index 3f832c3..041153f 100644
--- a/arch/mips/include/asm/processor.h
+++ b/arch/mips/include/asm/processor.h
@@ -45,7 +45,7 @@
  * User space process size: 2GB. This is hardcoded into a few places,
  * so don't change it unless you know what you are doing.
  */
-#define TASK_SIZE	0x7fff8000UL
+#define TASK_SIZE	0x80000000UL
 #endif
 
 #define STACK_TOP_MAX	TASK_SIZE
diff --git a/arch/mips/include/asm/stackframe.h b/arch/mips/include/asm/stackframe.h
index a71da57..eebf395 100644
--- a/arch/mips/include/asm/stackframe.h
+++ b/arch/mips/include/asm/stackframe.h
@@ -289,7 +289,7 @@
 		.set	reorder
 		.set	noat
 		mfc0	a0, CP0_STATUS
-		li	v1, 0xff00
+		li	v1, ST0_CU1 | ST0_IM
 		ori	a0, STATMASK
 		xori	a0, STATMASK
 		mtc0	a0, CP0_STATUS
@@ -330,7 +330,7 @@
 		ori	a0, STATMASK
 		xori	a0, STATMASK
 		mtc0	a0, CP0_STATUS
-		li	v1, 0xff00
+		li	v1, ST0_CU1 | ST0_FR | ST0_IM
 		and	a0, v1
 		LONG_L	v0, PT_STATUS(sp)
 		nor	v1, $0, v1
diff --git a/arch/mips/include/asm/syscall.h b/arch/mips/include/asm/syscall.h
index 6499d93..47bc45a 100644
--- a/arch/mips/include/asm/syscall.h
+++ b/arch/mips/include/asm/syscall.h
@@ -101,10 +101,8 @@
 	/* O32 ABI syscall() - Either 64-bit with O32 or 32-bit */
 	if ((config_enabled(CONFIG_32BIT) ||
 	    test_tsk_thread_flag(task, TIF_32BIT_REGS)) &&
-	    (regs->regs[2] == __NR_syscall)) {
+	    (regs->regs[2] == __NR_syscall))
 		i++;
-		n++;
-	}
 
 	while (n--)
 		ret |= mips_get_syscall_arg(args++, task, regs, i++);
diff --git a/arch/mips/include/uapi/asm/unistd.h b/arch/mips/include/uapi/asm/unistd.h
index 90f03a7..3129795 100644
--- a/arch/mips/include/uapi/asm/unistd.h
+++ b/arch/mips/include/uapi/asm/unistd.h
@@ -380,16 +380,17 @@
 #define __NR_userfaultfd		(__NR_Linux + 357)
 #define __NR_membarrier			(__NR_Linux + 358)
 #define __NR_mlock2			(__NR_Linux + 359)
+#define __NR_copy_file_range		(__NR_Linux + 360)
 
 /*
  * Offset of the last Linux o32 flavoured syscall
  */
-#define __NR_Linux_syscalls		359
+#define __NR_Linux_syscalls		360
 
 #endif /* _MIPS_SIM == _MIPS_SIM_ABI32 */
 
 #define __NR_O32_Linux			4000
-#define __NR_O32_Linux_syscalls		359
+#define __NR_O32_Linux_syscalls		360
 
 #if _MIPS_SIM == _MIPS_SIM_ABI64
 
@@ -717,16 +718,17 @@
 #define __NR_userfaultfd		(__NR_Linux + 317)
 #define __NR_membarrier			(__NR_Linux + 318)
 #define __NR_mlock2			(__NR_Linux + 319)
+#define __NR_copy_file_range		(__NR_Linux + 320)
 
 /*
  * Offset of the last Linux 64-bit flavoured syscall
  */
-#define __NR_Linux_syscalls		319
+#define __NR_Linux_syscalls		320
 
 #endif /* _MIPS_SIM == _MIPS_SIM_ABI64 */
 
 #define __NR_64_Linux			5000
-#define __NR_64_Linux_syscalls		319
+#define __NR_64_Linux_syscalls		320
 
 #if _MIPS_SIM == _MIPS_SIM_NABI32
 
@@ -1058,15 +1060,16 @@
 #define __NR_userfaultfd		(__NR_Linux + 321)
 #define __NR_membarrier			(__NR_Linux + 322)
 #define __NR_mlock2			(__NR_Linux + 323)
+#define __NR_copy_file_range		(__NR_Linux + 324)
 
 /*
  * Offset of the last N32 flavoured syscall
  */
-#define __NR_Linux_syscalls		323
+#define __NR_Linux_syscalls		324
 
 #endif /* _MIPS_SIM == _MIPS_SIM_NABI32 */
 
 #define __NR_N32_Linux			6000
-#define __NR_N32_Linux_syscalls		323
+#define __NR_N32_Linux_syscalls		324
 
 #endif /* _UAPI_ASM_UNISTD_H */
diff --git a/arch/mips/jz4740/gpio.c b/arch/mips/jz4740/gpio.c
index 8c6d76c..d9907e5 100644
--- a/arch/mips/jz4740/gpio.c
+++ b/arch/mips/jz4740/gpio.c
@@ -270,7 +270,7 @@
 }
 EXPORT_SYMBOL(jz_gpio_port_get_value);
 
-#define IRQ_TO_BIT(irq) BIT(irq_to_gpio(irq) & 0x1f)
+#define IRQ_TO_BIT(irq) BIT((irq - JZ4740_IRQ_GPIO(0)) & 0x1f)
 
 static void jz_gpio_check_trigger_both(struct jz_gpio_chip *chip, unsigned int irq)
 {
diff --git a/arch/mips/kernel/binfmt_elfn32.c b/arch/mips/kernel/binfmt_elfn32.c
index 1188e00..1b992c6 100644
--- a/arch/mips/kernel/binfmt_elfn32.c
+++ b/arch/mips/kernel/binfmt_elfn32.c
@@ -35,7 +35,7 @@
 	int __res = 1;							\
 	struct elfhdr *__h = (hdr);					\
 									\
-	if (__h->e_machine != EM_MIPS)					\
+	if (!mips_elf_check_machine(__h))				\
 		__res = 0;						\
 	if (__h->e_ident[EI_CLASS] != ELFCLASS32)			\
 		__res = 0;						\
diff --git a/arch/mips/kernel/binfmt_elfo32.c b/arch/mips/kernel/binfmt_elfo32.c
index 9287678..abd3aff 100644
--- a/arch/mips/kernel/binfmt_elfo32.c
+++ b/arch/mips/kernel/binfmt_elfo32.c
@@ -47,7 +47,7 @@
 	int __res = 1;							\
 	struct elfhdr *__h = (hdr);					\
 									\
-	if (__h->e_machine != EM_MIPS)					\
+	if (!mips_elf_check_machine(__h))				\
 		__res = 0;						\
 	if (__h->e_ident[EI_CLASS] != ELFCLASS32)			\
 		__res = 0;						\
diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
index f2975d4..eddd5fd 100644
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -65,12 +65,10 @@
 	status = regs->cp0_status & ~(ST0_CU0|ST0_CU1|ST0_FR|KU_MASK);
 	status |= KU_USER;
 	regs->cp0_status = status;
-	clear_used_math();
-	clear_fpu_owner();
-	init_dsp();
-	clear_thread_flag(TIF_USEDMSA);
+	lose_fpu(0);
 	clear_thread_flag(TIF_MSA_CTX_LIVE);
-	disable_msa();
+	clear_used_math();
+	init_dsp();
 	regs->cp0_epc = pc;
 	regs->regs[29] = sp;
 }
diff --git a/arch/mips/kernel/r2300_fpu.S b/arch/mips/kernel/r2300_fpu.S
index 5ce3b74..b4ac637 100644
--- a/arch/mips/kernel/r2300_fpu.S
+++ b/arch/mips/kernel/r2300_fpu.S
@@ -125,7 +125,7 @@
 	END(_restore_fp_context)
 	.set	reorder
 
-	.type	fault@function
+	.type	fault, @function
 	.ent	fault
 fault:	li	v0, -EFAULT
 	jr	ra
diff --git a/arch/mips/kernel/r4k_fpu.S b/arch/mips/kernel/r4k_fpu.S
index f09546e..17732f8 100644
--- a/arch/mips/kernel/r4k_fpu.S
+++ b/arch/mips/kernel/r4k_fpu.S
@@ -358,7 +358,7 @@
 
 	.set	reorder
 
-	.type	fault@function
+	.type	fault, @function
 	.ent	fault
 fault:	li	v0, -EFAULT				# failure
 	jr	ra
diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S
index 2d23c83..a563174 100644
--- a/arch/mips/kernel/scall32-o32.S
+++ b/arch/mips/kernel/scall32-o32.S
@@ -595,3 +595,4 @@
 	PTR	sys_userfaultfd
 	PTR	sys_membarrier
 	PTR	sys_mlock2
+	PTR	sys_copy_file_range		/* 4360 */
diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S
index deac633..2b2dc14 100644
--- a/arch/mips/kernel/scall64-64.S
+++ b/arch/mips/kernel/scall64-64.S
@@ -433,4 +433,5 @@
 	PTR	sys_userfaultfd
 	PTR	sys_membarrier
 	PTR	sys_mlock2
+	PTR	sys_copy_file_range		/* 5320 */
 	.size	sys_call_table,.-sys_call_table
diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S
index 5a69eb4..2bf5c85 100644
--- a/arch/mips/kernel/scall64-n32.S
+++ b/arch/mips/kernel/scall64-n32.S
@@ -423,4 +423,5 @@
 	PTR	sys_userfaultfd
 	PTR	sys_membarrier
 	PTR	sys_mlock2
+	PTR	sys_copy_file_range
 	.size	sysn32_call_table,.-sysn32_call_table
diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S
index e4b6d7c..c5b759e 100644
--- a/arch/mips/kernel/scall64-o32.S
+++ b/arch/mips/kernel/scall64-o32.S
@@ -578,4 +578,5 @@
 	PTR	sys_userfaultfd
 	PTR	sys_membarrier
 	PTR	sys_mlock2
+	PTR	sys_copy_file_range		/* 4360 */
 	.size	sys32_call_table,.-sys32_call_table
diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c
index 569a7d5..4f60734 100644
--- a/arch/mips/kernel/setup.c
+++ b/arch/mips/kernel/setup.c
@@ -732,21 +732,23 @@
 			end = HIGHMEM_START - 1;
 
 		res = alloc_bootmem(sizeof(struct resource));
+
+		res->start = start;
+		res->end = end;
+		res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+
 		switch (boot_mem_map.map[i].type) {
 		case BOOT_MEM_RAM:
 		case BOOT_MEM_INIT_RAM:
 		case BOOT_MEM_ROM_DATA:
 			res->name = "System RAM";
+			res->flags |= IORESOURCE_SYSRAM;
 			break;
 		case BOOT_MEM_RESERVED:
 		default:
 			res->name = "reserved";
 		}
 
-		res->start = start;
-		res->end = end;
-
-		res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
 		request_resource(&iomem_resource, res);
 
 		/*
@@ -782,6 +784,7 @@
 void __init setup_arch(char **cmdline_p)
 {
 	cpu_probe();
+	mips_cm_probe();
 	prom_init();
 
 	setup_early_fdc_console();
diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
index bd4385a..2b521e0 100644
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c
@@ -121,6 +121,7 @@
 	cpumask_t temp_foreign_map;
 
 	/* Re-calculate the mask */
+	cpumask_clear(&temp_foreign_map);
 	for_each_online_cpu(i) {
 		core_present = 0;
 		for_each_cpu(k, &temp_foreign_map)
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index bafcb7a..bf14da9 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -663,7 +663,7 @@
 	return -1;
 }
 
-static int simulate_rdhwr_mm(struct pt_regs *regs, unsigned short opcode)
+static int simulate_rdhwr_mm(struct pt_regs *regs, unsigned int opcode)
 {
 	if ((opcode & MM_POOL32A_FUNC) == MM_RDHWR) {
 		int rd = (opcode & MM_RS) >> 16;
@@ -690,15 +690,15 @@
 asmlinkage void do_ov(struct pt_regs *regs)
 {
 	enum ctx_state prev_state;
-	siginfo_t info;
+	siginfo_t info = {
+		.si_signo = SIGFPE,
+		.si_code = FPE_INTOVF,
+		.si_addr = (void __user *)regs->cp0_epc,
+	};
 
 	prev_state = exception_enter();
 	die_if_kernel("Integer overflow", regs);
 
-	info.si_code = FPE_INTOVF;
-	info.si_signo = SIGFPE;
-	info.si_errno = 0;
-	info.si_addr = (void __user *) regs->cp0_epc;
 	force_sig_info(SIGFPE, &info, current);
 	exception_exit(prev_state);
 }
@@ -874,7 +874,7 @@
 void do_trap_or_bp(struct pt_regs *regs, unsigned int code,
 	const char *str)
 {
-	siginfo_t info;
+	siginfo_t info = { 0 };
 	char b[40];
 
 #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP
@@ -903,7 +903,6 @@
 		else
 			info.si_code = FPE_INTOVF;
 		info.si_signo = SIGFPE;
-		info.si_errno = 0;
 		info.si_addr = (void __user *) regs->cp0_epc;
 		force_sig_info(SIGFPE, &info, current);
 		break;
@@ -1119,11 +1118,12 @@
 	if (get_isa16_mode(regs->cp0_epc)) {
 		unsigned short mmop[2] = { 0 };
 
-		if (unlikely(get_user(mmop[0], epc) < 0))
+		if (unlikely(get_user(mmop[0], (u16 __user *)epc + 0) < 0))
 			status = SIGSEGV;
-		if (unlikely(get_user(mmop[1], epc) < 0))
+		if (unlikely(get_user(mmop[1], (u16 __user *)epc + 1) < 0))
 			status = SIGSEGV;
-		opcode = (mmop[0] << 16) | mmop[1];
+		opcode = mmop[0];
+		opcode = (opcode << 16) | mmop[1];
 
 		if (status < 0)
 			status = simulate_rdhwr_mm(regs, opcode);
@@ -1369,26 +1369,12 @@
 		if (unlikely(compute_return_epc(regs) < 0))
 			break;
 
-		if (get_isa16_mode(regs->cp0_epc)) {
-			unsigned short mmop[2] = { 0 };
-
-			if (unlikely(get_user(mmop[0], epc) < 0))
-				status = SIGSEGV;
-			if (unlikely(get_user(mmop[1], epc) < 0))
-				status = SIGSEGV;
-			opcode = (mmop[0] << 16) | mmop[1];
-
-			if (status < 0)
-				status = simulate_rdhwr_mm(regs, opcode);
-		} else {
+		if (!get_isa16_mode(regs->cp0_epc)) {
 			if (unlikely(get_user(opcode, epc) < 0))
 				status = SIGSEGV;
 
 			if (!cpu_has_llsc && status < 0)
 				status = simulate_llsc(regs, opcode);
-
-			if (status < 0)
-				status = simulate_rdhwr_normal(regs, opcode);
 		}
 
 		if (status < 0)
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index 8bc3977..70ef1a4 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -445,8 +445,8 @@
 
 	dvcpu->arch.wait = 0;
 
-	if (waitqueue_active(&dvcpu->wq))
-		wake_up_interruptible(&dvcpu->wq);
+	if (swait_active(&dvcpu->wq))
+		swake_up(&dvcpu->wq);
 
 	return 0;
 }
@@ -702,7 +702,7 @@
 	} else if ((reg->id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U128) {
 		void __user *uaddr = (void __user *)(long)reg->addr;
 
-		return copy_to_user(uaddr, vs, 16);
+		return copy_to_user(uaddr, vs, 16) ? -EFAULT : 0;
 	} else {
 		return -EINVAL;
 	}
@@ -732,7 +732,7 @@
 	} else if ((reg->id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U128) {
 		void __user *uaddr = (void __user *)(long)reg->addr;
 
-		return copy_from_user(vs, uaddr, 16);
+		return copy_from_user(vs, uaddr, 16) ? -EFAULT : 0;
 	} else {
 		return -EINVAL;
 	}
@@ -1174,8 +1174,8 @@
 	kvm_mips_callbacks->queue_timer_int(vcpu);
 
 	vcpu->arch.wait = 0;
-	if (waitqueue_active(&vcpu->wq))
-		wake_up_interruptible(&vcpu->wq);
+	if (swait_active(&vcpu->wq))
+		swake_up(&vcpu->wq);
 }
 
 /* low level hrtimer wake routine */
diff --git a/arch/mips/mm/mmap.c b/arch/mips/mm/mmap.c
index 5c81fdd..3530376 100644
--- a/arch/mips/mm/mmap.c
+++ b/arch/mips/mm/mmap.c
@@ -146,7 +146,7 @@
 {
 	unsigned long rnd;
 
-	rnd = (unsigned long)get_random_int();
+	rnd = get_random_long();
 	rnd <<= PAGE_SHIFT;
 	if (TASK_IS_32BIT_ADDR)
 		rnd &= 0xfffffful;
@@ -174,7 +174,7 @@
 
 static inline unsigned long brk_rnd(void)
 {
-	unsigned long rnd = get_random_int();
+	unsigned long rnd = get_random_long();
 
 	rnd = rnd << PAGE_SHIFT;
 	/* 8MB for 32bit, 256MB for 64bit */
diff --git a/arch/mips/mm/sc-mips.c b/arch/mips/mm/sc-mips.c
index 3bd0597..91dec32 100644
--- a/arch/mips/mm/sc-mips.c
+++ b/arch/mips/mm/sc-mips.c
@@ -164,11 +164,13 @@
 
 	sets = cfg & CM_GCR_L2_CONFIG_SET_SIZE_MSK;
 	sets >>= CM_GCR_L2_CONFIG_SET_SIZE_SHF;
-	c->scache.sets = 64 << sets;
+	if (sets)
+		c->scache.sets = 64 << sets;
 
 	line_sz = cfg & CM_GCR_L2_CONFIG_LINE_SIZE_MSK;
 	line_sz >>= CM_GCR_L2_CONFIG_LINE_SIZE_SHF;
-	c->scache.linesz = 2 << line_sz;
+	if (line_sz)
+		c->scache.linesz = 2 << line_sz;
 
 	assoc = cfg & CM_GCR_L2_CONFIG_ASSOC_MSK;
 	assoc >>= CM_GCR_L2_CONFIG_ASSOC_SHF;
@@ -176,13 +178,12 @@
 	c->scache.waysize = c->scache.sets * c->scache.linesz;
 	c->scache.waybit = __ffs(c->scache.waysize);
 
-	c->scache.flags &= ~MIPS_CACHE_NOT_PRESENT;
+	if (c->scache.linesz) {
+		c->scache.flags &= ~MIPS_CACHE_NOT_PRESENT;
+		return 1;
+	}
 
-	return 1;
-}
-
-void __weak platform_early_l2_init(void)
-{
+	return 0;
 }
 
 static inline int __init mips_sc_probe(void)
@@ -194,12 +195,6 @@
 	/* Mark as not present until probe completed */
 	c->scache.flags |= MIPS_CACHE_NOT_PRESENT;
 
-	/*
-	 * Do we need some platform specific probing before
-	 * we configure L2?
-	 */
-	platform_early_l2_init();
-
 	if (mips_cm_revision() >= CM_REV_CM3)
 		return mips_sc_probe_cm3();
 
diff --git a/arch/mips/mti-malta/malta-init.c b/arch/mips/mti-malta/malta-init.c
index 571148c..dc2c521 100644
--- a/arch/mips/mti-malta/malta-init.c
+++ b/arch/mips/mti-malta/malta-init.c
@@ -293,7 +293,6 @@
 	console_config();
 #endif
 	/* Early detection of CMP support */
-	mips_cm_probe();
 	mips_cpc_probe();
 
 	if (!register_cps_smp_ops())
@@ -304,10 +303,3 @@
 		return;
 	register_up_smp_ops();
 }
-
-void platform_early_l2_init(void)
-{
-	/* L2 configuration lives in the CM3 */
-	if (mips_cm_revision() >= CM_REV_CM3)
-		mips_cm_probe();
-}
diff --git a/arch/mips/pci/pci-mt7620.c b/arch/mips/pci/pci-mt7620.c
index a009ee4..1ae932c 100644
--- a/arch/mips/pci/pci-mt7620.c
+++ b/arch/mips/pci/pci-mt7620.c
@@ -297,12 +297,12 @@
 		return PTR_ERR(rstpcie0);
 
 	bridge_base = devm_ioremap_resource(&pdev->dev, bridge_res);
-	if (!bridge_base)
-		return -ENOMEM;
+	if (IS_ERR(bridge_base))
+		return PTR_ERR(bridge_base);
 
 	pcie_base = devm_ioremap_resource(&pdev->dev, pcie_res);
-	if (!pcie_base)
-		return -ENOMEM;
+	if (IS_ERR(pcie_base))
+		return PTR_ERR(pcie_base);
 
 	iomem_resource.start = 0;
 	iomem_resource.end = ~0;
diff --git a/arch/parisc/include/asm/cache.h b/arch/parisc/include/asm/cache.h
index 3d0e17b..df0f52b 100644
--- a/arch/parisc/include/asm/cache.h
+++ b/arch/parisc/include/asm/cache.h
@@ -22,6 +22,9 @@
 
 #define __read_mostly __attribute__((__section__(".data..read_mostly")))
 
+/* Read-only memory is marked before mark_rodata_ro() is called. */
+#define __ro_after_init	__read_mostly
+
 void parisc_cache_init(void);	/* initializes cache-flushing */
 void disable_sr_hashing_asm(int); /* low level support for above */
 void disable_sr_hashing(void);   /* turns off space register hashing */
diff --git a/arch/parisc/include/asm/cacheflush.h b/arch/parisc/include/asm/cacheflush.h
index 845272c..7bd69bd 100644
--- a/arch/parisc/include/asm/cacheflush.h
+++ b/arch/parisc/include/asm/cacheflush.h
@@ -121,10 +121,6 @@
 	}
 }
 
-#ifdef CONFIG_DEBUG_RODATA
-void mark_rodata_ro(void);
-#endif
-
 #include <asm/kmap_types.h>
 
 #define ARCH_HAS_KMAP
diff --git a/arch/parisc/include/asm/floppy.h b/arch/parisc/include/asm/floppy.h
index f84ff12..6d8276cd 100644
--- a/arch/parisc/include/asm/floppy.h
+++ b/arch/parisc/include/asm/floppy.h
@@ -33,7 +33,7 @@
  * floppy accesses go through the track buffer.
  */
 #define _CROSS_64KB(a,s,vdma) \
-(!vdma && ((unsigned long)(a)/K_64 != ((unsigned long)(a) + (s) - 1) / K_64))
+(!(vdma) && ((unsigned long)(a)/K_64 != ((unsigned long)(a) + (s) - 1) / K_64))
 
 #define CROSS_64KB(a,s) _CROSS_64KB(a,s,use_virtual_dma & 1)
 
diff --git a/arch/parisc/include/uapi/asm/unistd.h b/arch/parisc/include/uapi/asm/unistd.h
index 35bdccb..b75039f 100644
--- a/arch/parisc/include/uapi/asm/unistd.h
+++ b/arch/parisc/include/uapi/asm/unistd.h
@@ -361,8 +361,9 @@
 #define __NR_membarrier		(__NR_Linux + 343)
 #define __NR_userfaultfd	(__NR_Linux + 344)
 #define __NR_mlock2		(__NR_Linux + 345)
+#define __NR_copy_file_range	(__NR_Linux + 346)
 
-#define __NR_Linux_syscalls	(__NR_mlock2 + 1)
+#define __NR_Linux_syscalls	(__NR_copy_file_range + 1)
 
 
 #define __IGNORE_select		/* newselect */
diff --git a/arch/parisc/kernel/ptrace.c b/arch/parisc/kernel/ptrace.c
index 9585c81..ce0b2b4 100644
--- a/arch/parisc/kernel/ptrace.c
+++ b/arch/parisc/kernel/ptrace.c
@@ -269,14 +269,19 @@
 
 long do_syscall_trace_enter(struct pt_regs *regs)
 {
-	long ret = 0;
-
 	/* Do the secure computing check first. */
 	secure_computing_strict(regs->gr[20]);
 
 	if (test_thread_flag(TIF_SYSCALL_TRACE) &&
-	    tracehook_report_syscall_entry(regs))
-		ret = -1L;
+	    tracehook_report_syscall_entry(regs)) {
+		/*
+		 * Tracing decided this syscall should not happen or the
+		 * debugger stored an invalid system call number. Skip
+		 * the system call and the system call restart handling.
+		 */
+		regs->gr[20] = -1UL;
+		goto out;
+	}
 
 #ifdef CONFIG_64BIT
 	if (!is_compat_task())
@@ -290,7 +295,8 @@
 			regs->gr[24] & 0xffffffff,
 			regs->gr[23] & 0xffffffff);
 
-	return ret ? : regs->gr[20];
+out:
+	return regs->gr[20];
 }
 
 void do_syscall_trace_exit(struct pt_regs *regs)
diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S
index 3fbd725..fbafa0d 100644
--- a/arch/parisc/kernel/syscall.S
+++ b/arch/parisc/kernel/syscall.S
@@ -343,7 +343,7 @@
 #endif
 
 	comiclr,>>=	__NR_Linux_syscalls, %r20, %r0
-	b,n	.Lsyscall_nosys
+	b,n	.Ltracesys_nosys
 
 	LDREGX  %r20(%r19), %r19
 
@@ -359,6 +359,9 @@
 	be      0(%sr7,%r19)
 	ldo	R%tracesys_exit(%r2),%r2
 
+.Ltracesys_nosys:
+	ldo	-ENOSYS(%r0),%r28		/* set errno */
+
 	/* Do *not* call this function on the gateway page, because it
 	makes a direct call to syscall_trace. */
 	
diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S
index d4ffcfb..585d50f 100644
--- a/arch/parisc/kernel/syscall_table.S
+++ b/arch/parisc/kernel/syscall_table.S
@@ -441,6 +441,7 @@
 	ENTRY_SAME(membarrier)
 	ENTRY_SAME(userfaultfd)
 	ENTRY_SAME(mlock2)		/* 345 */
+	ENTRY_SAME(copy_file_range)
 
 
 .ifne (. - 90b) - (__NR_Linux_syscalls * (91b - 90b))
diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c
index 1b366c4..3c07d6b 100644
--- a/arch/parisc/mm/init.c
+++ b/arch/parisc/mm/init.c
@@ -55,12 +55,12 @@
 
 static struct resource data_resource = {
 	.name	= "Kernel data",
-	.flags	= IORESOURCE_BUSY | IORESOURCE_MEM,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
 };
 
 static struct resource code_resource = {
 	.name	= "Kernel code",
-	.flags	= IORESOURCE_BUSY | IORESOURCE_MEM,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
 };
 
 static struct resource pdcdata_resource = {
@@ -201,7 +201,7 @@
 		res->name = "System RAM";
 		res->start = pmem_ranges[i].start_pfn << PAGE_SHIFT;
 		res->end = res->start + (pmem_ranges[i].pages << PAGE_SHIFT)-1;
-		res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+		res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
 		request_resource(&iomem_resource, res);
 	}
 
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index e4824fd..9faa18c 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -557,7 +557,7 @@
 
 config PPC_4K_PAGES
 	bool "4k page size"
-	select HAVE_ARCH_SOFT_DIRTY if CHECKPOINT_RESTORE && PPC_BOOK3S
+	select HAVE_ARCH_SOFT_DIRTY if PPC_BOOK3S_64
 
 config PPC_16K_PAGES
 	bool "16k page size"
@@ -566,7 +566,7 @@
 config PPC_64K_PAGES
 	bool "64k page size"
 	depends on !PPC_FSL_BOOK3E && (44x || PPC_STD_MMU_64 || PPC_BOOK3E_64)
-	select HAVE_ARCH_SOFT_DIRTY if CHECKPOINT_RESTORE && PPC_BOOK3S
+	select HAVE_ARCH_SOFT_DIRTY if PPC_BOOK3S_64
 
 config PPC_256K_PAGES
 	bool "256k page size"
diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
index 06f17e7..8d1c816 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -50,7 +50,9 @@
  * set of bits not changed in pmd_modify.
  */
 #define _HPAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \
-			 _PAGE_ACCESSED | _PAGE_THP_HUGE)
+			 _PAGE_ACCESSED | _PAGE_THP_HUGE | _PAGE_PTE | \
+			 _PAGE_SOFT_DIRTY)
+
 
 #ifdef CONFIG_PPC_64K_PAGES
 #include <asm/book3s/64/hash-64k.h>
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 8204b0c..ac07a30 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -223,7 +223,6 @@
 #define pmd_pfn(pmd)		pte_pfn(pmd_pte(pmd))
 #define pmd_dirty(pmd)		pte_dirty(pmd_pte(pmd))
 #define pmd_young(pmd)		pte_young(pmd_pte(pmd))
-#define pmd_dirty(pmd)		pte_dirty(pmd_pte(pmd))
 #define pmd_mkold(pmd)		pte_pmd(pte_mkold(pmd_pte(pmd)))
 #define pmd_wrprotect(pmd)	pte_pmd(pte_wrprotect(pmd_pte(pmd)))
 #define pmd_mkdirty(pmd)	pte_pmd(pte_mkdirty(pmd_pte(pmd)))
@@ -282,6 +281,10 @@
 extern void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
 			    pmd_t *pmdp);
 
+#define __HAVE_ARCH_PMDP_HUGE_SPLIT_PREPARE
+extern void pmdp_huge_split_prepare(struct vm_area_struct *vma,
+				    unsigned long address, pmd_t *pmdp);
+
 #define pmd_move_must_withdraw pmd_move_must_withdraw
 struct spinlock;
 static inline int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl,
diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index c5eb86f..867c39b 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -81,6 +81,7 @@
 #define EEH_PE_KEEP		(1 << 8)	/* Keep PE on hotplug	*/
 #define EEH_PE_CFG_RESTRICTED	(1 << 9)	/* Block config on error */
 #define EEH_PE_REMOVED		(1 << 10)	/* Removed permanently	*/
+#define EEH_PE_PRI_BUS		(1 << 11)	/* Cached primary bus   */
 
 struct eeh_pe {
 	int type;			/* PE type: PHB/Bus/Device	*/
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 271fefb..c98afa5 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -38,8 +38,7 @@
 
 #define KVM_MAX_VCPUS		NR_CPUS
 #define KVM_MAX_VCORES		NR_CPUS
-#define KVM_USER_MEM_SLOTS 32
-#define KVM_MEM_SLOTS_NUM KVM_USER_MEM_SLOTS
+#define KVM_USER_MEM_SLOTS	512
 
 #ifdef CONFIG_KVM_MMIO
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
@@ -290,7 +289,7 @@
 	struct list_head runnable_threads;
 	struct list_head preempt_list;
 	spinlock_t lock;
-	wait_queue_head_t wq;
+	struct swait_queue_head wq;
 	spinlock_t stoltb_lock;	/* protects stolen_tb and preempt_tb */
 	u64 stolen_tb;
 	u64 preempt_tb;
@@ -630,7 +629,7 @@
 	u8 prodded;
 	u32 last_inst;
 
-	wait_queue_head_t *wqp;
+	struct swait_queue_head *wqp;
 	struct kvmppc_vcore *vcore;
 	int ret;
 	int trap;
diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h
index 5654ece..3fa9df7 100644
--- a/arch/powerpc/include/asm/systbl.h
+++ b/arch/powerpc/include/asm/systbl.h
@@ -383,3 +383,4 @@
 SYSCALL(ni_syscall)
 SYSCALL(ni_syscall)
 SYSCALL(mlock2)
+SYSCALL(copy_file_range)
diff --git a/arch/powerpc/include/asm/trace.h b/arch/powerpc/include/asm/trace.h
index 8e86b48..32e36b1 100644
--- a/arch/powerpc/include/asm/trace.h
+++ b/arch/powerpc/include/asm/trace.h
@@ -57,12 +57,14 @@
 extern void hcall_tracepoint_regfunc(void);
 extern void hcall_tracepoint_unregfunc(void);
 
-TRACE_EVENT_FN(hcall_entry,
+TRACE_EVENT_FN_COND(hcall_entry,
 
 	TP_PROTO(unsigned long opcode, unsigned long *args),
 
 	TP_ARGS(opcode, args),
 
+	TP_CONDITION(cpu_online(raw_smp_processor_id())),
+
 	TP_STRUCT__entry(
 		__field(unsigned long, opcode)
 	),
@@ -76,13 +78,15 @@
 	hcall_tracepoint_regfunc, hcall_tracepoint_unregfunc
 );
 
-TRACE_EVENT_FN(hcall_exit,
+TRACE_EVENT_FN_COND(hcall_exit,
 
 	TP_PROTO(unsigned long opcode, unsigned long retval,
 		unsigned long *retbuf),
 
 	TP_ARGS(opcode, retval, retbuf),
 
+	TP_CONDITION(cpu_online(raw_smp_processor_id())),
+
 	TP_STRUCT__entry(
 		__field(unsigned long, opcode)
 		__field(unsigned long, retval)
diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h
index 6a5ace5..1f2594d 100644
--- a/arch/powerpc/include/asm/unistd.h
+++ b/arch/powerpc/include/asm/unistd.h
@@ -12,7 +12,7 @@
 #include <uapi/asm/unistd.h>
 
 
-#define NR_syscalls		379
+#define NR_syscalls		380
 
 #define __NR__exit __NR_exit
 
diff --git a/arch/powerpc/include/uapi/asm/unistd.h b/arch/powerpc/include/uapi/asm/unistd.h
index 12a0565..940290d 100644
--- a/arch/powerpc/include/uapi/asm/unistd.h
+++ b/arch/powerpc/include/uapi/asm/unistd.h
@@ -389,5 +389,6 @@
 #define __NR_userfaultfd	364
 #define __NR_membarrier		365
 #define __NR_mlock2		378
+#define __NR_copy_file_range	379
 
 #endif /* _UAPI_ASM_POWERPC_UNISTD_H_ */
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 9387421..650cfb3 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -418,8 +418,7 @@
 		eeh_pcid_put(dev);
 		if (driver->err_handler &&
 		    driver->err_handler->error_detected &&
-		    driver->err_handler->slot_reset &&
-		    driver->err_handler->resume)
+		    driver->err_handler->slot_reset)
 			return NULL;
 	}
 
@@ -564,6 +563,7 @@
 	 */
 	eeh_pe_state_mark(pe, EEH_PE_KEEP);
 	if (bus) {
+		eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
 		pci_lock_rescan_remove();
 		pcibios_remove_pci_devices(bus);
 		pci_unlock_rescan_remove();
@@ -803,6 +803,7 @@
 	 * the their PCI config any more.
 	 */
 	if (frozen_bus) {
+		eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
 		eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
 
 		pci_lock_rescan_remove();
@@ -886,6 +887,7 @@
 					continue;
 
 				/* Notify all devices to be down */
+				eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
 				bus = eeh_pe_bus_get(phb_pe);
 				eeh_pe_dev_traverse(pe,
 					eeh_report_failure, NULL);
diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
index 8654cb1..98f8180 100644
--- a/arch/powerpc/kernel/eeh_pe.c
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -883,32 +883,29 @@
 const char *eeh_pe_loc_get(struct eeh_pe *pe)
 {
 	struct pci_bus *bus = eeh_pe_bus_get(pe);
-	struct device_node *dn = pci_bus_to_OF_node(bus);
+	struct device_node *dn;
 	const char *loc = NULL;
 
-	if (!dn)
-		goto out;
+	while (bus) {
+		dn = pci_bus_to_OF_node(bus);
+		if (!dn) {
+			bus = bus->parent;
+			continue;
+		}
 
-	/* PHB PE or root PE ? */
-	if (pci_is_root_bus(bus)) {
-		loc = of_get_property(dn, "ibm,loc-code", NULL);
-		if (!loc)
+		if (pci_is_root_bus(bus))
 			loc = of_get_property(dn, "ibm,io-base-loc-code", NULL);
-		if (loc)
-			goto out;
+		else
+			loc = of_get_property(dn, "ibm,slot-location-code",
+					      NULL);
 
-		/* Check the root port */
-		dn = dn->child;
-		if (!dn)
-			goto out;
+		if (loc)
+			return loc;
+
+		bus = bus->parent;
 	}
 
-	loc = of_get_property(dn, "ibm,loc-code", NULL);
-	if (!loc)
-		loc = of_get_property(dn, "ibm,slot-location-code", NULL);
-
-out:
-	return loc ? loc : "N/A";
+	return "N/A";
 }
 
 /**
@@ -931,7 +928,7 @@
 		bus = pe->phb->bus;
 	} else if (pe->type & EEH_PE_BUS ||
 		   pe->type & EEH_PE_DEVICE) {
-		if (pe->bus) {
+		if (pe->state & EEH_PE_PRI_BUS) {
 			bus = pe->bus;
 			goto out;
 		}
diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c
index 05e804c..aec9a1b 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -109,8 +109,9 @@
 	 * If the breakpoint is unregistered between a hw_breakpoint_handler()
 	 * and the single_step_dabr_instruction(), then cleanup the breakpoint
 	 * restoration variables to prevent dangling pointers.
+	 * FIXME, this should not be using bp->ctx at all! Sayeth peterz.
 	 */
-	if (bp->ctx && bp->ctx->task)
+	if (bp->ctx && bp->ctx->task && bp->ctx->task != ((void *)-1L))
 		bp->ctx->task->thread.last_hit_ubp = NULL;
 }
 
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index db475d4..f28754c 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -701,31 +701,3 @@
 	li	r5,0
 	blr	/* image->start(physid, image->start, 0); */
 #endif /* CONFIG_KEXEC */
-
-#ifdef CONFIG_MODULES
-#if defined(_CALL_ELF) && _CALL_ELF == 2
-
-#ifdef CONFIG_MODVERSIONS
-.weak __crc_TOC.
-.section "___kcrctab+TOC.","a"
-.globl __kcrctab_TOC.
-__kcrctab_TOC.:
-	.llong	__crc_TOC.
-#endif
-
-/*
- * Export a fake .TOC. since both modpost and depmod will complain otherwise.
- * Both modpost and depmod strip the leading . so we do the same here.
- */
-.section "__ksymtab_strings","a"
-__kstrtab_TOC.:
-	.asciz "TOC."
-
-.section "___ksymtab+TOC.","a"
-/* This symbol name is important: it's used by modpost to find exported syms */
-.globl __ksymtab_TOC.
-__ksymtab_TOC.:
-	.llong 0 /* .value */
-	.llong __kstrtab_TOC.
-#endif /* ELFv2 */
-#endif /* MODULES */
diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c
index 59663af..08b7a40 100644
--- a/arch/powerpc/kernel/module_64.c
+++ b/arch/powerpc/kernel/module_64.c
@@ -326,7 +326,10 @@
 		}
 }
 
-/* Undefined symbols which refer to .funcname, hack to funcname (or .TOC.) */
+/*
+ * Undefined symbols which refer to .funcname, hack to funcname. Make .TOC.
+ * seem to be defined (value set later).
+ */
 static void dedotify(Elf64_Sym *syms, unsigned int numsyms, char *strtab)
 {
 	unsigned int i;
@@ -334,8 +337,11 @@
 	for (i = 1; i < numsyms; i++) {
 		if (syms[i].st_shndx == SHN_UNDEF) {
 			char *name = strtab + syms[i].st_name;
-			if (name[0] == '.')
-				memmove(name, name+1, strlen(name));
+			if (name[0] == '.') {
+				if (strcmp(name+1, "TOC.") == 0)
+					syms[i].st_shndx = SHN_ABS;
+				syms[i].st_name++;
+			}
 		}
 	}
 }
@@ -351,7 +357,7 @@
 	numsyms = sechdrs[symindex].sh_size / sizeof(Elf64_Sym);
 
 	for (i = 1; i < numsyms; i++) {
-		if (syms[i].st_shndx == SHN_UNDEF
+		if (syms[i].st_shndx == SHN_ABS
 		    && strcmp(strtab + syms[i].st_name, "TOC.") == 0)
 			return &syms[i];
 	}
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index dccc87e..3c5736e 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1768,9 +1768,9 @@
 
 	/* 8MB for 32bit, 1GB for 64bit */
 	if (is_32bit_task())
-		rnd = (long)(get_random_int() % (1<<(23-PAGE_SHIFT)));
+		rnd = (get_random_long() % (1UL<<(23-PAGE_SHIFT)));
 	else
-		rnd = (long)(get_random_int() % (1<<(30-PAGE_SHIFT)));
+		rnd = (get_random_long() % (1UL<<(30-PAGE_SHIFT)));
 
 	return rnd << PAGE_SHIFT;
 }
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index ad8c9db..d544fa3 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -114,8 +114,6 @@
 
 notrace void __init machine_init(u64 dt_ptr)
 {
-	lockdep_init();
-
 	/* Enable early debugging if any specified (see udbg.h) */
 	udbg_early_init();
 
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 5c03a6a..f98be83 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -255,9 +255,6 @@
 	setup_paca(&boot_paca);
 	fixup_boot_paca();
 
-	/* Initialize lockdep early or else spinlocks will blow */
-	lockdep_init();
-
 	/* -------- printk is now safe to use ------- */
 
 	/* Enable early debugging if any specified (see udbg.h) */
diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c
index 774a253..9bf7031 100644
--- a/arch/powerpc/kvm/book3s_64_mmu.c
+++ b/arch/powerpc/kvm/book3s_64_mmu.c
@@ -377,15 +377,12 @@
 
 static void kvmppc_mmu_book3s_64_slbmte(struct kvm_vcpu *vcpu, u64 rs, u64 rb)
 {
-	struct kvmppc_vcpu_book3s *vcpu_book3s;
 	u64 esid, esid_1t;
 	int slb_nr;
 	struct kvmppc_slb *slbe;
 
 	dprintk("KVM MMU: slbmte(0x%llx, 0x%llx)\n", rs, rb);
 
-	vcpu_book3s = to_book3s(vcpu);
-
 	esid = GET_ESID(rb);
 	esid_1t = GET_ESID_1T(rb);
 	slb_nr = rb & 0xfff;
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index cff207b..f1187bb 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -114,11 +114,11 @@
 static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
 {
 	int cpu;
-	wait_queue_head_t *wqp;
+	struct swait_queue_head *wqp;
 
 	wqp = kvm_arch_vcpu_wq(vcpu);
-	if (waitqueue_active(wqp)) {
-		wake_up_interruptible(wqp);
+	if (swait_active(wqp)) {
+		swake_up(wqp);
 		++vcpu->stat.halt_wakeup;
 	}
 
@@ -701,8 +701,8 @@
 		tvcpu->arch.prodded = 1;
 		smp_mb();
 		if (vcpu->arch.ceded) {
-			if (waitqueue_active(&vcpu->wq)) {
-				wake_up_interruptible(&vcpu->wq);
+			if (swait_active(&vcpu->wq)) {
+				swake_up(&vcpu->wq);
 				vcpu->stat.halt_wakeup++;
 			}
 		}
@@ -833,6 +833,24 @@
 
 	vcpu->stat.sum_exits++;
 
+	/*
+	 * This can happen if an interrupt occurs in the last stages
+	 * of guest entry or the first stages of guest exit (i.e. after
+	 * setting paca->kvm_hstate.in_guest to KVM_GUEST_MODE_GUEST_HV
+	 * and before setting it to KVM_GUEST_MODE_HOST_HV).
+	 * That can happen due to a bug, or due to a machine check
+	 * occurring at just the wrong time.
+	 */
+	if (vcpu->arch.shregs.msr & MSR_HV) {
+		printk(KERN_EMERG "KVM trap in HV mode!\n");
+		printk(KERN_EMERG "trap=0x%x | pc=0x%lx | msr=0x%llx\n",
+			vcpu->arch.trap, kvmppc_get_pc(vcpu),
+			vcpu->arch.shregs.msr);
+		kvmppc_dump_regs(vcpu);
+		run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+		run->hw.hardware_exit_reason = vcpu->arch.trap;
+		return RESUME_HOST;
+	}
 	run->exit_reason = KVM_EXIT_UNKNOWN;
 	run->ready_for_interrupt_injection = 1;
 	switch (vcpu->arch.trap) {
@@ -1441,7 +1459,7 @@
 	INIT_LIST_HEAD(&vcore->runnable_threads);
 	spin_lock_init(&vcore->lock);
 	spin_lock_init(&vcore->stoltb_lock);
-	init_waitqueue_head(&vcore->wq);
+	init_swait_queue_head(&vcore->wq);
 	vcore->preempt_tb = TB_NIL;
 	vcore->lpcr = kvm->arch.lpcr;
 	vcore->first_vcpuid = core * threads_per_subcore;
@@ -2513,10 +2531,9 @@
 {
 	struct kvm_vcpu *vcpu;
 	int do_sleep = 1;
+	DECLARE_SWAITQUEUE(wait);
 
-	DEFINE_WAIT(wait);
-
-	prepare_to_wait(&vc->wq, &wait, TASK_INTERRUPTIBLE);
+	prepare_to_swait(&vc->wq, &wait, TASK_INTERRUPTIBLE);
 
 	/*
 	 * Check one last time for pending exceptions and ceded state after
@@ -2530,7 +2547,7 @@
 	}
 
 	if (!do_sleep) {
-		finish_wait(&vc->wq, &wait);
+		finish_swait(&vc->wq, &wait);
 		return;
 	}
 
@@ -2538,7 +2555,7 @@
 	trace_kvmppc_vcore_blocked(vc, 0);
 	spin_unlock(&vc->lock);
 	schedule();
-	finish_wait(&vc->wq, &wait);
+	finish_swait(&vc->wq, &wait);
 	spin_lock(&vc->lock);
 	vc->vcore_state = VCORE_INACTIVE;
 	trace_kvmppc_vcore_blocked(vc, 1);
@@ -2594,7 +2611,7 @@
 			kvmppc_start_thread(vcpu, vc);
 			trace_kvm_guest_enter(vcpu);
 		} else if (vc->vcore_state == VCORE_SLEEPING) {
-			wake_up(&vc->wq);
+			swake_up(&vc->wq);
 		}
 
 	}
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 3c6badc..25ae2c9 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -1370,6 +1370,20 @@
 	std	r6, VCPU_ACOP(r9)
 	stw	r7, VCPU_GUEST_PID(r9)
 	std	r8, VCPU_WORT(r9)
+	/*
+	 * Restore various registers to 0, where non-zero values
+	 * set by the guest could disrupt the host.
+	 */
+	li	r0, 0
+	mtspr	SPRN_IAMR, r0
+	mtspr	SPRN_CIABR, r0
+	mtspr	SPRN_DAWRX, r0
+	mtspr	SPRN_TCSCR, r0
+	mtspr	SPRN_WORT, r0
+	/* Set MMCRS to 1<<31 to freeze and disable the SPMC counters */
+	li	r0, 1
+	sldi	r0, r0, 31
+	mtspr	SPRN_MMCRS, r0
 8:
 
 	/* Save and reset AMR and UAMOR before turning on the MMU */
@@ -2153,7 +2167,7 @@
 
 	/* Emulate H_SET_DABR/X on P8 for the sake of compat mode guests */
 2:	rlwimi	r5, r4, 5, DAWRX_DR | DAWRX_DW
-	rlwimi	r5, r4, 1, DAWRX_WT
+	rlwimi	r5, r4, 2, DAWRX_WT
 	clrrdi	r4, r4, 3
 	std	r4, VCPU_DAWR(r3)
 	std	r5, VCPU_DAWRX(r3)
@@ -2404,6 +2418,8 @@
 	 * guest as machine check causing guest to crash.
 	 */
 	ld	r11, VCPU_MSR(r9)
+	rldicl.	r0, r11, 64-MSR_HV_LG, 63 /* check if it happened in HV mode */
+	bne	mc_cont			/* if so, exit to host */
 	andi.	r10, r11, MSR_RI	/* check for unrecoverable exception */
 	beq	1f			/* Deliver a machine check to guest */
 	ld	r10, VCPU_PC(r9)
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 6fd2405..a3b182d 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -919,21 +919,17 @@
 				r = -ENXIO;
 				break;
 			}
-			vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0] = val.vval;
+			val.vval = vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0];
 			break;
 		case KVM_REG_PPC_VSCR:
 			if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
 				r = -ENXIO;
 				break;
 			}
-			vcpu->arch.vr.vscr.u[3] = set_reg_val(reg->id, val);
+			val = get_reg_val(reg->id, vcpu->arch.vr.vscr.u[3]);
 			break;
 		case KVM_REG_PPC_VRSAVE:
-			if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
-				r = -ENXIO;
-				break;
-			}
-			vcpu->arch.vrsave = set_reg_val(reg->id, val);
+			val = get_reg_val(reg->id, vcpu->arch.vrsave);
 			break;
 #endif /* CONFIG_ALTIVEC */
 		default:
@@ -974,17 +970,21 @@
 				r = -ENXIO;
 				break;
 			}
-			val.vval = vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0];
+			vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0] = val.vval;
 			break;
 		case KVM_REG_PPC_VSCR:
 			if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
 				r = -ENXIO;
 				break;
 			}
-			val = get_reg_val(reg->id, vcpu->arch.vr.vscr.u[3]);
+			vcpu->arch.vr.vscr.u[3] = set_reg_val(reg->id, val);
 			break;
 		case KVM_REG_PPC_VRSAVE:
-			val = get_reg_val(reg->id, vcpu->arch.vrsave);
+			if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
+				r = -ENXIO;
+				break;
+			}
+			vcpu->arch.vrsave = set_reg_val(reg->id, val);
 			break;
 #endif /* CONFIG_ALTIVEC */
 		default:
diff --git a/arch/powerpc/mm/hash64_64k.c b/arch/powerpc/mm/hash64_64k.c
index 0762c1e..edb0991 100644
--- a/arch/powerpc/mm/hash64_64k.c
+++ b/arch/powerpc/mm/hash64_64k.c
@@ -111,7 +111,13 @@
 	 */
 	if (!(old_pte & _PAGE_COMBO)) {
 		flush_hash_page(vpn, rpte, MMU_PAGE_64K, ssize, flags);
-		old_pte &= ~_PAGE_HASHPTE | _PAGE_F_GIX | _PAGE_F_SECOND;
+		/*
+		 * clear the old slot details from the old and new pte.
+		 * On hash insert failure we use old pte value and we don't
+		 * want slot information there if we have a insert failure.
+		 */
+		old_pte &= ~(_PAGE_HASHPTE | _PAGE_F_GIX | _PAGE_F_SECOND);
+		new_pte &= ~(_PAGE_HASHPTE | _PAGE_F_GIX | _PAGE_F_SECOND);
 		goto htab_insert_hpte;
 	}
 	/*
diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c
index 49b152b..eb2accd 100644
--- a/arch/powerpc/mm/hugepage-hash64.c
+++ b/arch/powerpc/mm/hugepage-hash64.c
@@ -78,9 +78,19 @@
 		 * base page size. This is because demote_segment won't flush
 		 * hash page table entries.
 		 */
-		if ((old_pmd & _PAGE_HASHPTE) && !(old_pmd & _PAGE_COMBO))
+		if ((old_pmd & _PAGE_HASHPTE) && !(old_pmd & _PAGE_COMBO)) {
 			flush_hash_hugepage(vsid, ea, pmdp, MMU_PAGE_64K,
 					    ssize, flags);
+			/*
+			 * With THP, we also clear the slot information with
+			 * respect to all the 64K hash pte mapping the 16MB
+			 * page. They are all invalid now. This make sure we
+			 * don't find the slot valid when we fault with 4k
+			 * base page size.
+			 *
+			 */
+			memset(hpte_slot_array, 0, PTE_FRAG_SIZE);
+		}
 	}
 
 	valid = hpte_valid(hpte_slot_array, index);
diff --git a/arch/powerpc/mm/hugetlbpage-book3e.c b/arch/powerpc/mm/hugetlbpage-book3e.c
index 7e6d088..83a8be7 100644
--- a/arch/powerpc/mm/hugetlbpage-book3e.c
+++ b/arch/powerpc/mm/hugetlbpage-book3e.c
@@ -8,6 +8,8 @@
 #include <linux/mm.h>
 #include <linux/hugetlb.h>
 
+#include <asm/mmu.h>
+
 #ifdef CONFIG_PPC_FSL_BOOK3E
 #ifdef CONFIG_PPC64
 static inline int tlb1_next(void)
@@ -60,6 +62,14 @@
 	unsigned long tmp;
 	int token = smp_processor_id() + 1;
 
+	/*
+	 * Besides being unnecessary in the absence of SMT, this
+	 * check prevents trying to do lbarx/stbcx. on e5500 which
+	 * doesn't implement either feature.
+	 */
+	if (!cpu_has_feature(CPU_FTR_SMT))
+		return;
+
 	asm volatile("1: lbarx %0, 0, %1;"
 		     "cmpwi %0, 0;"
 		     "bne 2f;"
@@ -80,6 +90,9 @@
 {
 	struct paca_struct *paca = get_paca();
 
+	if (!cpu_has_feature(CPU_FTR_SMT))
+		return;
+
 	isync();
 	paca->tcd_ptr->lock = 0;
 }
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 22d94c3..f078a1f 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -541,7 +541,7 @@
 			res->name = "System RAM";
 			res->start = base;
 			res->end = base + size - 1;
-			res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+			res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
 			WARN_ON(request_resource(&iomem_resource, res) < 0);
 		}
 	}
@@ -560,12 +560,12 @@
  */
 int devmem_is_allowed(unsigned long pfn)
 {
+	if (page_is_rtas_user_buf(pfn))
+		return 1;
 	if (iomem_is_exclusive(PFN_PHYS(pfn)))
 		return 0;
 	if (!page_is_ram(pfn))
 		return 1;
-	if (page_is_rtas_user_buf(pfn))
-		return 1;
 	return 0;
 }
 #endif /* CONFIG_STRICT_DEVMEM */
diff --git a/arch/powerpc/mm/mmap.c b/arch/powerpc/mm/mmap.c
index 0f0502e..4087705 100644
--- a/arch/powerpc/mm/mmap.c
+++ b/arch/powerpc/mm/mmap.c
@@ -59,9 +59,9 @@
 
 	/* 8MB for 32bit, 1GB for 64bit */
 	if (is_32bit_task())
-		rnd = (unsigned long)get_random_int() % (1<<(23-PAGE_SHIFT));
+		rnd = get_random_long() % (1<<(23-PAGE_SHIFT));
 	else
-		rnd = (unsigned long)get_random_int() % (1<<(30-PAGE_SHIFT));
+		rnd = get_random_long() % (1UL<<(30-PAGE_SHIFT));
 
 	return rnd << PAGE_SHIFT;
 }
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 3124a20..cdf2123 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -646,6 +646,28 @@
 	return pgtable;
 }
 
+void pmdp_huge_split_prepare(struct vm_area_struct *vma,
+			     unsigned long address, pmd_t *pmdp)
+{
+	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
+	VM_BUG_ON(REGION_ID(address) != USER_REGION_ID);
+
+	/*
+	 * We can't mark the pmd none here, because that will cause a race
+	 * against exit_mmap. We need to continue mark pmd TRANS HUGE, while
+	 * we spilt, but at the same time we wan't rest of the ppc64 code
+	 * not to insert hash pte on this, because we will be modifying
+	 * the deposited pgtable in the caller of this function. Hence
+	 * clear the _PAGE_USER so that we move the fault handling to
+	 * higher level function and that will serialize against ptl.
+	 * We need to flush existing hash pte entries here even though,
+	 * the translation is still valid, because we will withdraw
+	 * pgtable_t after this.
+	 */
+	pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_USER, 0);
+}
+
+
 /*
  * set a new huge pmd. We should not be called for updating
  * an existing pmd entry. That should go via pmd_hugepage_update.
@@ -663,10 +685,20 @@
 	return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd));
 }
 
+/*
+ * We use this to invalidate a pmdp entry before switching from a
+ * hugepte to regular pmd entry.
+ */
 void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
 		     pmd_t *pmdp)
 {
 	pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_PRESENT, 0);
+
+	/*
+	 * This ensures that generic code that rely on IRQ disabling
+	 * to prevent a parallel THP split work as expected.
+	 */
+	kick_all_cpus_sync();
 }
 
 /*
diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c
index 7d5e295..9958ba8 100644
--- a/arch/powerpc/perf/power8-pmu.c
+++ b/arch/powerpc/perf/power8-pmu.c
@@ -816,7 +816,7 @@
 	.get_constraint		= power8_get_constraint,
 	.get_alternatives	= power8_get_alternatives,
 	.disable_pmc		= power8_disable_pmc,
-	.flags			= PPMU_HAS_SSLOT | PPMU_HAS_SIER | PPMU_ARCH_207S,
+	.flags			= PPMU_HAS_SIER | PPMU_ARCH_207S,
 	.n_generic		= ARRAY_SIZE(power8_generic_events),
 	.generic_events		= power8_generic_events,
 	.cache_events		= &power8_cache_events,
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c
index 5f152b9..87f47e5 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -444,9 +444,12 @@
 	 * PCI devices of the PE are expected to be removed prior
 	 * to PE reset.
 	 */
-	if (!edev->pe->bus)
+	if (!(edev->pe->state & EEH_PE_PRI_BUS)) {
 		edev->pe->bus = pci_find_bus(hose->global_number,
 					     pdn->busno);
+		if (edev->pe->bus)
+			edev->pe->state |= EEH_PE_PRI_BUS;
+	}
 
 	/*
 	 * Enable EEH explicitly so that we will do EEH check
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 573ae199..f90dc04 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -3180,6 +3180,7 @@
 
 static const struct pci_controller_ops pnv_pci_ioda_controller_ops = {
        .dma_dev_setup = pnv_pci_dma_dev_setup,
+       .dma_bus_setup = pnv_pci_dma_bus_setup,
 #ifdef CONFIG_PCI_MSI
        .setup_msi_irqs = pnv_setup_msi_irqs,
        .teardown_msi_irqs = pnv_teardown_msi_irqs,
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index 2f55c86..b1ef84a 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -599,6 +599,9 @@
 	u64 rpn = __pa(uaddr) >> tbl->it_page_shift;
 	long i;
 
+	if (proto_tce & TCE_PCI_WRITE)
+		proto_tce |= TCE_PCI_READ;
+
 	for (i = 0; i < npages; i++) {
 		unsigned long newtce = proto_tce |
 			((rpn + i) << tbl->it_page_shift);
@@ -620,6 +623,9 @@
 
 	BUG_ON(*hpa & ~IOMMU_PAGE_MASK(tbl));
 
+	if (newtce & TCE_PCI_WRITE)
+		newtce |= TCE_PCI_READ;
+
 	oldtce = xchg(pnv_tce(tbl, idx), cpu_to_be64(newtce));
 	*hpa = be64_to_cpu(oldtce) & ~(TCE_PCI_READ | TCE_PCI_WRITE);
 	*direction = iommu_tce_direction(oldtce);
@@ -760,6 +766,26 @@
 		phb->dma_dev_setup(phb, pdev);
 }
 
+void pnv_pci_dma_bus_setup(struct pci_bus *bus)
+{
+	struct pci_controller *hose = bus->sysdata;
+	struct pnv_phb *phb = hose->private_data;
+	struct pnv_ioda_pe *pe;
+
+	list_for_each_entry(pe, &phb->ioda.pe_list, list) {
+		if (!(pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)))
+			continue;
+
+		if (!pe->pbus)
+			continue;
+
+		if (bus->number == ((pe->rid >> 8) & 0xFF)) {
+			pe->pbus = bus;
+			break;
+		}
+	}
+}
+
 void pnv_pci_shutdown(void)
 {
 	struct pci_controller *hose;
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index 7f56313..00691a9 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -242,6 +242,7 @@
 extern int pnv_eeh_phb_reset(struct pci_controller *hose, int option);
 
 extern void pnv_pci_dma_dev_setup(struct pci_dev *pdev);
+extern void pnv_pci_dma_bus_setup(struct pci_bus *bus);
 extern int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type);
 extern void pnv_teardown_msi_irqs(struct pci_dev *pdev);
 
diff --git a/arch/s390/include/asm/fpu/internal.h b/arch/s390/include/asm/fpu/internal.h
index ea91ddf..629c908 100644
--- a/arch/s390/include/asm/fpu/internal.h
+++ b/arch/s390/include/asm/fpu/internal.h
@@ -40,6 +40,7 @@
 static inline void fpregs_store(_s390_fp_regs *fpregs, struct fpu *fpu)
 {
 	fpregs->pad = 0;
+	fpregs->fpc = fpu->fpc;
 	if (MACHINE_HAS_VX)
 		convert_vx_to_fp((freg_t *)&fpregs->fprs, fpu->vxrs);
 	else
@@ -49,6 +50,7 @@
 
 static inline void fpregs_load(_s390_fp_regs *fpregs, struct fpu *fpu)
 {
+	fpu->fpc = fpregs->fpc;
 	if (MACHINE_HAS_VX)
 		convert_fp_to_vx(fpu->vxrs, (freg_t *)&fpregs->fprs);
 	else
diff --git a/arch/s390/include/asm/irqflags.h b/arch/s390/include/asm/irqflags.h
index 16aa0c7..595a275 100644
--- a/arch/s390/include/asm/irqflags.h
+++ b/arch/s390/include/asm/irqflags.h
@@ -8,6 +8,8 @@
 
 #include <linux/types.h>
 
+#define ARCH_IRQ_ENABLED	(3UL << (BITS_PER_LONG - 8))
+
 /* store then OR system mask. */
 #define __arch_local_irq_stosm(__or)					\
 ({									\
@@ -54,14 +56,17 @@
 	__arch_local_irq_stosm(0x03);
 }
 
+/* This only restores external and I/O interrupt state */
 static inline notrace void arch_local_irq_restore(unsigned long flags)
 {
-	__arch_local_irq_ssm(flags);
+	/* only disabled->disabled and disabled->enabled is valid */
+	if (flags & ARCH_IRQ_ENABLED)
+		arch_local_irq_enable();
 }
 
 static inline notrace bool arch_irqs_disabled_flags(unsigned long flags)
 {
-	return !(flags & (3UL << (BITS_PER_LONG - 8)));
+	return !(flags & ARCH_IRQ_ENABLED);
 }
 
 static inline notrace bool arch_irqs_disabled(void)
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 6742414..b0c8ad0 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -467,7 +467,7 @@
 struct kvm_s390_local_interrupt {
 	spinlock_t lock;
 	struct kvm_s390_float_interrupt *float_int;
-	wait_queue_head_t *wq;
+	struct swait_queue_head *wq;
 	atomic_t *cpuflags;
 	DECLARE_BITMAP(sigp_emerg_pending, KVM_MAX_VCPUS);
 	struct kvm_s390_irq_payload irq;
@@ -546,7 +546,6 @@
 	struct kvm_s390_sie_block *sie_block;
 	unsigned int      host_acrs[NUM_ACRS];
 	struct fpu	  host_fpregs;
-	struct fpu	  guest_fpregs;
 	struct kvm_s390_local_interrupt local_int;
 	struct hrtimer    ckc_timer;
 	struct kvm_s390_pgm_info pgm;
diff --git a/arch/s390/include/asm/livepatch.h b/arch/s390/include/asm/livepatch.h
index 7aa7991..a52b6cc 100644
--- a/arch/s390/include/asm/livepatch.h
+++ b/arch/s390/include/asm/livepatch.h
@@ -37,7 +37,7 @@
 	regs->psw.addr = ip;
 }
 #else
-#error Live patching support is disabled; check CONFIG_LIVEPATCH
+#error Include linux/livepatch.h, not asm/livepatch.h
 #endif
 
 #endif
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index fb1b93e..e485817 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -15,17 +15,25 @@
 static inline int init_new_context(struct task_struct *tsk,
 				   struct mm_struct *mm)
 {
+	spin_lock_init(&mm->context.list_lock);
+	INIT_LIST_HEAD(&mm->context.pgtable_list);
+	INIT_LIST_HEAD(&mm->context.gmap_list);
 	cpumask_clear(&mm->context.cpu_attach_mask);
 	atomic_set(&mm->context.attach_count, 0);
 	mm->context.flush_mm = 0;
-	mm->context.asce_bits = _ASCE_TABLE_LENGTH | _ASCE_USER_BITS;
-	mm->context.asce_bits |= _ASCE_TYPE_REGION3;
 #ifdef CONFIG_PGSTE
 	mm->context.alloc_pgste = page_table_allocate_pgste;
 	mm->context.has_pgste = 0;
 	mm->context.use_skey = 0;
 #endif
-	mm->context.asce_limit = STACK_TOP_MAX;
+	if (mm->context.asce_limit == 0) {
+		/* context created by exec, set asce limit to 4TB */
+		mm->context.asce_bits = _ASCE_TABLE_LENGTH |
+			_ASCE_USER_BITS | _ASCE_TYPE_REGION3;
+		mm->context.asce_limit = STACK_TOP_MAX;
+	} else if (mm->context.asce_limit == (1UL << 31)) {
+		mm_inc_nr_pmds(mm);
+	}
 	crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm));
 	return 0;
 }
@@ -111,8 +119,6 @@
 static inline void arch_dup_mmap(struct mm_struct *oldmm,
 				 struct mm_struct *mm)
 {
-	if (oldmm->context.asce_limit < mm->context.asce_limit)
-		crst_table_downgrade(mm, oldmm->context.asce_limit);
 }
 
 static inline void arch_exit_mmap(struct mm_struct *mm)
diff --git a/arch/s390/include/asm/pci_io.h b/arch/s390/include/asm/pci_io.h
index 1a9a98d..69aa18b 100644
--- a/arch/s390/include/asm/pci_io.h
+++ b/arch/s390/include/asm/pci_io.h
@@ -8,10 +8,13 @@
 #include <asm/pci_insn.h>
 
 /* I/O Map */
-#define ZPCI_IOMAP_MAX_ENTRIES		0x7fff
-#define ZPCI_IOMAP_ADDR_BASE		0x8000000000000000ULL
-#define ZPCI_IOMAP_ADDR_IDX_MASK	0x7fff000000000000ULL
-#define ZPCI_IOMAP_ADDR_OFF_MASK	0x0000ffffffffffffULL
+#define ZPCI_IOMAP_SHIFT		48
+#define ZPCI_IOMAP_ADDR_BASE		0x8000000000000000UL
+#define ZPCI_IOMAP_ADDR_OFF_MASK	((1UL << ZPCI_IOMAP_SHIFT) - 1)
+#define ZPCI_IOMAP_MAX_ENTRIES							\
+	((ULONG_MAX - ZPCI_IOMAP_ADDR_BASE + 1) / (1UL << ZPCI_IOMAP_SHIFT))
+#define ZPCI_IOMAP_ADDR_IDX_MASK						\
+	(~ZPCI_IOMAP_ADDR_OFF_MASK - ZPCI_IOMAP_ADDR_BASE)
 
 struct zpci_iomap_entry {
 	u32 fh;
@@ -21,8 +24,9 @@
 
 extern struct zpci_iomap_entry *zpci_iomap_start;
 
+#define ZPCI_ADDR(idx) (ZPCI_IOMAP_ADDR_BASE | ((u64) idx << ZPCI_IOMAP_SHIFT))
 #define ZPCI_IDX(addr)								\
-	(((__force u64) addr & ZPCI_IOMAP_ADDR_IDX_MASK) >> 48)
+	(((__force u64) addr & ZPCI_IOMAP_ADDR_IDX_MASK) >> ZPCI_IOMAP_SHIFT)
 #define ZPCI_OFFSET(addr)							\
 	((__force u64) addr & ZPCI_IOMAP_ADDR_OFF_MASK)
 
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index 7b7858f..d7cc79f 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -100,12 +100,26 @@
 
 static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 {
-	spin_lock_init(&mm->context.list_lock);
-	INIT_LIST_HEAD(&mm->context.pgtable_list);
-	INIT_LIST_HEAD(&mm->context.gmap_list);
-	return (pgd_t *) crst_table_alloc(mm);
+	unsigned long *table = crst_table_alloc(mm);
+
+	if (!table)
+		return NULL;
+	if (mm->context.asce_limit == (1UL << 31)) {
+		/* Forking a compat process with 2 page table levels */
+		if (!pgtable_pmd_page_ctor(virt_to_page(table))) {
+			crst_table_free(mm, table);
+			return NULL;
+		}
+	}
+	return (pgd_t *) table;
 }
-#define pgd_free(mm, pgd) crst_table_free(mm, (unsigned long *) pgd)
+
+static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
+{
+	if (mm->context.asce_limit == (1UL << 31))
+		pgtable_pmd_page_dtor(virt_to_page(pgd));
+	crst_table_free(mm, (unsigned long *) pgd);
+}
 
 static inline void pmd_populate(struct mm_struct *mm,
 				pmd_t *pmd, pgtable_t pte)
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index f16debf..1c4fe12 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -166,14 +166,14 @@
  */
 #define start_thread(regs, new_psw, new_stackp) do {			\
 	regs->psw.mask	= PSW_USER_BITS | PSW_MASK_EA | PSW_MASK_BA;	\
-	regs->psw.addr	= new_psw | PSW_ADDR_AMODE;			\
+	regs->psw.addr	= new_psw;					\
 	regs->gprs[15]	= new_stackp;					\
 	execve_tail();							\
 } while (0)
 
 #define start_thread31(regs, new_psw, new_stackp) do {			\
 	regs->psw.mask	= PSW_USER_BITS | PSW_MASK_BA;			\
-	regs->psw.addr	= new_psw | PSW_ADDR_AMODE;			\
+	regs->psw.addr	= new_psw;					\
 	regs->gprs[15]	= new_stackp;					\
 	crst_table_downgrade(current->mm, 1UL << 31);			\
 	execve_tail();							\
diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h
index f00cd35..99bc456 100644
--- a/arch/s390/include/asm/ptrace.h
+++ b/arch/s390/include/asm/ptrace.h
@@ -149,7 +149,7 @@
 #define arch_has_block_step()	(1)
 
 #define user_mode(regs) (((regs)->psw.mask & PSW_MASK_PSTATE) != 0)
-#define instruction_pointer(regs) ((regs)->psw.addr & PSW_ADDR_INSN)
+#define instruction_pointer(regs) ((regs)->psw.addr)
 #define user_stack_pointer(regs)((regs)->gprs[15])
 #define profile_pc(regs) instruction_pointer(regs)
 
@@ -161,7 +161,7 @@
 static inline void instruction_pointer_set(struct pt_regs *regs,
 					   unsigned long val)
 {
-	regs->psw.addr = val | PSW_ADDR_AMODE;
+	regs->psw.addr = val;
 }
 
 int regs_query_register_offset(const char *name);
@@ -171,7 +171,7 @@
 
 static inline unsigned long kernel_stack_pointer(struct pt_regs *regs)
 {
-	return regs->gprs[15] & PSW_ADDR_INSN;
+	return regs->gprs[15];
 }
 
 #endif /* __ASSEMBLY__ */
diff --git a/arch/s390/include/uapi/asm/unistd.h b/arch/s390/include/uapi/asm/unistd.h
index 34ec202..ab3aa68 100644
--- a/arch/s390/include/uapi/asm/unistd.h
+++ b/arch/s390/include/uapi/asm/unistd.h
@@ -310,7 +310,8 @@
 #define __NR_recvmsg		372
 #define __NR_shutdown		373
 #define __NR_mlock2		374
-#define NR_syscalls 375
+#define __NR_copy_file_range	375
+#define NR_syscalls 376
 
 /* 
  * There are some system calls that are not present on 64 bit, some
diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c
index 66c9441..4af6037 100644
--- a/arch/s390/kernel/compat_signal.c
+++ b/arch/s390/kernel/compat_signal.c
@@ -271,7 +271,7 @@
 
 	/* Restore high gprs from signal stack */
 	if (__copy_from_user(&gprs_high, &sregs_ext->gprs_high,
-			     sizeof(&sregs_ext->gprs_high)))
+			     sizeof(sregs_ext->gprs_high)))
 		return -EFAULT;
 	for (i = 0; i < NUM_GPRS; i++)
 		*(__u32 *)&regs->gprs[i] = gprs_high[i];
diff --git a/arch/s390/kernel/compat_wrapper.c b/arch/s390/kernel/compat_wrapper.c
index fac4eed..ae2cda5 100644
--- a/arch/s390/kernel/compat_wrapper.c
+++ b/arch/s390/kernel/compat_wrapper.c
@@ -177,3 +177,4 @@
 COMPAT_SYSCALL_WRAP3(getpeername, int, fd, struct sockaddr __user *, usockaddr, int __user *, usockaddr_len);
 COMPAT_SYSCALL_WRAP6(sendto, int, fd, void __user *, buff, size_t, len, unsigned int, flags, struct sockaddr __user *, addr, int, addr_len);
 COMPAT_SYSCALL_WRAP3(mlock2, unsigned long, start, size_t, len, int, flags);
+COMPAT_SYSCALL_WRAP6(copy_file_range, int, fd_in, loff_t __user *, off_in, int, fd_out, loff_t __user *, off_out, size_t, len, unsigned int, flags);
diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
index a92b39f..3986c9f 100644
--- a/arch/s390/kernel/crash_dump.c
+++ b/arch/s390/kernel/crash_dump.c
@@ -59,8 +59,6 @@
 	struct save_area *sa;
 
 	sa = (void *) memblock_alloc(sizeof(*sa), 8);
-	if (!sa)
-		return NULL;
 	if (is_boot_cpu)
 		list_add(&sa->list, &dump_save_areas);
 	else
diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c
index 6fca0e4..c890a55 100644
--- a/arch/s390/kernel/debug.c
+++ b/arch/s390/kernel/debug.c
@@ -1470,7 +1470,7 @@
 		except_str = "*";
 	else
 		except_str = "-";
-	caller = ((unsigned long) entry->caller) & PSW_ADDR_INSN;
+	caller = (unsigned long) entry->caller;
 	rc += sprintf(out_buf, "%02i %011lld:%06lu %1u %1s %02i %p  ",
 		      area, (long long)time_spec.tv_sec,
 		      time_spec.tv_nsec / 1000, level, except_str,
diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c
index dc8e204..02bd02f 100644
--- a/arch/s390/kernel/dumpstack.c
+++ b/arch/s390/kernel/dumpstack.c
@@ -34,22 +34,21 @@
 	unsigned long addr;
 
 	while (1) {
-		sp = sp & PSW_ADDR_INSN;
 		if (sp < low || sp > high - sizeof(*sf))
 			return sp;
 		sf = (struct stack_frame *) sp;
-		addr = sf->gprs[8] & PSW_ADDR_INSN;
+		addr = sf->gprs[8];
 		printk("([<%016lx>] %pSR)\n", addr, (void *)addr);
 		/* Follow the backchain. */
 		while (1) {
 			low = sp;
-			sp = sf->back_chain & PSW_ADDR_INSN;
+			sp = sf->back_chain;
 			if (!sp)
 				break;
 			if (sp <= low || sp > high - sizeof(*sf))
 				return sp;
 			sf = (struct stack_frame *) sp;
-			addr = sf->gprs[8] & PSW_ADDR_INSN;
+			addr = sf->gprs[8];
 			printk(" [<%016lx>] %pSR\n", addr, (void *)addr);
 		}
 		/* Zero backchain detected, check for interrupt frame. */
@@ -57,7 +56,7 @@
 		if (sp <= low || sp > high - sizeof(*regs))
 			return sp;
 		regs = (struct pt_regs *) sp;
-		addr = regs->psw.addr & PSW_ADDR_INSN;
+		addr = regs->psw.addr;
 		printk(" [<%016lx>] %pSR\n", addr, (void *)addr);
 		low = sp;
 		sp = regs->gprs[15];
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index 20a5caf..a0684de 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -252,14 +252,14 @@
 	unsigned long addr;
 
 	addr = S390_lowcore.program_old_psw.addr;
-	fixup = search_exception_tables(addr & PSW_ADDR_INSN);
+	fixup = search_exception_tables(addr);
 	if (!fixup)
 		disabled_wait(0);
 	/* Disable low address protection before storing into lowcore. */
 	__ctl_store(cr0, 0, 0);
 	cr0_new = cr0 & ~(1UL << 28);
 	__ctl_load(cr0_new, 0, 0);
-	S390_lowcore.program_old_psw.addr = extable_fixup(fixup)|PSW_ADDR_AMODE;
+	S390_lowcore.program_old_psw.addr = extable_fixup(fixup);
 	__ctl_load(cr0, 0, 0);
 }
 
@@ -268,9 +268,9 @@
 	psw_t psw;
 
 	psw.mask = PSW_MASK_BASE | PSW_DEFAULT_KEY | PSW_MASK_EA | PSW_MASK_BA;
-	psw.addr = PSW_ADDR_AMODE | (unsigned long) s390_base_ext_handler;
+	psw.addr = (unsigned long) s390_base_ext_handler;
 	S390_lowcore.external_new_psw = psw;
-	psw.addr = PSW_ADDR_AMODE | (unsigned long) s390_base_pgm_handler;
+	psw.addr = (unsigned long) s390_base_pgm_handler;
 	S390_lowcore.program_new_psw = psw;
 	s390_base_pgm_handler_fn = early_pgm_check_handler;
 }
@@ -448,7 +448,6 @@
 	rescue_initrd();
 	clear_bss_section();
 	init_kernel_storage_key();
-	lockdep_init();
 	lockdep_off();
 	setup_lowcore_early();
 	setup_facility_list();
diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
index e0eaf11..0f7bfeb 100644
--- a/arch/s390/kernel/ftrace.c
+++ b/arch/s390/kernel/ftrace.c
@@ -203,7 +203,7 @@
 		goto out;
 	if (unlikely(atomic_read(&current->tracing_graph_pause)))
 		goto out;
-	ip = (ip & PSW_ADDR_INSN) - MCOUNT_INSN_SIZE;
+	ip -= MCOUNT_INSN_SIZE;
 	trace.func = ip;
 	trace.depth = current->curr_ret_stack + 1;
 	/* Only trace if the calling function expects to. */
diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S
index c5febe8..03c2b46 100644
--- a/arch/s390/kernel/head64.S
+++ b/arch/s390/kernel/head64.S
@@ -16,7 +16,7 @@
 
 __HEAD
 ENTRY(startup_continue)
-	tm	__LC_STFLE_FAC_LIST+6,0x80	# LPP available ?
+	tm	__LC_STFLE_FAC_LIST+5,0x80	# LPP available ?
 	jz	0f
 	xc	__LC_LPP+1(7,0),__LC_LPP+1	# clear lpp and current_pid
 	mvi	__LC_LPP,0x80			#   and set LPP_MAGIC
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index 0a5a6b6..f20abdb 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -2057,12 +2057,12 @@
 	/* Set new machine check handler */
 	S390_lowcore.mcck_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_DAT;
 	S390_lowcore.mcck_new_psw.addr =
-		PSW_ADDR_AMODE | (unsigned long) s390_base_mcck_handler;
+		(unsigned long) s390_base_mcck_handler;
 
 	/* Set new program check handler */
 	S390_lowcore.program_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_DAT;
 	S390_lowcore.program_new_psw.addr =
-		PSW_ADDR_AMODE | (unsigned long) s390_base_pgm_handler;
+		(unsigned long) s390_base_pgm_handler;
 
 	/*
 	 * Clear subchannel ID and number to signal new kernel that no CCW or
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index 389db56..250f597 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -226,7 +226,7 @@
 	__ctl_load(per_kprobe, 9, 11);
 	regs->psw.mask |= PSW_MASK_PER;
 	regs->psw.mask &= ~(PSW_MASK_IO | PSW_MASK_EXT);
-	regs->psw.addr = ip | PSW_ADDR_AMODE;
+	regs->psw.addr = ip;
 }
 NOKPROBE_SYMBOL(enable_singlestep);
 
@@ -238,7 +238,7 @@
 	__ctl_load(kcb->kprobe_saved_ctl, 9, 11);
 	regs->psw.mask &= ~PSW_MASK_PER;
 	regs->psw.mask |= kcb->kprobe_saved_imask;
-	regs->psw.addr = ip | PSW_ADDR_AMODE;
+	regs->psw.addr = ip;
 }
 NOKPROBE_SYMBOL(disable_singlestep);
 
@@ -310,7 +310,7 @@
 	 */
 	preempt_disable();
 	kcb = get_kprobe_ctlblk();
-	p = get_kprobe((void *)((regs->psw.addr & PSW_ADDR_INSN) - 2));
+	p = get_kprobe((void *)(regs->psw.addr - 2));
 
 	if (p) {
 		if (kprobe_running()) {
@@ -460,7 +460,7 @@
 			break;
 	}
 
-	regs->psw.addr = orig_ret_address | PSW_ADDR_AMODE;
+	regs->psw.addr = orig_ret_address;
 
 	pop_kprobe(get_kprobe_ctlblk());
 	kretprobe_hash_unlock(current, &flags);
@@ -490,7 +490,7 @@
 static void resume_execution(struct kprobe *p, struct pt_regs *regs)
 {
 	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
-	unsigned long ip = regs->psw.addr & PSW_ADDR_INSN;
+	unsigned long ip = regs->psw.addr;
 	int fixup = probe_get_fixup_type(p->ainsn.insn);
 
 	/* Check if the kprobes location is an enabled ftrace caller */
@@ -605,9 +605,9 @@
 		 * In case the user-specified fault handler returned
 		 * zero, try to fix up.
 		 */
-		entry = search_exception_tables(regs->psw.addr & PSW_ADDR_INSN);
+		entry = search_exception_tables(regs->psw.addr);
 		if (entry) {
-			regs->psw.addr = extable_fixup(entry) | PSW_ADDR_AMODE;
+			regs->psw.addr = extable_fixup(entry);
 			return 1;
 		}
 
@@ -683,7 +683,7 @@
 	memcpy(&kcb->jprobe_saved_regs, regs, sizeof(struct pt_regs));
 
 	/* setup return addr to the jprobe handler routine */
-	regs->psw.addr = (unsigned long) jp->entry | PSW_ADDR_AMODE;
+	regs->psw.addr = (unsigned long) jp->entry;
 	regs->psw.mask &= ~(PSW_MASK_IO | PSW_MASK_EXT);
 
 	/* r15 is the stack pointer */
diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c
index 61595c1..0943b11 100644
--- a/arch/s390/kernel/perf_event.c
+++ b/arch/s390/kernel/perf_event.c
@@ -74,7 +74,7 @@
 
 static unsigned long instruction_pointer_guest(struct pt_regs *regs)
 {
-	return sie_block(regs)->gpsw.addr & PSW_ADDR_INSN;
+	return sie_block(regs)->gpsw.addr;
 }
 
 unsigned long perf_instruction_pointer(struct pt_regs *regs)
@@ -231,29 +231,27 @@
 	struct pt_regs *regs;
 
 	while (1) {
-		sp = sp & PSW_ADDR_INSN;
 		if (sp < low || sp > high - sizeof(*sf))
 			return sp;
 		sf = (struct stack_frame *) sp;
-		perf_callchain_store(entry, sf->gprs[8] & PSW_ADDR_INSN);
+		perf_callchain_store(entry, sf->gprs[8]);
 		/* Follow the backchain. */
 		while (1) {
 			low = sp;
-			sp = sf->back_chain & PSW_ADDR_INSN;
+			sp = sf->back_chain;
 			if (!sp)
 				break;
 			if (sp <= low || sp > high - sizeof(*sf))
 				return sp;
 			sf = (struct stack_frame *) sp;
-			perf_callchain_store(entry,
-					     sf->gprs[8] & PSW_ADDR_INSN);
+			perf_callchain_store(entry, sf->gprs[8]);
 		}
 		/* Zero backchain detected, check for interrupt frame. */
 		sp = (unsigned long) (sf + 1);
 		if (sp <= low || sp > high - sizeof(*regs))
 			return sp;
 		regs = (struct pt_regs *) sp;
-		perf_callchain_store(entry, sf->gprs[8] & PSW_ADDR_INSN);
+		perf_callchain_store(entry, sf->gprs[8]);
 		low = sp;
 		sp = regs->gprs[15];
 	}
@@ -262,12 +260,13 @@
 void perf_callchain_kernel(struct perf_callchain_entry *entry,
 			   struct pt_regs *regs)
 {
-	unsigned long head;
+	unsigned long head, frame_size;
 	struct stack_frame *head_sf;
 
 	if (user_mode(regs))
 		return;
 
+	frame_size = STACK_FRAME_OVERHEAD + sizeof(struct pt_regs);
 	head = regs->gprs[15];
 	head_sf = (struct stack_frame *) head;
 
@@ -275,8 +274,9 @@
 		return;
 
 	head = head_sf->back_chain;
-	head = __store_trace(entry, head, S390_lowcore.async_stack - ASYNC_SIZE,
-			     S390_lowcore.async_stack);
+	head = __store_trace(entry, head,
+			     S390_lowcore.async_stack + frame_size - ASYNC_SIZE,
+			     S390_lowcore.async_stack + frame_size);
 
 	__store_trace(entry, head, S390_lowcore.thread_info,
 		      S390_lowcore.thread_info + THREAD_SIZE);
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 114ee8b..2bba7df 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -56,10 +56,10 @@
 		return 0;
 	low = task_stack_page(tsk);
 	high = (struct stack_frame *) task_pt_regs(tsk);
-	sf = (struct stack_frame *) (tsk->thread.ksp & PSW_ADDR_INSN);
+	sf = (struct stack_frame *) tsk->thread.ksp;
 	if (sf <= low || sf > high)
 		return 0;
-	sf = (struct stack_frame *) (sf->back_chain & PSW_ADDR_INSN);
+	sf = (struct stack_frame *) sf->back_chain;
 	if (sf <= low || sf > high)
 		return 0;
 	return sf->gprs[8];
@@ -154,7 +154,7 @@
 		memset(&frame->childregs, 0, sizeof(struct pt_regs));
 		frame->childregs.psw.mask = PSW_KERNEL_BITS | PSW_MASK_DAT |
 				PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
-		frame->childregs.psw.addr = PSW_ADDR_AMODE |
+		frame->childregs.psw.addr =
 				(unsigned long) kernel_thread_starter;
 		frame->childregs.gprs[9] = new_stackp; /* function */
 		frame->childregs.gprs[10] = arg;
@@ -220,14 +220,14 @@
 		return 0;
 	low = task_stack_page(p);
 	high = (struct stack_frame *) task_pt_regs(p);
-	sf = (struct stack_frame *) (p->thread.ksp & PSW_ADDR_INSN);
+	sf = (struct stack_frame *) p->thread.ksp;
 	if (sf <= low || sf > high)
 		return 0;
 	for (count = 0; count < 16; count++) {
-		sf = (struct stack_frame *) (sf->back_chain & PSW_ADDR_INSN);
+		sf = (struct stack_frame *) sf->back_chain;
 		if (sf <= low || sf > high)
 			return 0;
-		return_address = sf->gprs[8] & PSW_ADDR_INSN;
+		return_address = sf->gprs[8];
 		if (!in_sched_functions(return_address))
 			return return_address;
 	}
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index 01c37b3..49b1c13 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -84,7 +84,7 @@
 		if (test_tsk_thread_flag(task, TIF_UPROBE_SINGLESTEP))
 			new.control |= PER_EVENT_IFETCH;
 		new.start = 0;
-		new.end = PSW_ADDR_INSN;
+		new.end = -1UL;
 	}
 
 	/* Take care of the PER enablement bit in the PSW. */
@@ -148,7 +148,7 @@
 	else if (addr == (addr_t) &dummy->cr11)
 		/* End address of the active per set. */
 		return test_thread_flag(TIF_SINGLE_STEP) ?
-			PSW_ADDR_INSN : child->thread.per_user.end;
+			-1UL : child->thread.per_user.end;
 	else if (addr == (addr_t) &dummy->bits)
 		/* Single-step bit. */
 		return test_thread_flag(TIF_SINGLE_STEP) ?
@@ -495,8 +495,6 @@
 		}
 		return 0;
 	default:
-		/* Removing high order bit from addr (only for 31 bit). */
-		addr &= PSW_ADDR_INSN;
 		return ptrace_request(child, request, addr, data);
 	}
 }
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index c6878fb..cedb019 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -301,25 +301,21 @@
 	BUILD_BUG_ON(sizeof(struct lowcore) != LC_PAGES * 4096);
 	lc = __alloc_bootmem_low(LC_PAGES * PAGE_SIZE, LC_PAGES * PAGE_SIZE, 0);
 	lc->restart_psw.mask = PSW_KERNEL_BITS;
-	lc->restart_psw.addr =
-		PSW_ADDR_AMODE | (unsigned long) restart_int_handler;
+	lc->restart_psw.addr = (unsigned long) restart_int_handler;
 	lc->external_new_psw.mask = PSW_KERNEL_BITS |
 		PSW_MASK_DAT | PSW_MASK_MCHECK;
-	lc->external_new_psw.addr =
-		PSW_ADDR_AMODE | (unsigned long) ext_int_handler;
+	lc->external_new_psw.addr = (unsigned long) ext_int_handler;
 	lc->svc_new_psw.mask = PSW_KERNEL_BITS |
 		PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
-	lc->svc_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) system_call;
+	lc->svc_new_psw.addr = (unsigned long) system_call;
 	lc->program_new_psw.mask = PSW_KERNEL_BITS |
 		PSW_MASK_DAT | PSW_MASK_MCHECK;
-	lc->program_new_psw.addr =
-		PSW_ADDR_AMODE | (unsigned long) pgm_check_handler;
+	lc->program_new_psw.addr = (unsigned long) pgm_check_handler;
 	lc->mcck_new_psw.mask = PSW_KERNEL_BITS;
-	lc->mcck_new_psw.addr =
-		PSW_ADDR_AMODE | (unsigned long) mcck_int_handler;
+	lc->mcck_new_psw.addr = (unsigned long) mcck_int_handler;
 	lc->io_new_psw.mask = PSW_KERNEL_BITS |
 		PSW_MASK_DAT | PSW_MASK_MCHECK;
-	lc->io_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) io_int_handler;
+	lc->io_new_psw.addr = (unsigned long) io_int_handler;
 	lc->clock_comparator = -1ULL;
 	lc->kernel_stack = ((unsigned long) &init_thread_union)
 		+ THREAD_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
@@ -378,17 +374,17 @@
 
 static struct resource code_resource = {
 	.name  = "Kernel code",
-	.flags = IORESOURCE_BUSY | IORESOURCE_MEM,
+	.flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
 };
 
 static struct resource data_resource = {
 	.name = "Kernel data",
-	.flags = IORESOURCE_BUSY | IORESOURCE_MEM,
+	.flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
 };
 
 static struct resource bss_resource = {
 	.name = "Kernel bss",
-	.flags = IORESOURCE_BUSY | IORESOURCE_MEM,
+	.flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
 };
 
 static struct resource __initdata *standard_resources[] = {
@@ -412,7 +408,7 @@
 
 	for_each_memblock(memory, reg) {
 		res = alloc_bootmem_low(sizeof(*res));
-		res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
+		res->flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM;
 
 		res->name = "System RAM";
 		res->start = reg->base;
diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c
index 028cc46..d82562c 100644
--- a/arch/s390/kernel/signal.c
+++ b/arch/s390/kernel/signal.c
@@ -331,13 +331,13 @@
 	/* Set up to return from userspace.  If provided, use a stub
 	   already in userspace.  */
 	if (ka->sa.sa_flags & SA_RESTORER) {
-		restorer = (unsigned long) ka->sa.sa_restorer | PSW_ADDR_AMODE;
+		restorer = (unsigned long) ka->sa.sa_restorer;
 	} else {
 		/* Signal frame without vector registers are short ! */
 		__u16 __user *svc = (void __user *) frame + frame_size - 2;
 		if (__put_user(S390_SYSCALL_OPCODE | __NR_sigreturn, svc))
 			return -EFAULT;
-		restorer = (unsigned long) svc | PSW_ADDR_AMODE;
+		restorer = (unsigned long) svc;
 	}
 
 	/* Set up registers for signal handler */
@@ -347,7 +347,7 @@
 	regs->psw.mask = PSW_MASK_EA | PSW_MASK_BA |
 		(PSW_USER_BITS & PSW_MASK_ASC) |
 		(regs->psw.mask & ~PSW_MASK_ASC);
-	regs->psw.addr = (unsigned long) ka->sa.sa_handler | PSW_ADDR_AMODE;
+	regs->psw.addr = (unsigned long) ka->sa.sa_handler;
 
 	regs->gprs[2] = sig;
 	regs->gprs[3] = (unsigned long) &frame->sc;
@@ -394,13 +394,12 @@
 	/* Set up to return from userspace.  If provided, use a stub
 	   already in userspace.  */
 	if (ksig->ka.sa.sa_flags & SA_RESTORER) {
-		restorer = (unsigned long)
-			ksig->ka.sa.sa_restorer | PSW_ADDR_AMODE;
+		restorer = (unsigned long) ksig->ka.sa.sa_restorer;
 	} else {
 		__u16 __user *svc = &frame->svc_insn;
 		if (__put_user(S390_SYSCALL_OPCODE | __NR_rt_sigreturn, svc))
 			return -EFAULT;
-		restorer = (unsigned long) svc | PSW_ADDR_AMODE;
+		restorer = (unsigned long) svc;
 	}
 
 	/* Create siginfo on the signal stack */
@@ -426,7 +425,7 @@
 	regs->psw.mask = PSW_MASK_EA | PSW_MASK_BA |
 		(PSW_USER_BITS & PSW_MASK_ASC) |
 		(regs->psw.mask & ~PSW_MASK_ASC);
-	regs->psw.addr = (unsigned long) ksig->ka.sa.sa_handler | PSW_ADDR_AMODE;
+	regs->psw.addr = (unsigned long) ksig->ka.sa.sa_handler;
 
 	regs->gprs[2] = ksig->sig;
 	regs->gprs[3] = (unsigned long) &frame->info;
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index a13468b..3c65a8e 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -623,8 +623,6 @@
 		return;
 	/* Allocate a page as dumping area for the store status sigps */
 	page = memblock_alloc_base(PAGE_SIZE, PAGE_SIZE, 1UL << 31);
-	if (!page)
-		panic("could not allocate memory for save area\n");
 	/* Set multi-threading state to the previous system. */
 	pcpu_set_smt(sclp.mtid_prev);
 	boot_cpu_addr = stap();
diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c
index 1785cd8..8f64ebd 100644
--- a/arch/s390/kernel/stacktrace.c
+++ b/arch/s390/kernel/stacktrace.c
@@ -21,12 +21,11 @@
 	unsigned long addr;
 
 	while(1) {
-		sp &= PSW_ADDR_INSN;
 		if (sp < low || sp > high)
 			return sp;
 		sf = (struct stack_frame *)sp;
 		while(1) {
-			addr = sf->gprs[8] & PSW_ADDR_INSN;
+			addr = sf->gprs[8];
 			if (!trace->skip)
 				trace->entries[trace->nr_entries++] = addr;
 			else
@@ -34,7 +33,7 @@
 			if (trace->nr_entries >= trace->max_entries)
 				return sp;
 			low = sp;
-			sp = sf->back_chain & PSW_ADDR_INSN;
+			sp = sf->back_chain;
 			if (!sp)
 				break;
 			if (sp <= low || sp > high - sizeof(*sf))
@@ -46,7 +45,7 @@
 		if (sp <= low || sp > high - sizeof(*regs))
 			return sp;
 		regs = (struct pt_regs *)sp;
-		addr = regs->psw.addr & PSW_ADDR_INSN;
+		addr = regs->psw.addr;
 		if (savesched || !in_sched_functions(addr)) {
 			if (!trace->skip)
 				trace->entries[trace->nr_entries++] = addr;
@@ -60,33 +59,43 @@
 	}
 }
 
-void save_stack_trace(struct stack_trace *trace)
+static void __save_stack_trace(struct stack_trace *trace, unsigned long sp)
 {
-	register unsigned long sp asm ("15");
-	unsigned long orig_sp, new_sp;
+	unsigned long new_sp, frame_size;
 
-	orig_sp = sp & PSW_ADDR_INSN;
-	new_sp = save_context_stack(trace, orig_sp,
-				    S390_lowcore.panic_stack - PAGE_SIZE,
-				    S390_lowcore.panic_stack, 1);
-	if (new_sp != orig_sp)
-		return;
+	frame_size = STACK_FRAME_OVERHEAD + sizeof(struct pt_regs);
+	new_sp = save_context_stack(trace, sp,
+			S390_lowcore.panic_stack + frame_size - PAGE_SIZE,
+			S390_lowcore.panic_stack + frame_size, 1);
 	new_sp = save_context_stack(trace, new_sp,
-				    S390_lowcore.async_stack - ASYNC_SIZE,
-				    S390_lowcore.async_stack, 1);
-	if (new_sp != orig_sp)
-		return;
+			S390_lowcore.async_stack + frame_size - ASYNC_SIZE,
+			S390_lowcore.async_stack + frame_size, 1);
 	save_context_stack(trace, new_sp,
 			   S390_lowcore.thread_info,
 			   S390_lowcore.thread_info + THREAD_SIZE, 1);
 }
+
+void save_stack_trace(struct stack_trace *trace)
+{
+	register unsigned long r15 asm ("15");
+	unsigned long sp;
+
+	sp = r15;
+	__save_stack_trace(trace, sp);
+	if (trace->nr_entries < trace->max_entries)
+		trace->entries[trace->nr_entries++] = ULONG_MAX;
+}
 EXPORT_SYMBOL_GPL(save_stack_trace);
 
 void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
 {
 	unsigned long sp, low, high;
 
-	sp = tsk->thread.ksp & PSW_ADDR_INSN;
+	sp = tsk->thread.ksp;
+	if (tsk == current) {
+		/* Get current stack pointer. */
+		asm volatile("la %0,0(15)" : "=a" (sp));
+	}
 	low = (unsigned long) task_stack_page(tsk);
 	high = (unsigned long) task_pt_regs(tsk);
 	save_context_stack(trace, sp, low, high, 0);
@@ -94,3 +103,14 @@
 		trace->entries[trace->nr_entries++] = ULONG_MAX;
 }
 EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
+
+void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace)
+{
+	unsigned long sp;
+
+	sp = kernel_stack_pointer(regs);
+	__save_stack_trace(trace, sp);
+	if (trace->nr_entries < trace->max_entries)
+		trace->entries[trace->nr_entries++] = ULONG_MAX;
+}
+EXPORT_SYMBOL_GPL(save_stack_trace_regs);
diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S
index 5378c3e..293d8b9 100644
--- a/arch/s390/kernel/syscalls.S
+++ b/arch/s390/kernel/syscalls.S
@@ -383,3 +383,4 @@
 SYSCALL(sys_recvmsg,compat_sys_recvmsg)
 SYSCALL(sys_shutdown,sys_shutdown)
 SYSCALL(sys_mlock2,compat_sys_mlock2)
+SYSCALL(sys_copy_file_range,compat_sys_copy_file_range) /* 375 */
diff --git a/arch/s390/kernel/trace.c b/arch/s390/kernel/trace.c
index 21a5df9..dde7654 100644
--- a/arch/s390/kernel/trace.c
+++ b/arch/s390/kernel/trace.c
@@ -18,6 +18,9 @@
 	unsigned long flags;
 	unsigned int *depth;
 
+	/* Avoid lockdep recursion. */
+	if (IS_ENABLED(CONFIG_LOCKDEP))
+		return;
 	local_irq_save(flags);
 	depth = this_cpu_ptr(&diagnose_trace_depth);
 	if (*depth == 0) {
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index d69d648..017eb03d 100644
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -32,8 +32,7 @@
 		address = *(unsigned long *)(current->thread.trap_tdb + 24);
 	else
 		address = regs->psw.addr;
-	return (void __user *)
-		((address - (regs->int_code >> 16)) & PSW_ADDR_INSN);
+	return (void __user *) (address - (regs->int_code >> 16));
 }
 
 static inline void report_user_fault(struct pt_regs *regs, int signr)
@@ -46,7 +45,7 @@
 		return;
 	printk("User process fault: interruption code %04x ilc:%d ",
 	       regs->int_code & 0xffff, regs->int_code >> 17);
-	print_vma_addr("in ", regs->psw.addr & PSW_ADDR_INSN);
+	print_vma_addr("in ", regs->psw.addr);
 	printk("\n");
 	show_regs(regs);
 }
@@ -69,13 +68,13 @@
 		report_user_fault(regs, si_signo);
         } else {
                 const struct exception_table_entry *fixup;
-                fixup = search_exception_tables(regs->psw.addr & PSW_ADDR_INSN);
+		fixup = search_exception_tables(regs->psw.addr);
                 if (fixup)
-			regs->psw.addr = extable_fixup(fixup) | PSW_ADDR_AMODE;
+			regs->psw.addr = extable_fixup(fixup);
 		else {
 			enum bug_trap_type btt;
 
-			btt = report_bug(regs->psw.addr & PSW_ADDR_INSN, regs);
+			btt = report_bug(regs->psw.addr, regs);
 			if (btt == BUG_TRAP_TYPE_WARN)
 				return;
 			die(regs, str);
diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig
index 5fce52c..5ea5af3 100644
--- a/arch/s390/kvm/Kconfig
+++ b/arch/s390/kvm/Kconfig
@@ -29,6 +29,7 @@
 	select HAVE_KVM_IRQFD
 	select HAVE_KVM_IRQ_ROUTING
 	select SRCU
+	select KVM_VFIO
 	---help---
 	  Support hosting paravirtualized guest machines using the SIE
 	  virtualization capability on the mainframe. This should work
diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile
index b3b5534..d42fa38 100644
--- a/arch/s390/kvm/Makefile
+++ b/arch/s390/kvm/Makefile
@@ -7,7 +7,7 @@
 # as published by the Free Software Foundation.
 
 KVM := ../../../virt/kvm
-common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o  $(KVM)/async_pf.o $(KVM)/irqchip.o
+common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o  $(KVM)/async_pf.o $(KVM)/irqchip.o $(KVM)/vfio.o
 
 ccflags-y := -Ivirt/kvm -Iarch/s390/kvm
 
diff --git a/arch/s390/kvm/guestdbg.c b/arch/s390/kvm/guestdbg.c
index 47518a3..d697312 100644
--- a/arch/s390/kvm/guestdbg.c
+++ b/arch/s390/kvm/guestdbg.c
@@ -116,7 +116,7 @@
 	if (*cr9 & PER_EVENT_STORE && *cr9 & PER_CONTROL_ALTERATION) {
 		*cr9 &= ~PER_CONTROL_ALTERATION;
 		*cr10 = 0;
-		*cr11 = PSW_ADDR_INSN;
+		*cr11 = -1UL;
 	} else {
 		*cr9 &= ~PER_CONTROL_ALTERATION;
 		*cr9 |= PER_EVENT_STORE;
@@ -159,7 +159,7 @@
 		vcpu->arch.sie_block->gcr[0] &= ~0x800ul;
 		vcpu->arch.sie_block->gcr[9] |= PER_EVENT_IFETCH;
 		vcpu->arch.sie_block->gcr[10] = 0;
-		vcpu->arch.sie_block->gcr[11] = PSW_ADDR_INSN;
+		vcpu->arch.sie_block->gcr[11] = -1UL;
 	}
 
 	if (guestdbg_hw_bp_enabled(vcpu)) {
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index f88ca72..9ffc732 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -966,13 +966,13 @@
 
 void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu)
 {
-	if (waitqueue_active(&vcpu->wq)) {
+	if (swait_active(&vcpu->wq)) {
 		/*
 		 * The vcpu gave up the cpu voluntarily, mark it as a good
 		 * yield-candidate.
 		 */
 		vcpu->preempted = true;
-		wake_up_interruptible(&vcpu->wq);
+		swake_up(&vcpu->wq);
 		vcpu->stat.halt_wakeup++;
 	}
 }
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 835d60b..03dfe9c 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -1423,44 +1423,18 @@
 	return 0;
 }
 
-/*
- * Backs up the current FP/VX register save area on a particular
- * destination.  Used to switch between different register save
- * areas.
- */
-static inline void save_fpu_to(struct fpu *dst)
-{
-	dst->fpc = current->thread.fpu.fpc;
-	dst->regs = current->thread.fpu.regs;
-}
-
-/*
- * Switches the FP/VX register save area from which to lazy
- * restore register contents.
- */
-static inline void load_fpu_from(struct fpu *from)
-{
-	current->thread.fpu.fpc = from->fpc;
-	current->thread.fpu.regs = from->regs;
-}
-
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
 	/* Save host register state */
 	save_fpu_regs();
-	save_fpu_to(&vcpu->arch.host_fpregs);
+	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
+	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
 
-	if (test_kvm_facility(vcpu->kvm, 129)) {
-		current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
-		/*
-		 * Use the register save area in the SIE-control block
-		 * for register restore and save in kvm_arch_vcpu_put()
-		 */
-		current->thread.fpu.vxrs =
-			(__vector128 *)&vcpu->run->s.regs.vrs;
-	} else
-		load_fpu_from(&vcpu->arch.guest_fpregs);
-
+	/* Depending on MACHINE_HAS_VX, data stored to vrs either
+	 * has vector register or floating point register format.
+	 */
+	current->thread.fpu.regs = vcpu->run->s.regs.vrs;
+	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
 	if (test_fp_ctl(current->thread.fpu.fpc))
 		/* User space provided an invalid FPC, let's clear it */
 		current->thread.fpu.fpc = 0;
@@ -1476,19 +1450,13 @@
 	atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
 	gmap_disable(vcpu->arch.gmap);
 
+	/* Save guest register state */
 	save_fpu_regs();
+	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
 
-	if (test_kvm_facility(vcpu->kvm, 129))
-		/*
-		 * kvm_arch_vcpu_load() set up the register save area to
-		 * the &vcpu->run->s.regs.vrs and, thus, the vector registers
-		 * are already saved.  Only the floating-point control must be
-		 * copied.
-		 */
-		vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
-	else
-		save_fpu_to(&vcpu->arch.guest_fpregs);
-	load_fpu_from(&vcpu->arch.host_fpregs);
+	/* Restore host register state */
+	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
+	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
 
 	save_access_regs(vcpu->run->s.regs.acrs);
 	restore_access_regs(vcpu->arch.host_acrs);
@@ -1506,8 +1474,9 @@
 	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
 	vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
 	vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
-	vcpu->arch.guest_fpregs.fpc = 0;
-	asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
+	/* make sure the new fpc will be lazily loaded */
+	save_fpu_regs();
+	current->thread.fpu.fpc = 0;
 	vcpu->arch.sie_block->gbea = 1;
 	vcpu->arch.sie_block->pp = 0;
 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
@@ -1648,17 +1617,6 @@
 	vcpu->arch.local_int.wq = &vcpu->wq;
 	vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
 
-	/*
-	 * Allocate a save area for floating-point registers.  If the vector
-	 * extension is available, register contents are saved in the SIE
-	 * control block.  The allocated save area is still required in
-	 * particular places, for example, in kvm_s390_vcpu_store_status().
-	 */
-	vcpu->arch.guest_fpregs.fprs = kzalloc(sizeof(freg_t) * __NUM_FPRS,
-					       GFP_KERNEL);
-	if (!vcpu->arch.guest_fpregs.fprs)
-		goto out_free_sie_block;
-
 	rc = kvm_vcpu_init(vcpu, kvm, id);
 	if (rc)
 		goto out_free_sie_block;
@@ -1879,19 +1837,27 @@
 
 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 {
+	/* make sure the new values will be lazily loaded */
+	save_fpu_regs();
 	if (test_fp_ctl(fpu->fpc))
 		return -EINVAL;
-	memcpy(vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
-	vcpu->arch.guest_fpregs.fpc = fpu->fpc;
-	save_fpu_regs();
-	load_fpu_from(&vcpu->arch.guest_fpregs);
+	current->thread.fpu.fpc = fpu->fpc;
+	if (MACHINE_HAS_VX)
+		convert_fp_to_vx(current->thread.fpu.vxrs, (freg_t *)fpu->fprs);
+	else
+		memcpy(current->thread.fpu.fprs, &fpu->fprs, sizeof(fpu->fprs));
 	return 0;
 }
 
 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 {
-	memcpy(&fpu->fprs, vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
-	fpu->fpc = vcpu->arch.guest_fpregs.fpc;
+	/* make sure we have the latest values */
+	save_fpu_regs();
+	if (MACHINE_HAS_VX)
+		convert_vx_to_fp((freg_t *)fpu->fprs, current->thread.fpu.vxrs);
+	else
+		memcpy(fpu->fprs, current->thread.fpu.fprs, sizeof(fpu->fprs));
+	fpu->fpc = current->thread.fpu.fpc;
 	return 0;
 }
 
@@ -2396,6 +2362,7 @@
 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
 {
 	unsigned char archmode = 1;
+	freg_t fprs[NUM_FPRS];
 	unsigned int px;
 	u64 clkcomp;
 	int rc;
@@ -2411,8 +2378,16 @@
 		gpa = px;
 	} else
 		gpa -= __LC_FPREGS_SAVE_AREA;
-	rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
-			     vcpu->arch.guest_fpregs.fprs, 128);
+
+	/* manually convert vector registers if necessary */
+	if (MACHINE_HAS_VX) {
+		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
+		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
+				     fprs, 128);
+	} else {
+		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
+				     vcpu->run->s.regs.vrs, 128);
+	}
 	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
 			      vcpu->run->s.regs.gprs, 128);
 	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
@@ -2420,7 +2395,7 @@
 	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
 			      &px, 4);
 	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
-			      &vcpu->arch.guest_fpregs.fpc, 4);
+			      &vcpu->run->s.regs.fpc, 4);
 	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
 			      &vcpu->arch.sie_block->todpr, 4);
 	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
@@ -2443,19 +2418,7 @@
 	 * it into the save area
 	 */
 	save_fpu_regs();
-	if (test_kvm_facility(vcpu->kvm, 129)) {
-		/*
-		 * If the vector extension is available, the vector registers
-		 * which overlaps with floating-point registers are saved in
-		 * the SIE-control block.  Hence, extract the floating-point
-		 * registers and the FPC value and store them in the
-		 * guest_fpregs structure.
-		 */
-		vcpu->arch.guest_fpregs.fpc = current->thread.fpu.fpc;
-		convert_vx_to_fp(vcpu->arch.guest_fpregs.fprs,
-				 current->thread.fpu.vxrs);
-	} else
-		save_fpu_to(&vcpu->arch.guest_fpregs);
+	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
 	save_access_regs(vcpu->run->s.regs.acrs);
 
 	return kvm_s390_store_status_unloaded(vcpu, addr);
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 1b903f6..791a414 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -228,7 +228,7 @@
 		return;
 	printk(KERN_ALERT "User process fault: interruption code %04x ilc:%d ",
 	       regs->int_code & 0xffff, regs->int_code >> 17);
-	print_vma_addr(KERN_CONT "in ", regs->psw.addr & PSW_ADDR_INSN);
+	print_vma_addr(KERN_CONT "in ", regs->psw.addr);
 	printk(KERN_CONT "\n");
 	printk(KERN_ALERT "failing address: %016lx TEID: %016lx\n",
 	       regs->int_parm_long & __FAIL_ADDR_MASK, regs->int_parm_long);
@@ -256,9 +256,9 @@
 	const struct exception_table_entry *fixup;
 
 	/* Are we prepared to handle this kernel fault?  */
-	fixup = search_exception_tables(regs->psw.addr & PSW_ADDR_INSN);
+	fixup = search_exception_tables(regs->psw.addr);
 	if (fixup) {
-		regs->psw.addr = extable_fixup(fixup) | PSW_ADDR_AMODE;
+		regs->psw.addr = extable_fixup(fixup);
 		return;
 	}
 
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index c722400..73e2903 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -98,7 +98,7 @@
 	__ctl_load(S390_lowcore.kernel_asce, 1, 1);
 	__ctl_load(S390_lowcore.kernel_asce, 7, 7);
 	__ctl_load(S390_lowcore.kernel_asce, 13, 13);
-	arch_local_irq_restore(4UL << (BITS_PER_LONG - 8));
+	__arch_local_irq_stosm(0x04);
 
 	sparse_memory_present_with_active_regions(MAX_NUMNODES);
 	sparse_init();
diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c
index fec59c0..792f9c6 100644
--- a/arch/s390/mm/maccess.c
+++ b/arch/s390/mm/maccess.c
@@ -93,15 +93,19 @@
  */
 int memcpy_real(void *dest, void *src, size_t count)
 {
+	int irqs_disabled, rc;
 	unsigned long flags;
-	int rc;
 
 	if (!count)
 		return 0;
-	local_irq_save(flags);
-	__arch_local_irq_stnsm(0xfbUL);
+	flags = __arch_local_irq_stnsm(0xf8UL);
+	irqs_disabled = arch_irqs_disabled_flags(flags);
+	if (!irqs_disabled)
+		trace_hardirqs_off();
 	rc = __memcpy_real(dest, src, count);
-	local_irq_restore(flags);
+	if (!irqs_disabled)
+		trace_hardirqs_on();
+	__arch_local_irq_ssm(flags);
 	return rc;
 }
 
diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c
index ea01477..45c4daa 100644
--- a/arch/s390/mm/mmap.c
+++ b/arch/s390/mm/mmap.c
@@ -169,12 +169,12 @@
 
 int s390_mmap_check(unsigned long addr, unsigned long len, unsigned long flags)
 {
-	if (is_compat_task() || (TASK_SIZE >= (1UL << 53)))
+	if (is_compat_task() || TASK_SIZE >= TASK_MAX_SIZE)
 		return 0;
 	if (!(flags & MAP_FIXED))
 		addr = 0;
 	if ((addr + len) >= TASK_SIZE)
-		return crst_table_upgrade(current->mm, 1UL << 53);
+		return crst_table_upgrade(current->mm, TASK_MAX_SIZE);
 	return 0;
 }
 
@@ -189,9 +189,9 @@
 	area = arch_get_unmapped_area(filp, addr, len, pgoff, flags);
 	if (!(area & ~PAGE_MASK))
 		return area;
-	if (area == -ENOMEM && !is_compat_task() && TASK_SIZE < (1UL << 53)) {
+	if (area == -ENOMEM && !is_compat_task() && TASK_SIZE < TASK_MAX_SIZE) {
 		/* Upgrade the page table to 4 levels and retry. */
-		rc = crst_table_upgrade(mm, 1UL << 53);
+		rc = crst_table_upgrade(mm, TASK_MAX_SIZE);
 		if (rc)
 			return (unsigned long) rc;
 		area = arch_get_unmapped_area(filp, addr, len, pgoff, flags);
@@ -211,9 +211,9 @@
 	area = arch_get_unmapped_area_topdown(filp, addr, len, pgoff, flags);
 	if (!(area & ~PAGE_MASK))
 		return area;
-	if (area == -ENOMEM && !is_compat_task() && TASK_SIZE < (1UL << 53)) {
+	if (area == -ENOMEM && !is_compat_task() && TASK_SIZE < TASK_MAX_SIZE) {
 		/* Upgrade the page table to 4 levels and retry. */
-		rc = crst_table_upgrade(mm, 1UL << 53);
+		rc = crst_table_upgrade(mm, TASK_MAX_SIZE);
 		if (rc)
 			return (unsigned long) rc;
 		area = arch_get_unmapped_area_topdown(filp, addr, len,
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index a809fa8..5109827 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -55,7 +55,7 @@
 	unsigned long entry;
 	int flush;
 
-	BUG_ON(limit > (1UL << 53));
+	BUG_ON(limit > TASK_MAX_SIZE);
 	flush = 0;
 repeat:
 	table = crst_table_alloc(mm);
diff --git a/arch/s390/numa/numa.c b/arch/s390/numa/numa.c
index 43f32ce..2794845 100644
--- a/arch/s390/numa/numa.c
+++ b/arch/s390/numa/numa.c
@@ -57,9 +57,7 @@
 {
 	pg_data_t *res;
 
-	res = (pg_data_t *) memblock_alloc(sizeof(pg_data_t), 1);
-	if (!res)
-		panic("Could not allocate memory for node data!\n");
+	res = (pg_data_t *) memblock_alloc(sizeof(pg_data_t), 8);
 	memset(res, 0, sizeof(pg_data_t));
 	return res;
 }
@@ -162,7 +160,7 @@
 		register_one_node(nid);
 	return 0;
 }
-device_initcall(numa_init_late);
+arch_initcall(numa_init_late);
 
 static int __init parse_debug(char *parm)
 {
diff --git a/arch/s390/oprofile/backtrace.c b/arch/s390/oprofile/backtrace.c
index 8a6811b..1884e17 100644
--- a/arch/s390/oprofile/backtrace.c
+++ b/arch/s390/oprofile/backtrace.c
@@ -16,24 +16,23 @@
 	struct pt_regs *regs;
 
 	while (*depth) {
-		sp = sp & PSW_ADDR_INSN;
 		if (sp < low || sp > high - sizeof(*sf))
 			return sp;
 		sf = (struct stack_frame *) sp;
 		(*depth)--;
-		oprofile_add_trace(sf->gprs[8] & PSW_ADDR_INSN);
+		oprofile_add_trace(sf->gprs[8]);
 
 		/* Follow the backchain.  */
 		while (*depth) {
 			low = sp;
-			sp = sf->back_chain & PSW_ADDR_INSN;
+			sp = sf->back_chain;
 			if (!sp)
 				break;
 			if (sp <= low || sp > high - sizeof(*sf))
 				return sp;
 			sf = (struct stack_frame *) sp;
 			(*depth)--;
-			oprofile_add_trace(sf->gprs[8] & PSW_ADDR_INSN);
+			oprofile_add_trace(sf->gprs[8]);
 
 		}
 
@@ -46,7 +45,7 @@
 			return sp;
 		regs = (struct pt_regs *) sp;
 		(*depth)--;
-		oprofile_add_trace(sf->gprs[8] & PSW_ADDR_INSN);
+		oprofile_add_trace(sf->gprs[8]);
 		low = sp;
 		sp = regs->gprs[15];
 	}
@@ -55,12 +54,13 @@
 
 void s390_backtrace(struct pt_regs * const regs, unsigned int depth)
 {
-	unsigned long head;
+	unsigned long head, frame_size;
 	struct stack_frame* head_sf;
 
 	if (user_mode(regs))
 		return;
 
+	frame_size = STACK_FRAME_OVERHEAD + sizeof(struct pt_regs);
 	head = regs->gprs[15];
 	head_sf = (struct stack_frame*)head;
 
@@ -69,8 +69,9 @@
 
 	head = head_sf->back_chain;
 
-	head = __show_trace(&depth, head, S390_lowcore.async_stack - ASYNC_SIZE,
-			    S390_lowcore.async_stack);
+	head = __show_trace(&depth, head,
+			    S390_lowcore.async_stack + frame_size - ASYNC_SIZE,
+			    S390_lowcore.async_stack + frame_size);
 
 	__show_trace(&depth, head, S390_lowcore.thread_info,
 		     S390_lowcore.thread_info + THREAD_SIZE);
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 11d4f27..8f19c8f 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -68,9 +68,12 @@
 	.isc = PCI_ISC,
 };
 
-/* I/O Map */
+#define ZPCI_IOMAP_ENTRIES						\
+	min(((unsigned long) CONFIG_PCI_NR_FUNCTIONS * PCI_BAR_COUNT),	\
+	    ZPCI_IOMAP_MAX_ENTRIES)
+
 static DEFINE_SPINLOCK(zpci_iomap_lock);
-static DECLARE_BITMAP(zpci_iomap, ZPCI_IOMAP_MAX_ENTRIES);
+static unsigned long *zpci_iomap_bitmap;
 struct zpci_iomap_entry *zpci_iomap_start;
 EXPORT_SYMBOL_GPL(zpci_iomap_start);
 
@@ -265,27 +268,20 @@
 			      unsigned long max)
 {
 	struct zpci_dev *zdev =	to_zpci(pdev);
-	u64 addr;
 	int idx;
 
-	if ((bar & 7) != bar)
+	if (!pci_resource_len(pdev, bar))
 		return NULL;
 
 	idx = zdev->bars[bar].map_idx;
 	spin_lock(&zpci_iomap_lock);
-	if (zpci_iomap_start[idx].count++) {
-		BUG_ON(zpci_iomap_start[idx].fh != zdev->fh ||
-		       zpci_iomap_start[idx].bar != bar);
-	} else {
-		zpci_iomap_start[idx].fh = zdev->fh;
-		zpci_iomap_start[idx].bar = bar;
-	}
 	/* Detect overrun */
-	BUG_ON(!zpci_iomap_start[idx].count);
+	WARN_ON(!++zpci_iomap_start[idx].count);
+	zpci_iomap_start[idx].fh = zdev->fh;
+	zpci_iomap_start[idx].bar = bar;
 	spin_unlock(&zpci_iomap_lock);
 
-	addr = ZPCI_IOMAP_ADDR_BASE | ((u64) idx << 48);
-	return (void __iomem *) addr + offset;
+	return (void __iomem *) ZPCI_ADDR(idx) + offset;
 }
 EXPORT_SYMBOL(pci_iomap_range);
 
@@ -297,12 +293,11 @@
 
 void pci_iounmap(struct pci_dev *pdev, void __iomem *addr)
 {
-	unsigned int idx;
+	unsigned int idx = ZPCI_IDX(addr);
 
-	idx = (((__force u64) addr) & ~ZPCI_IOMAP_ADDR_BASE) >> 48;
 	spin_lock(&zpci_iomap_lock);
 	/* Detect underrun */
-	BUG_ON(!zpci_iomap_start[idx].count);
+	WARN_ON(!zpci_iomap_start[idx].count);
 	if (!--zpci_iomap_start[idx].count) {
 		zpci_iomap_start[idx].fh = 0;
 		zpci_iomap_start[idx].bar = 0;
@@ -544,15 +539,15 @@
 
 static int zpci_alloc_iomap(struct zpci_dev *zdev)
 {
-	int entry;
+	unsigned long entry;
 
 	spin_lock(&zpci_iomap_lock);
-	entry = find_first_zero_bit(zpci_iomap, ZPCI_IOMAP_MAX_ENTRIES);
-	if (entry == ZPCI_IOMAP_MAX_ENTRIES) {
+	entry = find_first_zero_bit(zpci_iomap_bitmap, ZPCI_IOMAP_ENTRIES);
+	if (entry == ZPCI_IOMAP_ENTRIES) {
 		spin_unlock(&zpci_iomap_lock);
 		return -ENOSPC;
 	}
-	set_bit(entry, zpci_iomap);
+	set_bit(entry, zpci_iomap_bitmap);
 	spin_unlock(&zpci_iomap_lock);
 	return entry;
 }
@@ -561,7 +556,7 @@
 {
 	spin_lock(&zpci_iomap_lock);
 	memset(&zpci_iomap_start[entry], 0, sizeof(struct zpci_iomap_entry));
-	clear_bit(entry, zpci_iomap);
+	clear_bit(entry, zpci_iomap_bitmap);
 	spin_unlock(&zpci_iomap_lock);
 }
 
@@ -611,8 +606,7 @@
 		if (zdev->bars[i].val & 4)
 			flags |= IORESOURCE_MEM_64;
 
-		addr = ZPCI_IOMAP_ADDR_BASE + ((u64) entry << 48);
-
+		addr = ZPCI_ADDR(entry);
 		size = 1UL << zdev->bars[i].size;
 
 		res = __alloc_res(zdev, addr, size, flags);
@@ -873,23 +867,30 @@
 	zdev_fmb_cache = kmem_cache_create("PCI_FMB_cache", sizeof(struct zpci_fmb),
 				16, 0, NULL);
 	if (!zdev_fmb_cache)
-		goto error_zdev;
+		goto error_fmb;
 
-	/* TODO: use realloc */
-	zpci_iomap_start = kzalloc(ZPCI_IOMAP_MAX_ENTRIES * sizeof(*zpci_iomap_start),
-				   GFP_KERNEL);
+	zpci_iomap_start = kcalloc(ZPCI_IOMAP_ENTRIES,
+				   sizeof(*zpci_iomap_start), GFP_KERNEL);
 	if (!zpci_iomap_start)
 		goto error_iomap;
-	return 0;
 
+	zpci_iomap_bitmap = kcalloc(BITS_TO_LONGS(ZPCI_IOMAP_ENTRIES),
+				    sizeof(*zpci_iomap_bitmap), GFP_KERNEL);
+	if (!zpci_iomap_bitmap)
+		goto error_iomap_bitmap;
+
+	return 0;
+error_iomap_bitmap:
+	kfree(zpci_iomap_start);
 error_iomap:
 	kmem_cache_destroy(zdev_fmb_cache);
-error_zdev:
+error_fmb:
 	return -ENOMEM;
 }
 
 static void zpci_mem_exit(void)
 {
+	kfree(zpci_iomap_bitmap);
 	kfree(zpci_iomap_start);
 	kmem_cache_destroy(zdev_fmb_cache);
 }
diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c
index 369a3e0..b0e0475 100644
--- a/arch/s390/pci/pci_event.c
+++ b/arch/s390/pci/pci_event.c
@@ -53,6 +53,11 @@
 
 	pr_err("%s: Event 0x%x reports an error for PCI function 0x%x\n",
 	       pdev ? pci_name(pdev) : "n/a", ccdf->pec, ccdf->fid);
+
+	if (!pdev)
+		return;
+
+	pdev->error_state = pci_channel_io_perm_failure;
 }
 
 void zpci_event_error(void *data)
diff --git a/arch/score/kernel/setup.c b/arch/score/kernel/setup.c
index b48459a..f3a0649 100644
--- a/arch/score/kernel/setup.c
+++ b/arch/score/kernel/setup.c
@@ -101,7 +101,7 @@
 	res->name = "System RAM";
 	res->start = MEMORY_START;
 	res->end = MEMORY_START + MEMORY_SIZE - 1;
-	res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+	res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
 	request_resource(&iomem_resource, res);
 
 	request_resource(res, &code_resource);
diff --git a/arch/sh/include/asm/barrier.h b/arch/sh/include/asm/barrier.h
index f887c64..8a84e05 100644
--- a/arch/sh/include/asm/barrier.h
+++ b/arch/sh/include/asm/barrier.h
@@ -33,7 +33,6 @@
 #endif
 
 #define __smp_store_mb(var, value) do { (void)xchg(&var, value); } while (0)
-#define smp_store_mb(var, value) __smp_store_mb(var, value)
 
 #include <asm-generic/barrier.h>
 
diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c
index de19cfa..3f1c18b 100644
--- a/arch/sh/kernel/setup.c
+++ b/arch/sh/kernel/setup.c
@@ -78,17 +78,17 @@
 
 static struct resource code_resource = {
 	.name = "Kernel code",
-	.flags = IORESOURCE_BUSY | IORESOURCE_MEM,
+	.flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
 };
 
 static struct resource data_resource = {
 	.name = "Kernel data",
-	.flags = IORESOURCE_BUSY | IORESOURCE_MEM,
+	.flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
 };
 
 static struct resource bss_resource = {
 	.name	= "Kernel bss",
-	.flags	= IORESOURCE_BUSY | IORESOURCE_MEM,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
 };
 
 unsigned long memory_start;
@@ -202,7 +202,7 @@
 	res->name = "System RAM";
 	res->start = start;
 	res->end = end - 1;
-	res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+	res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
 
 	if (request_resource(&iomem_resource, res)) {
 		pr_err("unable to request memory_resource 0x%lx 0x%lx\n",
diff --git a/arch/sparc/Makefile b/arch/sparc/Makefile
index eaee146..8496a07 100644
--- a/arch/sparc/Makefile
+++ b/arch/sparc/Makefile
@@ -24,7 +24,13 @@
 export BITS    := 32
 UTS_MACHINE    := sparc
 
+# We are adding -Wa,-Av8 to KBUILD_CFLAGS to deal with a specs bug in some
+# versions of gcc.  Some gcc versions won't pass -Av8 to binutils when you
+# give -mcpu=v8.  This silently worked with older bintutils versions but
+# does not any more.
 KBUILD_CFLAGS  += -m32 -mcpu=v8 -pipe -mno-fpu -fcall-used-g5 -fcall-used-g7
+KBUILD_CFLAGS  += -Wa,-Av8
+
 KBUILD_AFLAGS  += -m32 -Wa,-Av8
 
 else
diff --git a/arch/sparc/include/uapi/asm/unistd.h b/arch/sparc/include/uapi/asm/unistd.h
index 1c26d44..b6de8b1 100644
--- a/arch/sparc/include/uapi/asm/unistd.h
+++ b/arch/sparc/include/uapi/asm/unistd.h
@@ -422,8 +422,9 @@
 #define __NR_listen		354
 #define __NR_setsockopt		355
 #define __NR_mlock2		356
+#define __NR_copy_file_range	357
 
-#define NR_syscalls		357
+#define NR_syscalls		358
 
 /* Bitmask values returned from kern_features system call.  */
 #define KERN_FEATURE_MIXED_MODE_STACK	0x00000001
diff --git a/arch/sparc/kernel/entry.S b/arch/sparc/kernel/entry.S
index 33c02b1..a83707c 100644
--- a/arch/sparc/kernel/entry.S
+++ b/arch/sparc/kernel/entry.S
@@ -948,7 +948,24 @@
 	cmp	%o0, 0
 	bne	3f
 	 mov	-ENOSYS, %o0
+
+	/* Syscall tracing can modify the registers.  */
+	ld	[%sp + STACKFRAME_SZ + PT_G1], %g1
+	sethi	%hi(sys_call_table), %l7
+	ld	[%sp + STACKFRAME_SZ + PT_I0], %i0
+	or	%l7, %lo(sys_call_table), %l7
+	ld	[%sp + STACKFRAME_SZ + PT_I1], %i1
+	ld	[%sp + STACKFRAME_SZ + PT_I2], %i2
+	ld	[%sp + STACKFRAME_SZ + PT_I3], %i3
+	ld	[%sp + STACKFRAME_SZ + PT_I4], %i4
+	ld	[%sp + STACKFRAME_SZ + PT_I5], %i5
+	cmp	%g1, NR_syscalls
+	bgeu	3f
+	 mov	-ENOSYS, %o0
+
+	sll	%g1, 2, %l4
 	mov	%i0, %o0
+	ld	[%l7 + %l4], %l7
 	mov	%i1, %o1
 	mov	%i2, %o2
 	mov	%i3, %o3
diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S
index f2d30ca..cd1f592 100644
--- a/arch/sparc/kernel/head_64.S
+++ b/arch/sparc/kernel/head_64.S
@@ -696,14 +696,6 @@
 	call	__bzero
 	 sub	%o1, %o0, %o1
 
-#ifdef CONFIG_LOCKDEP
-	/* We have this call this super early, as even prom_init can grab
-	 * spinlocks and thus call into the lockdep code.
-	 */
-	call	lockdep_init
-	 nop
-#endif
-
 	call	prom_init
 	 mov	%l7, %o0			! OpenPROM cif handler
 
diff --git a/arch/sparc/kernel/hvcalls.S b/arch/sparc/kernel/hvcalls.S
index afbaba5..d127130 100644
--- a/arch/sparc/kernel/hvcalls.S
+++ b/arch/sparc/kernel/hvcalls.S
@@ -338,8 +338,9 @@
 	mov	%o1, %o4
 	mov	HV_FAST_MACH_SET_WATCHDOG, %o5
 	ta	HV_FAST_TRAP
+	brnz,a,pn %o4, 0f
 	stx	%o1, [%o4]
-	retl
+0:	retl
 	 nop
 ENDPROC(sun4v_mach_set_watchdog)
 
diff --git a/arch/sparc/kernel/signal_64.c b/arch/sparc/kernel/signal_64.c
index d88beff4..39aaec1 100644
--- a/arch/sparc/kernel/signal_64.c
+++ b/arch/sparc/kernel/signal_64.c
@@ -52,7 +52,7 @@
 	unsigned char fenab;
 	int err;
 
-	flush_user_windows();
+	synchronize_user_stack();
 	if (get_thread_wsaved()					||
 	    (((unsigned long)ucp) & (sizeof(unsigned long)-1))	||
 	    (!__access_ok(ucp, sizeof(*ucp))))
diff --git a/arch/sparc/kernel/sparc_ksyms_64.c b/arch/sparc/kernel/sparc_ksyms_64.c
index a92d5d2..9e034f2 100644
--- a/arch/sparc/kernel/sparc_ksyms_64.c
+++ b/arch/sparc/kernel/sparc_ksyms_64.c
@@ -37,6 +37,7 @@
 EXPORT_SYMBOL(sun4v_niagara_setperf);
 EXPORT_SYMBOL(sun4v_niagara2_getperf);
 EXPORT_SYMBOL(sun4v_niagara2_setperf);
+EXPORT_SYMBOL(sun4v_mach_set_watchdog);
 
 /* from hweight.S */
 EXPORT_SYMBOL(__arch_hweight8);
diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c
index c690c8e..b489e97 100644
--- a/arch/sparc/kernel/sys_sparc_64.c
+++ b/arch/sparc/kernel/sys_sparc_64.c
@@ -264,7 +264,7 @@
 	unsigned long rnd = 0UL;
 
 	if (current->flags & PF_RANDOMIZE) {
-		unsigned long val = get_random_int();
+		unsigned long val = get_random_long();
 		if (test_thread_flag(TIF_32BIT))
 			rnd = (val % (1UL << (23UL-PAGE_SHIFT)));
 		else
diff --git a/arch/sparc/kernel/syscalls.S b/arch/sparc/kernel/syscalls.S
index bb00089..c4a1b5c 100644
--- a/arch/sparc/kernel/syscalls.S
+++ b/arch/sparc/kernel/syscalls.S
@@ -158,7 +158,25 @@
 	 add	%sp, PTREGS_OFF, %o0
 	brnz,pn	%o0, 3f
 	 mov	-ENOSYS, %o0
+
+	/* Syscall tracing can modify the registers.  */
+	ldx	[%sp + PTREGS_OFF + PT_V9_G1], %g1
+	sethi	%hi(sys_call_table32), %l7
+	ldx	[%sp + PTREGS_OFF + PT_V9_I0], %i0
+	or	%l7, %lo(sys_call_table32), %l7
+	ldx	[%sp + PTREGS_OFF + PT_V9_I1], %i1
+	ldx	[%sp + PTREGS_OFF + PT_V9_I2], %i2
+	ldx	[%sp + PTREGS_OFF + PT_V9_I3], %i3
+	ldx	[%sp + PTREGS_OFF + PT_V9_I4], %i4
+	ldx	[%sp + PTREGS_OFF + PT_V9_I5], %i5
+
+	cmp	%g1, NR_syscalls
+	bgeu,pn	%xcc, 3f
+	 mov	-ENOSYS, %o0
+
+	sll	%g1, 2, %l4
 	srl	%i0, 0, %o0
+	lduw	[%l7 + %l4], %l7
 	srl	%i4, 0, %o4
 	srl	%i1, 0, %o1
 	srl	%i2, 0, %o2
@@ -170,7 +188,25 @@
 	 add	%sp, PTREGS_OFF, %o0
 	brnz,pn	%o0, 3f
 	 mov	-ENOSYS, %o0
+
+	/* Syscall tracing can modify the registers.  */
+	ldx	[%sp + PTREGS_OFF + PT_V9_G1], %g1
+	sethi	%hi(sys_call_table64), %l7
+	ldx	[%sp + PTREGS_OFF + PT_V9_I0], %i0
+	or	%l7, %lo(sys_call_table64), %l7
+	ldx	[%sp + PTREGS_OFF + PT_V9_I1], %i1
+	ldx	[%sp + PTREGS_OFF + PT_V9_I2], %i2
+	ldx	[%sp + PTREGS_OFF + PT_V9_I3], %i3
+	ldx	[%sp + PTREGS_OFF + PT_V9_I4], %i4
+	ldx	[%sp + PTREGS_OFF + PT_V9_I5], %i5
+
+	cmp	%g1, NR_syscalls
+	bgeu,pn	%xcc, 3f
+	 mov	-ENOSYS, %o0
+
+	sll	%g1, 2, %l4
 	mov	%i0, %o0
+	lduw	[%l7 + %l4], %l7
 	mov	%i1, %o1
 	mov	%i2, %o2
 	mov	%i3, %o3
diff --git a/arch/sparc/kernel/systbls_32.S b/arch/sparc/kernel/systbls_32.S
index e663b6c..6c3dd6c 100644
--- a/arch/sparc/kernel/systbls_32.S
+++ b/arch/sparc/kernel/systbls_32.S
@@ -88,4 +88,4 @@
 /*340*/	.long sys_ni_syscall, sys_kcmp, sys_finit_module, sys_sched_setattr, sys_sched_getattr
 /*345*/	.long sys_renameat2, sys_seccomp, sys_getrandom, sys_memfd_create, sys_bpf
 /*350*/	.long sys_execveat, sys_membarrier, sys_userfaultfd, sys_bind, sys_listen
-/*355*/	.long sys_setsockopt, sys_mlock2
+/*355*/	.long sys_setsockopt, sys_mlock2, sys_copy_file_range
diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S
index 1557121..12b524c 100644
--- a/arch/sparc/kernel/systbls_64.S
+++ b/arch/sparc/kernel/systbls_64.S
@@ -89,7 +89,7 @@
 /*340*/	.word sys_kern_features, sys_kcmp, sys_finit_module, sys_sched_setattr, sys_sched_getattr
 	.word sys32_renameat2, sys_seccomp, sys_getrandom, sys_memfd_create, sys_bpf
 /*350*/	.word sys32_execveat, sys_membarrier, sys_userfaultfd, sys_bind, sys_listen
-	.word compat_sys_setsockopt, sys_mlock2
+	.word compat_sys_setsockopt, sys_mlock2, sys_copy_file_range
 
 #endif /* CONFIG_COMPAT */
 
@@ -170,4 +170,4 @@
 /*340*/	.word sys_kern_features, sys_kcmp, sys_finit_module, sys_sched_setattr, sys_sched_getattr
 	.word sys_renameat2, sys_seccomp, sys_getrandom, sys_memfd_create, sys_bpf
 /*350*/	.word sys64_execveat, sys_membarrier, sys_userfaultfd, sys_bind, sys_listen
-	.word sys_setsockopt, sys_mlock2
+	.word sys_setsockopt, sys_mlock2, sys_copy_file_range
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index 6f21685..1cfe6aa 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -2863,17 +2863,17 @@
 
 static struct resource code_resource = {
 	.name	= "Kernel code",
-	.flags	= IORESOURCE_BUSY | IORESOURCE_MEM
+	.flags	= IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM
 };
 
 static struct resource data_resource = {
 	.name	= "Kernel data",
-	.flags	= IORESOURCE_BUSY | IORESOURCE_MEM
+	.flags	= IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM
 };
 
 static struct resource bss_resource = {
 	.name	= "Kernel bss",
-	.flags	= IORESOURCE_BUSY | IORESOURCE_MEM
+	.flags	= IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM
 };
 
 static inline resource_size_t compute_kern_paddr(void *addr)
@@ -2909,7 +2909,7 @@
 		res->name = "System RAM";
 		res->start = pavail[i].phys_addr;
 		res->end = pavail[i].phys_addr + pavail[i].reg_size - 1;
-		res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
+		res->flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM;
 
 		if (insert_resource(&iomem_resource, res) < 0) {
 			pr_warn("Resource insertion failed.\n");
diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c
index bbb855d..a992238 100644
--- a/arch/tile/kernel/setup.c
+++ b/arch/tile/kernel/setup.c
@@ -1632,14 +1632,14 @@
 	.name	= "Kernel data",
 	.start	= 0,
 	.end	= 0,
-	.flags	= IORESOURCE_BUSY | IORESOURCE_MEM
+	.flags	= IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM
 };
 
 static struct resource code_resource = {
 	.name	= "Kernel code",
 	.start	= 0,
 	.end	= 0,
-	.flags	= IORESOURCE_BUSY | IORESOURCE_MEM
+	.flags	= IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM
 };
 
 /*
@@ -1673,10 +1673,15 @@
 		kzalloc(sizeof(struct resource), GFP_ATOMIC);
 	if (!res)
 		return NULL;
-	res->name = reserved ? "Reserved" : "System RAM";
 	res->start = start_pfn << PAGE_SHIFT;
 	res->end = (end_pfn << PAGE_SHIFT) - 1;
 	res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
+	if (reserved) {
+		res->name = "Reserved";
+	} else {
+		res->name = "System RAM";
+		res->flags |= IORESOURCE_SYSRAM;
+	}
 	if (insert_resource(&iomem_resource, res)) {
 		kfree(res);
 		return NULL;
diff --git a/arch/um/include/asm/page.h b/arch/um/include/asm/page.h
index e13d41c..f878bec 100644
--- a/arch/um/include/asm/page.h
+++ b/arch/um/include/asm/page.h
@@ -34,21 +34,18 @@
 
 #if defined(CONFIG_3_LEVEL_PGTABLES) && !defined(CONFIG_64BIT)
 
-typedef struct { unsigned long pte_low, pte_high; } pte_t;
+typedef struct { unsigned long pte; } pte_t;
 typedef struct { unsigned long pmd; } pmd_t;
 typedef struct { unsigned long pgd; } pgd_t;
-#define pte_val(x) ((x).pte_low | ((unsigned long long) (x).pte_high << 32))
+#define pte_val(p) ((p).pte)
 
-#define pte_get_bits(pte, bits) ((pte).pte_low & (bits))
-#define pte_set_bits(pte, bits) ((pte).pte_low |= (bits))
-#define pte_clear_bits(pte, bits) ((pte).pte_low &= ~(bits))
-#define pte_copy(to, from) ({ (to).pte_high = (from).pte_high; \
-			      smp_wmb(); \
-			      (to).pte_low = (from).pte_low; })
-#define pte_is_zero(pte) (!((pte).pte_low & ~_PAGE_NEWPAGE) && !(pte).pte_high)
-#define pte_set_val(pte, phys, prot) \
-	({ (pte).pte_high = (phys) >> 32; \
-	   (pte).pte_low = (phys) | pgprot_val(prot); })
+#define pte_get_bits(p, bits) ((p).pte & (bits))
+#define pte_set_bits(p, bits) ((p).pte |= (bits))
+#define pte_clear_bits(p, bits) ((p).pte &= ~(bits))
+#define pte_copy(to, from) ({ (to).pte = (from).pte; })
+#define pte_is_zero(p) (!((p).pte & ~_PAGE_NEWPAGE))
+#define pte_set_val(p, phys, prot) \
+	({ (p).pte = (phys) | pgprot_val(prot); })
 
 #define pmd_val(x)	((x).pmd)
 #define __pmd(x) ((pmd_t) { (x) } )
diff --git a/arch/um/kernel/reboot.c b/arch/um/kernel/reboot.c
index 9bdf67a..b60a9f8 100644
--- a/arch/um/kernel/reboot.c
+++ b/arch/um/kernel/reboot.c
@@ -12,6 +12,7 @@
 #include <skas.h>
 
 void (*pm_power_off)(void);
+EXPORT_SYMBOL(pm_power_off);
 
 static void kill_off_processes(void)
 {
diff --git a/arch/um/kernel/signal.c b/arch/um/kernel/signal.c
index fc8be0e..57acbd6 100644
--- a/arch/um/kernel/signal.c
+++ b/arch/um/kernel/signal.c
@@ -69,7 +69,7 @@
 	struct ksignal ksig;
 	int handled_sig = 0;
 
-	if (get_signal(&ksig)) {
+	while (get_signal(&ksig)) {
 		handled_sig = 1;
 		/* Whee!  Actually deliver the signal.  */
 		handle_signal(&ksig, regs);
diff --git a/arch/unicore32/kernel/setup.c b/arch/unicore32/kernel/setup.c
index 3fa317f..c2bffa5 100644
--- a/arch/unicore32/kernel/setup.c
+++ b/arch/unicore32/kernel/setup.c
@@ -72,13 +72,13 @@
 		.name = "Kernel code",
 		.start = 0,
 		.end = 0,
-		.flags = IORESOURCE_MEM
+		.flags = IORESOURCE_SYSTEM_RAM
 	},
 	{
 		.name = "Kernel data",
 		.start = 0,
 		.end = 0,
-		.flags = IORESOURCE_MEM
+		.flags = IORESOURCE_SYSTEM_RAM
 	}
 };
 
@@ -211,7 +211,7 @@
 		res->name  = "System RAM";
 		res->start = mi->bank[i].start;
 		res->end   = mi->bank[i].start + mi->bank[i].size - 1;
-		res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+		res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
 
 		request_resource(&iomem_resource, res);
 
diff --git a/arch/x86/Kbuild b/arch/x86/Kbuild
index 1538562..eb3abf8 100644
--- a/arch/x86/Kbuild
+++ b/arch/x86/Kbuild
@@ -1,6 +1,7 @@
-
 obj-y += entry/
 
+obj-$(CONFIG_PERF_EVENTS) += events/
+
 obj-$(CONFIG_KVM) += kvm/
 
 # Xen paravirtualization support
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 330e738..8f2e665 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -303,6 +303,9 @@
 config FIX_EARLYCON_MEM
 	def_bool y
 
+config DEBUG_RODATA
+	def_bool y
+
 config PGTABLE_LEVELS
 	int
 	default 4 if X86_64
@@ -475,6 +478,7 @@
 	depends on X86_64
 	depends on X86_EXTENDED_PLATFORM
 	depends on NUMA
+	depends on EFI
 	depends on X86_X2APIC
 	depends on PCI
 	---help---
@@ -509,11 +513,10 @@
 
 config X86_INTEL_MID
 	bool "Intel MID platform support"
-	depends on X86_32
 	depends on X86_EXTENDED_PLATFORM
 	depends on X86_PLATFORM_DEVICES
 	depends on PCI
-	depends on PCI_GOANY
+	depends on X86_64 || (PCI_GOANY && X86_32)
 	depends on X86_IO_APIC
 	select SFI
 	select I2C
@@ -778,8 +781,8 @@
 	  HPET is the next generation timer replacing legacy 8254s.
 	  The HPET provides a stable time base on SMP
 	  systems, unlike the TSC, but it is more expensive to access,
-	  as it is off-chip.  You can find the HPET spec at
-	  <http://www.intel.com/hardwaredesign/hpetspec_1.pdf>.
+	  as it is off-chip.  The interface used is documented
+	  in the HPET spec, revision 1.
 
 	  You can safely choose Y here.  However, HPET will only be
 	  activated if the platform and the BIOS support this feature.
@@ -1160,22 +1163,23 @@
 	bool "CPU microcode loading support"
 	default y
 	depends on CPU_SUP_AMD || CPU_SUP_INTEL
-	depends on BLK_DEV_INITRD
 	select FW_LOADER
 	---help---
-
 	  If you say Y here, you will be able to update the microcode on
-	  certain Intel and AMD processors. The Intel support is for the
-	  IA32 family, e.g. Pentium Pro, Pentium II, Pentium III, Pentium 4,
-	  Xeon etc. The AMD support is for families 0x10 and later. You will
-	  obviously need the actual microcode binary data itself which is not
-	  shipped with the Linux kernel.
+	  Intel and AMD processors. The Intel support is for the IA32 family,
+	  e.g. Pentium Pro, Pentium II, Pentium III, Pentium 4, Xeon etc. The
+	  AMD support is for families 0x10 and later. You will obviously need
+	  the actual microcode binary data itself which is not shipped with
+	  the Linux kernel.
 
-	  This option selects the general module only, you need to select
-	  at least one vendor specific module as well.
+	  The preferred method to load microcode from a detached initrd is described
+	  in Documentation/x86/early-microcode.txt. For that you need to enable
+	  CONFIG_BLK_DEV_INITRD in order for the loader to be able to scan the
+	  initrd for microcode blobs.
 
-	  To compile this driver as a module, choose M here: the module
-	  will be called microcode.
+	  In addition, you can build-in the microcode into the kernel. For that you
+	  need to enable FIRMWARE_IN_KERNEL and add the vendor-supplied microcode
+	  to the CONFIG_EXTRA_FIRMWARE config option.
 
 config MICROCODE_INTEL
 	bool "Intel microcode loading support"
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 9b18ed9..67eec55 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -74,28 +74,16 @@
 	  issues with the mapping of the EFI runtime regions into that
 	  table.
 
-config DEBUG_RODATA
-	bool "Write protect kernel read-only data structures"
-	default y
-	depends on DEBUG_KERNEL
-	---help---
-	  Mark the kernel read-only data as write-protected in the pagetables,
-	  in order to catch accidental (and incorrect) writes to such const
-	  data. This is recommended so that we can catch kernel bugs sooner.
-	  If in doubt, say "Y".
-
 config DEBUG_RODATA_TEST
-	bool "Testcase for the DEBUG_RODATA feature"
-	depends on DEBUG_RODATA
+	bool "Testcase for the marking rodata read-only"
 	default y
 	---help---
-	  This option enables a testcase for the DEBUG_RODATA
-	  feature as well as for the change_page_attr() infrastructure.
+	  This option enables a testcase for the setting rodata read-only
+	  as well as for the change_page_attr() infrastructure.
 	  If in doubt, say "N"
 
 config DEBUG_WX
 	bool "Warn on W+X mappings at boot"
-	depends on DEBUG_RODATA
 	select X86_PTDUMP_CORE
 	---help---
 	  Generate a warning if any W+X mappings are found at boot.
@@ -350,16 +338,6 @@
 
 	  If unsure say N here.
 
-config X86_DEBUG_STATIC_CPU_HAS
-	bool "Debug alternatives"
-	depends on DEBUG_KERNEL
-	---help---
-	  This option causes additional code to be generated which
-	  fails if static_cpu_has() is used before alternatives have
-	  run.
-
-	  If unsure, say N.
-
 config X86_DEBUG_FPU
 	bool "Debug the x86 FPU code"
 	depends on DEBUG_KERNEL
diff --git a/arch/x86/boot/cpuflags.h b/arch/x86/boot/cpuflags.h
index ea97697..4cb404f 100644
--- a/arch/x86/boot/cpuflags.h
+++ b/arch/x86/boot/cpuflags.h
@@ -1,7 +1,7 @@
 #ifndef BOOT_CPUFLAGS_H
 #define BOOT_CPUFLAGS_H
 
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
 #include <asm/processor-flags.h>
 
 struct cpu_features {
diff --git a/arch/x86/boot/mkcpustr.c b/arch/x86/boot/mkcpustr.c
index 637097e..f72498d 100644
--- a/arch/x86/boot/mkcpustr.c
+++ b/arch/x86/boot/mkcpustr.c
@@ -17,7 +17,7 @@
 
 #include "../include/asm/required-features.h"
 #include "../include/asm/disabled-features.h"
-#include "../include/asm/cpufeature.h"
+#include "../include/asm/cpufeatures.h"
 #include "../kernel/cpu/capflags.c"
 
 int main(void)
diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c
index a7661c4..0702d25 100644
--- a/arch/x86/boot/tools/build.c
+++ b/arch/x86/boot/tools/build.c
@@ -49,7 +49,6 @@
 
 /* This must be large enough to hold the entire setup */
 u8 buf[SETUP_SECT_MAX*512];
-int is_big_kernel;
 
 #define PECOFF_RELOC_RESERVE 0x20
 
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index 028be48..e25a163 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -288,7 +288,7 @@
 CONFIG_NLS_UTF8=y
 CONFIG_PRINTK_TIME=y
 # CONFIG_ENABLE_WARN_DEPRECATED is not set
-CONFIG_FRAME_WARN=2048
+CONFIG_FRAME_WARN=1024
 CONFIG_MAGIC_SYSRQ=y
 # CONFIG_UNUSED_SYMBOLS is not set
 CONFIG_DEBUG_KERNEL=y
diff --git a/arch/x86/crypto/chacha20-ssse3-x86_64.S b/arch/x86/crypto/chacha20-ssse3-x86_64.S
index 712b130..3a33124 100644
--- a/arch/x86/crypto/chacha20-ssse3-x86_64.S
+++ b/arch/x86/crypto/chacha20-ssse3-x86_64.S
@@ -157,7 +157,9 @@
 	# done with the slightly better performing SSSE3 byte shuffling,
 	# 7/12-bit word rotation uses traditional shift+OR.
 
-	sub		$0x40,%rsp
+	mov		%rsp,%r11
+	sub		$0x80,%rsp
+	and		$~63,%rsp
 
 	# x0..15[0-3] = s0..3[0..3]
 	movq		0x00(%rdi),%xmm1
@@ -620,6 +622,6 @@
 	pxor		%xmm1,%xmm15
 	movdqu		%xmm15,0xf0(%rsi)
 
-	add		$0x40,%rsp
+	mov		%r11,%rsp
 	ret
 ENDPROC(chacha20_4block_xor_ssse3)
diff --git a/arch/x86/crypto/crc32-pclmul_glue.c b/arch/x86/crypto/crc32-pclmul_glue.c
index 07d2c6c..27226df 100644
--- a/arch/x86/crypto/crc32-pclmul_glue.c
+++ b/arch/x86/crypto/crc32-pclmul_glue.c
@@ -33,7 +33,7 @@
 #include <linux/crc32.h>
 #include <crypto/internal/hash.h>
 
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
 #include <asm/cpu_device_id.h>
 #include <asm/fpu/api.h>
 
diff --git a/arch/x86/crypto/crc32c-intel_glue.c b/arch/x86/crypto/crc32c-intel_glue.c
index 0e98716..0857b1a 100644
--- a/arch/x86/crypto/crc32c-intel_glue.c
+++ b/arch/x86/crypto/crc32c-intel_glue.c
@@ -30,7 +30,7 @@
 #include <linux/kernel.h>
 #include <crypto/internal/hash.h>
 
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
 #include <asm/cpu_device_id.h>
 #include <asm/fpu/internal.h>
 
diff --git a/arch/x86/crypto/crct10dif-pclmul_glue.c b/arch/x86/crypto/crct10dif-pclmul_glue.c
index a3fcfc9..cd4df93 100644
--- a/arch/x86/crypto/crct10dif-pclmul_glue.c
+++ b/arch/x86/crypto/crct10dif-pclmul_glue.c
@@ -30,7 +30,7 @@
 #include <linux/string.h>
 #include <linux/kernel.h>
 #include <asm/fpu/api.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
 #include <asm/cpu_device_id.h>
 
 asmlinkage __u16 crc_t10dif_pcl(__u16 crc, const unsigned char *buf,
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index e32206e..9a9e588 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -201,37 +201,6 @@
 	.byte 0xf1
 	.endm
 
-#else /* CONFIG_X86_64 */
-
-/*
- * For 32bit only simplified versions of SAVE_ALL/RESTORE_ALL. These
- * are different from the entry_32.S versions in not changing the segment
- * registers. So only suitable for in kernel use, not when transitioning
- * from or to user space. The resulting stack frame is not a standard
- * pt_regs frame. The main use case is calling C code from assembler
- * when all the registers need to be preserved.
- */
-
-	.macro SAVE_ALL
-	pushl %eax
-	pushl %ebp
-	pushl %edi
-	pushl %esi
-	pushl %edx
-	pushl %ecx
-	pushl %ebx
-	.endm
-
-	.macro RESTORE_ALL
-	popl %ebx
-	popl %ecx
-	popl %edx
-	popl %esi
-	popl %edi
-	popl %ebp
-	popl %eax
-	.endm
-
 #endif /* CONFIG_X86_64 */
 
 /*
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 0366374..e79d93d 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -26,6 +26,7 @@
 #include <asm/traps.h>
 #include <asm/vdso.h>
 #include <asm/uaccess.h>
+#include <asm/cpufeature.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/syscalls.h>
@@ -44,6 +45,8 @@
 	CT_WARN_ON(ct_state() != CONTEXT_USER);
 	user_exit();
 }
+#else
+static inline void enter_from_user_mode(void) {}
 #endif
 
 static void do_audit_syscall_entry(struct pt_regs *regs, u32 arch)
@@ -84,17 +87,6 @@
 
 	work = ACCESS_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY;
 
-#ifdef CONFIG_CONTEXT_TRACKING
-	/*
-	 * If TIF_NOHZ is set, we are required to call user_exit() before
-	 * doing anything that could touch RCU.
-	 */
-	if (work & _TIF_NOHZ) {
-		enter_from_user_mode();
-		work &= ~_TIF_NOHZ;
-	}
-#endif
-
 #ifdef CONFIG_SECCOMP
 	/*
 	 * Do seccomp first -- it should minimize exposure of other
@@ -171,16 +163,6 @@
 	if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
 		BUG_ON(regs != task_pt_regs(current));
 
-	/*
-	 * If we stepped into a sysenter/syscall insn, it trapped in
-	 * kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP.
-	 * If user-mode had set TF itself, then it's still clear from
-	 * do_debug() and we need to set it again to restore the user
-	 * state.  If we entered on the slow path, TF was already set.
-	 */
-	if (work & _TIF_SINGLESTEP)
-		regs->flags |= X86_EFLAGS_TF;
-
 #ifdef CONFIG_SECCOMP
 	/*
 	 * Call seccomp_phase2 before running the other hooks so that
@@ -268,6 +250,7 @@
 /* Called with IRQs disabled. */
 __visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
 {
+	struct thread_info *ti = pt_regs_to_thread_info(regs);
 	u32 cached_flags;
 
 	if (IS_ENABLED(CONFIG_PROVE_LOCKING) && WARN_ON(!irqs_disabled()))
@@ -275,12 +258,22 @@
 
 	lockdep_sys_exit();
 
-	cached_flags =
-		READ_ONCE(pt_regs_to_thread_info(regs)->flags);
+	cached_flags = READ_ONCE(ti->flags);
 
 	if (unlikely(cached_flags & EXIT_TO_USERMODE_LOOP_FLAGS))
 		exit_to_usermode_loop(regs, cached_flags);
 
+#ifdef CONFIG_COMPAT
+	/*
+	 * Compat syscalls set TS_COMPAT.  Make sure we clear it before
+	 * returning to user mode.  We need to clear it *after* signal
+	 * handling, because syscall restart has a fixup for compat
+	 * syscalls.  The fixup is exercised by the ptrace_syscall_32
+	 * selftest.
+	 */
+	ti->status &= ~TS_COMPAT;
+#endif
+
 	user_enter();
 }
 
@@ -332,33 +325,45 @@
 	if (unlikely(cached_flags & SYSCALL_EXIT_WORK_FLAGS))
 		syscall_slow_exit_work(regs, cached_flags);
 
-#ifdef CONFIG_COMPAT
-	/*
-	 * Compat syscalls set TS_COMPAT.  Make sure we clear it before
-	 * returning to user mode.
-	 */
-	ti->status &= ~TS_COMPAT;
-#endif
-
 	local_irq_disable();
 	prepare_exit_to_usermode(regs);
 }
 
+#ifdef CONFIG_X86_64
+__visible void do_syscall_64(struct pt_regs *regs)
+{
+	struct thread_info *ti = pt_regs_to_thread_info(regs);
+	unsigned long nr = regs->orig_ax;
+
+	enter_from_user_mode();
+	local_irq_enable();
+
+	if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY)
+		nr = syscall_trace_enter(regs);
+
+	/*
+	 * NB: Native and x32 syscalls are dispatched from the same
+	 * table.  The only functional difference is the x32 bit in
+	 * regs->orig_ax, which changes the behavior of some syscalls.
+	 */
+	if (likely((nr & __SYSCALL_MASK) < NR_syscalls)) {
+		regs->ax = sys_call_table[nr & __SYSCALL_MASK](
+			regs->di, regs->si, regs->dx,
+			regs->r10, regs->r8, regs->r9);
+	}
+
+	syscall_return_slowpath(regs);
+}
+#endif
+
 #if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
 /*
- * Does a 32-bit syscall.  Called with IRQs on and does all entry and
- * exit work and returns with IRQs off.  This function is extremely hot
- * in workloads that use it, and it's usually called from
+ * Does a 32-bit syscall.  Called with IRQs on in CONTEXT_KERNEL.  Does
+ * all entry and exit work and returns with IRQs off.  This function is
+ * extremely hot in workloads that use it, and it's usually called from
  * do_fast_syscall_32, so forcibly inline it to improve performance.
  */
-#ifdef CONFIG_X86_32
-/* 32-bit kernels use a trap gate for INT80, and the asm code calls here. */
-__visible
-#else
-/* 64-bit kernels use do_syscall_32_irqs_off() instead. */
-static
-#endif
-__always_inline void do_syscall_32_irqs_on(struct pt_regs *regs)
+static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs)
 {
 	struct thread_info *ti = pt_regs_to_thread_info(regs);
 	unsigned int nr = (unsigned int)regs->orig_ax;
@@ -393,14 +398,13 @@
 	syscall_return_slowpath(regs);
 }
 
-#ifdef CONFIG_X86_64
-/* Handles INT80 on 64-bit kernels */
-__visible void do_syscall_32_irqs_off(struct pt_regs *regs)
+/* Handles int $0x80 */
+__visible void do_int80_syscall_32(struct pt_regs *regs)
 {
+	enter_from_user_mode();
 	local_irq_enable();
 	do_syscall_32_irqs_on(regs);
 }
-#endif
 
 /* Returns 0 to return using IRET or 1 to return using SYSEXIT/SYSRETL. */
 __visible long do_fast_syscall_32(struct pt_regs *regs)
@@ -420,12 +424,11 @@
 	 */
 	regs->ip = landing_pad;
 
-	/*
-	 * Fetch EBP from where the vDSO stashed it.
-	 *
-	 * WARNING: We are in CONTEXT_USER and RCU isn't paying attention!
-	 */
+	enter_from_user_mode();
+
 	local_irq_enable();
+
+	/* Fetch EBP from where the vDSO stashed it. */
 	if (
 #ifdef CONFIG_X86_64
 		/*
@@ -443,9 +446,6 @@
 		/* User code screwed up. */
 		local_irq_disable();
 		regs->ax = -EFAULT;
-#ifdef CONFIG_CONTEXT_TRACKING
-		enter_from_user_mode();
-#endif
 		prepare_exit_to_usermode(regs);
 		return 0;	/* Keep it simple: use IRET. */
 	}
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 77d8c51..10868aa 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -40,7 +40,7 @@
 #include <asm/processor-flags.h>
 #include <asm/ftrace.h>
 #include <asm/irq_vectors.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
 #include <asm/alternative-asm.h>
 #include <asm/asm.h>
 #include <asm/smap.h>
@@ -287,7 +287,58 @@
 END(resume_kernel)
 #endif
 
-	# SYSENTER  call handler stub
+GLOBAL(__begin_SYSENTER_singlestep_region)
+/*
+ * All code from here through __end_SYSENTER_singlestep_region is subject
+ * to being single-stepped if a user program sets TF and executes SYSENTER.
+ * There is absolutely nothing that we can do to prevent this from happening
+ * (thanks Intel!).  To keep our handling of this situation as simple as
+ * possible, we handle TF just like AC and NT, except that our #DB handler
+ * will ignore all of the single-step traps generated in this range.
+ */
+
+#ifdef CONFIG_XEN
+/*
+ * Xen doesn't set %esp to be precisely what the normal SYSENTER
+ * entry point expects, so fix it up before using the normal path.
+ */
+ENTRY(xen_sysenter_target)
+	addl	$5*4, %esp			/* remove xen-provided frame */
+	jmp	sysenter_past_esp
+#endif
+
+/*
+ * 32-bit SYSENTER entry.
+ *
+ * 32-bit system calls through the vDSO's __kernel_vsyscall enter here
+ * if X86_FEATURE_SEP is available.  This is the preferred system call
+ * entry on 32-bit systems.
+ *
+ * The SYSENTER instruction, in principle, should *only* occur in the
+ * vDSO.  In practice, a small number of Android devices were shipped
+ * with a copy of Bionic that inlined a SYSENTER instruction.  This
+ * never happened in any of Google's Bionic versions -- it only happened
+ * in a narrow range of Intel-provided versions.
+ *
+ * SYSENTER loads SS, ESP, CS, and EIP from previously programmed MSRs.
+ * IF and VM in RFLAGS are cleared (IOW: interrupts are off).
+ * SYSENTER does not save anything on the stack,
+ * and does not save old EIP (!!!), ESP, or EFLAGS.
+ *
+ * To avoid losing track of EFLAGS.VM (and thus potentially corrupting
+ * user and/or vm86 state), we explicitly disable the SYSENTER
+ * instruction in vm86 mode by reprogramming the MSRs.
+ *
+ * Arguments:
+ * eax  system call number
+ * ebx  arg1
+ * ecx  arg2
+ * edx  arg3
+ * esi  arg4
+ * edi  arg5
+ * ebp  user stack
+ * 0(%ebp) arg6
+ */
 ENTRY(entry_SYSENTER_32)
 	movl	TSS_sysenter_sp0(%esp), %esp
 sysenter_past_esp:
@@ -301,6 +352,29 @@
 	SAVE_ALL pt_regs_ax=$-ENOSYS	/* save rest */
 
 	/*
+	 * SYSENTER doesn't filter flags, so we need to clear NT, AC
+	 * and TF ourselves.  To save a few cycles, we can check whether
+	 * either was set instead of doing an unconditional popfq.
+	 * This needs to happen before enabling interrupts so that
+	 * we don't get preempted with NT set.
+	 *
+	 * If TF is set, we will single-step all the way to here -- do_debug
+	 * will ignore all the traps.  (Yes, this is slow, but so is
+	 * single-stepping in general.  This allows us to avoid having
+	 * a more complicated code to handle the case where a user program
+	 * forces us to single-step through the SYSENTER entry code.)
+	 *
+	 * NB.: .Lsysenter_fix_flags is a label with the code under it moved
+	 * out-of-line as an optimization: NT is unlikely to be set in the
+	 * majority of the cases and instead of polluting the I$ unnecessarily,
+	 * we're keeping that code behind a branch which will predict as
+	 * not-taken and therefore its instructions won't be fetched.
+	 */
+	testl	$X86_EFLAGS_NT|X86_EFLAGS_AC|X86_EFLAGS_TF, PT_EFLAGS(%esp)
+	jnz	.Lsysenter_fix_flags
+.Lsysenter_flags_fixed:
+
+	/*
 	 * User mode is traced as though IRQs are on, and SYSENTER
 	 * turned them off.
 	 */
@@ -326,6 +400,15 @@
 	popl	%eax			/* pt_regs->ax */
 
 	/*
+	 * Restore all flags except IF. (We restore IF separately because
+	 * STI gives a one-instruction window in which we won't be interrupted,
+	 * whereas POPF does not.)
+	 */
+	addl	$PT_EFLAGS-PT_DS, %esp	/* point esp at pt_regs->flags */
+	btr	$X86_EFLAGS_IF_BIT, (%esp)
+	popfl
+
+	/*
 	 * Return back to the vDSO, which will pop ecx and edx.
 	 * Don't bother with DS and ES (they already contain __USER_DS).
 	 */
@@ -338,28 +421,63 @@
 .popsection
 	_ASM_EXTABLE(1b, 2b)
 	PTGS_TO_GS_EX
+
+.Lsysenter_fix_flags:
+	pushl	$X86_EFLAGS_FIXED
+	popfl
+	jmp	.Lsysenter_flags_fixed
+GLOBAL(__end_SYSENTER_singlestep_region)
 ENDPROC(entry_SYSENTER_32)
 
-	# system call handler stub
+/*
+ * 32-bit legacy system call entry.
+ *
+ * 32-bit x86 Linux system calls traditionally used the INT $0x80
+ * instruction.  INT $0x80 lands here.
+ *
+ * This entry point can be used by any 32-bit perform system calls.
+ * Instances of INT $0x80 can be found inline in various programs and
+ * libraries.  It is also used by the vDSO's __kernel_vsyscall
+ * fallback for hardware that doesn't support a faster entry method.
+ * Restarted 32-bit system calls also fall back to INT $0x80
+ * regardless of what instruction was originally used to do the system
+ * call.  (64-bit programs can use INT $0x80 as well, but they can
+ * only run on 64-bit kernels and therefore land in
+ * entry_INT80_compat.)
+ *
+ * This is considered a slow path.  It is not used by most libc
+ * implementations on modern hardware except during process startup.
+ *
+ * Arguments:
+ * eax  system call number
+ * ebx  arg1
+ * ecx  arg2
+ * edx  arg3
+ * esi  arg4
+ * edi  arg5
+ * ebp  arg6
+ */
 ENTRY(entry_INT80_32)
 	ASM_CLAC
 	pushl	%eax			/* pt_regs->orig_ax */
 	SAVE_ALL pt_regs_ax=$-ENOSYS	/* save rest */
 
 	/*
-	 * User mode is traced as though IRQs are on.  Unlike the 64-bit
-	 * case, INT80 is a trap gate on 32-bit kernels, so interrupts
-	 * are already on (unless user code is messing around with iopl).
+	 * User mode is traced as though IRQs are on, and the interrupt gate
+	 * turned them off.
 	 */
+	TRACE_IRQS_OFF
 
 	movl	%esp, %eax
-	call	do_syscall_32_irqs_on
+	call	do_int80_syscall_32
 .Lsyscall_32_done:
 
 restore_all:
 	TRACE_IRQS_IRET
 restore_all_notrace:
 #ifdef CONFIG_X86_ESPFIX32
+	ALTERNATIVE	"jmp restore_nocheck", "", X86_BUG_ESPFIX
+
 	movl	PT_EFLAGS(%esp), %eax		# mix EFLAGS, SS and CS
 	/*
 	 * Warning: PT_OLDSS(%esp) contains the wrong/random values if we
@@ -386,19 +504,6 @@
 
 #ifdef CONFIG_X86_ESPFIX32
 ldt_ss:
-#ifdef CONFIG_PARAVIRT
-	/*
-	 * The kernel can't run on a non-flat stack if paravirt mode
-	 * is active.  Rather than try to fixup the high bits of
-	 * ESP, bypass this code entirely.  This may break DOSemu
-	 * and/or Wine support in a paravirt VM, although the option
-	 * is still available to implement the setting of the high
-	 * 16-bits in the INTERRUPT_RETURN paravirt-op.
-	 */
-	cmpl	$0, pv_info+PARAVIRT_enabled
-	jne	restore_nocheck
-#endif
-
 /*
  * Setup and switch to ESPFIX stack
  *
@@ -631,14 +736,6 @@
 END(spurious_interrupt_bug)
 
 #ifdef CONFIG_XEN
-/*
- * Xen doesn't set %esp to be precisely what the normal SYSENTER
- * entry point expects, so fix it up before using the normal path.
- */
-ENTRY(xen_sysenter_target)
-	addl	$5*4, %esp			/* remove xen-provided frame */
-	jmp	sysenter_past_esp
-
 ENTRY(xen_hypervisor_callback)
 	pushl	$-1				/* orig_ax = -1 => not a system call */
 	SAVE_ALL
@@ -938,51 +1035,48 @@
 	jmp	ret_from_exception
 END(page_fault)
 
-/*
- * Debug traps and NMI can happen at the one SYSENTER instruction
- * that sets up the real kernel stack. Check here, since we can't
- * allow the wrong stack to be used.
- *
- * "TSS_sysenter_sp0+12" is because the NMI/debug handler will have
- * already pushed 3 words if it hits on the sysenter instruction:
- * eflags, cs and eip.
- *
- * We just load the right stack, and push the three (known) values
- * by hand onto the new stack - while updating the return eip past
- * the instruction that would have done it for sysenter.
- */
-.macro FIX_STACK offset ok label
-	cmpw	$__KERNEL_CS, 4(%esp)
-	jne	\ok
-\label:
-	movl	TSS_sysenter_sp0 + \offset(%esp), %esp
-	pushfl
-	pushl	$__KERNEL_CS
-	pushl	$sysenter_past_esp
-.endm
-
 ENTRY(debug)
+	/*
+	 * #DB can happen at the first instruction of
+	 * entry_SYSENTER_32 or in Xen's SYSENTER prologue.  If this
+	 * happens, then we will be running on a very small stack.  We
+	 * need to detect this condition and switch to the thread
+	 * stack before calling any C code at all.
+	 *
+	 * If you edit this code, keep in mind that NMIs can happen in here.
+	 */
 	ASM_CLAC
-	cmpl	$entry_SYSENTER_32, (%esp)
-	jne	debug_stack_correct
-	FIX_STACK 12, debug_stack_correct, debug_esp_fix_insn
-debug_stack_correct:
 	pushl	$-1				# mark this as an int
 	SAVE_ALL
-	TRACE_IRQS_OFF
 	xorl	%edx, %edx			# error code 0
 	movl	%esp, %eax			# pt_regs pointer
+
+	/* Are we currently on the SYSENTER stack? */
+	PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx)
+	subl	%eax, %ecx	/* ecx = (end of SYSENTER_stack) - esp */
+	cmpl	$SIZEOF_SYSENTER_stack, %ecx
+	jb	.Ldebug_from_sysenter_stack
+
+	TRACE_IRQS_OFF
 	call	do_debug
 	jmp	ret_from_exception
+
+.Ldebug_from_sysenter_stack:
+	/* We're on the SYSENTER stack.  Switch off. */
+	movl	%esp, %ebp
+	movl	PER_CPU_VAR(cpu_current_top_of_stack), %esp
+	TRACE_IRQS_OFF
+	call	do_debug
+	movl	%ebp, %esp
+	jmp	ret_from_exception
 END(debug)
 
 /*
- * NMI is doubly nasty. It can happen _while_ we're handling
- * a debug fault, and the debug fault hasn't yet been able to
- * clear up the stack. So we first check whether we got  an
- * NMI on the sysenter entry path, but after that we need to
- * check whether we got an NMI on the debug path where the debug
- * fault happened on the sysenter path.
+ * NMI is doubly nasty.  It can happen on the first instruction of
+ * entry_SYSENTER_32 (just like #DB), but it can also interrupt the beginning
+ * of the #DB handler even if that #DB in turn hit before entry_SYSENTER_32
+ * switched stacks.  We handle both conditions by simply checking whether we
+ * interrupted kernel code running on the SYSENTER stack.
  */
 ENTRY(nmi)
 	ASM_CLAC
@@ -993,41 +1087,32 @@
 	popl	%eax
 	je	nmi_espfix_stack
 #endif
-	cmpl	$entry_SYSENTER_32, (%esp)
-	je	nmi_stack_fixup
-	pushl	%eax
-	movl	%esp, %eax
-	/*
-	 * Do not access memory above the end of our stack page,
-	 * it might not exist.
-	 */
-	andl	$(THREAD_SIZE-1), %eax
-	cmpl	$(THREAD_SIZE-20), %eax
-	popl	%eax
-	jae	nmi_stack_correct
-	cmpl	$entry_SYSENTER_32, 12(%esp)
-	je	nmi_debug_stack_check
-nmi_stack_correct:
-	pushl	%eax
+
+	pushl	%eax				# pt_regs->orig_ax
 	SAVE_ALL
 	xorl	%edx, %edx			# zero error code
 	movl	%esp, %eax			# pt_regs pointer
+
+	/* Are we currently on the SYSENTER stack? */
+	PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx)
+	subl	%eax, %ecx	/* ecx = (end of SYSENTER_stack) - esp */
+	cmpl	$SIZEOF_SYSENTER_stack, %ecx
+	jb	.Lnmi_from_sysenter_stack
+
+	/* Not on SYSENTER stack. */
 	call	do_nmi
 	jmp	restore_all_notrace
 
-nmi_stack_fixup:
-	FIX_STACK 12, nmi_stack_correct, 1
-	jmp	nmi_stack_correct
-
-nmi_debug_stack_check:
-	cmpw	$__KERNEL_CS, 16(%esp)
-	jne	nmi_stack_correct
-	cmpl	$debug, (%esp)
-	jb	nmi_stack_correct
-	cmpl	$debug_esp_fix_insn, (%esp)
-	ja	nmi_stack_correct
-	FIX_STACK 24, nmi_stack_correct, 1
-	jmp	nmi_stack_correct
+.Lnmi_from_sysenter_stack:
+	/*
+	 * We're on the SYSENTER stack.  Switch off.  No one (not even debug)
+	 * is using the thread stack right now, so it's safe for us to use it.
+	 */
+	movl	%esp, %ebp
+	movl	PER_CPU_VAR(cpu_current_top_of_stack), %esp
+	call	do_nmi
+	movl	%ebp, %esp
+	jmp	restore_all_notrace
 
 #ifdef CONFIG_X86_ESPFIX32
 nmi_espfix_stack:
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 9d34d3c..858b555 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -103,6 +103,16 @@
 /*
  * 64-bit SYSCALL instruction entry. Up to 6 arguments in registers.
  *
+ * This is the only entry point used for 64-bit system calls.  The
+ * hardware interface is reasonably well designed and the register to
+ * argument mapping Linux uses fits well with the registers that are
+ * available when SYSCALL is used.
+ *
+ * SYSCALL instructions can be found inlined in libc implementations as
+ * well as some other programs and libraries.  There are also a handful
+ * of SYSCALL instructions in the vDSO used, for example, as a
+ * clock_gettimeofday fallback.
+ *
  * 64-bit SYSCALL saves rip to rcx, clears rflags.RF, then saves rflags to r11,
  * then loads new ss, cs, and rip from previously programmed MSRs.
  * rflags gets masked by a value from another MSR (so CLD and CLAC
@@ -145,17 +155,11 @@
 	movq	%rsp, PER_CPU_VAR(rsp_scratch)
 	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
 
+	TRACE_IRQS_OFF
+
 	/* Construct struct pt_regs on stack */
 	pushq	$__USER_DS			/* pt_regs->ss */
 	pushq	PER_CPU_VAR(rsp_scratch)	/* pt_regs->sp */
-	/*
-	 * Re-enable interrupts.
-	 * We use 'rsp_scratch' as a scratch space, hence irq-off block above
-	 * must execute atomically in the face of possible interrupt-driven
-	 * task preemption. We must enable interrupts only after we're done
-	 * with using rsp_scratch:
-	 */
-	ENABLE_INTERRUPTS(CLBR_NONE)
 	pushq	%r11				/* pt_regs->flags */
 	pushq	$__USER_CS			/* pt_regs->cs */
 	pushq	%rcx				/* pt_regs->ip */
@@ -171,9 +175,21 @@
 	pushq	%r11				/* pt_regs->r11 */
 	sub	$(6*8), %rsp			/* pt_regs->bp, bx, r12-15 not saved */
 
-	testl	$_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
-	jnz	tracesys
+	/*
+	 * If we need to do entry work or if we guess we'll need to do
+	 * exit work, go straight to the slow path.
+	 */
+	testl	$_TIF_WORK_SYSCALL_ENTRY|_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
+	jnz	entry_SYSCALL64_slow_path
+
 entry_SYSCALL_64_fastpath:
+	/*
+	 * Easy case: enable interrupts and issue the syscall.  If the syscall
+	 * needs pt_regs, we'll call a stub that disables interrupts again
+	 * and jumps to the slow path.
+	 */
+	TRACE_IRQS_ON
+	ENABLE_INTERRUPTS(CLBR_NONE)
 #if __SYSCALL_MASK == ~0
 	cmpq	$__NR_syscall_max, %rax
 #else
@@ -182,103 +198,56 @@
 #endif
 	ja	1f				/* return -ENOSYS (already in pt_regs->ax) */
 	movq	%r10, %rcx
+
+	/*
+	 * This call instruction is handled specially in stub_ptregs_64.
+	 * It might end up jumping to the slow path.  If it jumps, RAX
+	 * and all argument registers are clobbered.
+	 */
 	call	*sys_call_table(, %rax, 8)
+.Lentry_SYSCALL_64_after_fastpath_call:
+
 	movq	%rax, RAX(%rsp)
 1:
-/*
- * Syscall return path ending with SYSRET (fast path).
- * Has incompletely filled pt_regs.
- */
-	LOCKDEP_SYS_EXIT
+
 	/*
-	 * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
-	 * it is too small to ever cause noticeable irq latency.
+	 * If we get here, then we know that pt_regs is clean for SYSRET64.
+	 * If we see that no exit work is required (which we are required
+	 * to check with IRQs off), then we can go straight to SYSRET64.
 	 */
 	DISABLE_INTERRUPTS(CLBR_NONE)
-
-	/*
-	 * We must check ti flags with interrupts (or at least preemption)
-	 * off because we must *never* return to userspace without
-	 * processing exit work that is enqueued if we're preempted here.
-	 * In particular, returning to userspace with any of the one-shot
-	 * flags (TIF_NOTIFY_RESUME, TIF_USER_RETURN_NOTIFY, etc) set is
-	 * very bad.
-	 */
+	TRACE_IRQS_OFF
 	testl	$_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
-	jnz	int_ret_from_sys_call_irqs_off	/* Go to the slow path */
+	jnz	1f
 
-	RESTORE_C_REGS_EXCEPT_RCX_R11
+	LOCKDEP_SYS_EXIT
+	TRACE_IRQS_ON		/* user mode is traced as IRQs on */
 	movq	RIP(%rsp), %rcx
 	movq	EFLAGS(%rsp), %r11
+	RESTORE_C_REGS_EXCEPT_RCX_R11
 	movq	RSP(%rsp), %rsp
-	/*
-	 * 64-bit SYSRET restores rip from rcx,
-	 * rflags from r11 (but RF and VM bits are forced to 0),
-	 * cs and ss are loaded from MSRs.
-	 * Restoration of rflags re-enables interrupts.
-	 *
-	 * NB: On AMD CPUs with the X86_BUG_SYSRET_SS_ATTRS bug, the ss
-	 * descriptor is not reinitialized.  This means that we should
-	 * avoid SYSRET with SS == NULL, which could happen if we schedule,
-	 * exit the kernel, and re-enter using an interrupt vector.  (All
-	 * interrupt entries on x86_64 set SS to NULL.)  We prevent that
-	 * from happening by reloading SS in __switch_to.  (Actually
-	 * detecting the failure in 64-bit userspace is tricky but can be
-	 * done.)
-	 */
 	USERGS_SYSRET64
 
-GLOBAL(int_ret_from_sys_call_irqs_off)
+1:
+	/*
+	 * The fast path looked good when we started, but something changed
+	 * along the way and we need to switch to the slow path.  Calling
+	 * raise(3) will trigger this, for example.  IRQs are off.
+	 */
 	TRACE_IRQS_ON
 	ENABLE_INTERRUPTS(CLBR_NONE)
-	jmp int_ret_from_sys_call
-
-	/* Do syscall entry tracing */
-tracesys:
-	movq	%rsp, %rdi
-	movl	$AUDIT_ARCH_X86_64, %esi
-	call	syscall_trace_enter_phase1
-	test	%rax, %rax
-	jnz	tracesys_phase2			/* if needed, run the slow path */
-	RESTORE_C_REGS_EXCEPT_RAX		/* else restore clobbered regs */
-	movq	ORIG_RAX(%rsp), %rax
-	jmp	entry_SYSCALL_64_fastpath	/* and return to the fast path */
-
-tracesys_phase2:
-	SAVE_EXTRA_REGS
-	movq	%rsp, %rdi
-	movl	$AUDIT_ARCH_X86_64, %esi
-	movq	%rax, %rdx
-	call	syscall_trace_enter_phase2
-
-	/*
-	 * Reload registers from stack in case ptrace changed them.
-	 * We don't reload %rax because syscall_trace_entry_phase2() returned
-	 * the value it wants us to use in the table lookup.
-	 */
-	RESTORE_C_REGS_EXCEPT_RAX
-	RESTORE_EXTRA_REGS
-#if __SYSCALL_MASK == ~0
-	cmpq	$__NR_syscall_max, %rax
-#else
-	andl	$__SYSCALL_MASK, %eax
-	cmpl	$__NR_syscall_max, %eax
-#endif
-	ja	1f				/* return -ENOSYS (already in pt_regs->ax) */
-	movq	%r10, %rcx			/* fixup for C */
-	call	*sys_call_table(, %rax, 8)
-	movq	%rax, RAX(%rsp)
-1:
-	/* Use IRET because user could have changed pt_regs->foo */
-
-/*
- * Syscall return path ending with IRET.
- * Has correct iret frame.
- */
-GLOBAL(int_ret_from_sys_call)
 	SAVE_EXTRA_REGS
 	movq	%rsp, %rdi
 	call	syscall_return_slowpath	/* returns with IRQs disabled */
+	jmp	return_from_SYSCALL_64
+
+entry_SYSCALL64_slow_path:
+	/* IRQs are off. */
+	SAVE_EXTRA_REGS
+	movq	%rsp, %rdi
+	call	do_syscall_64		/* returns with IRQs disabled */
+
+return_from_SYSCALL_64:
 	RESTORE_EXTRA_REGS
 	TRACE_IRQS_IRETQ		/* we're about to change IF */
 
@@ -355,83 +324,45 @@
 	jmp	restore_c_regs_and_iret
 END(entry_SYSCALL_64)
 
-
-	.macro FORK_LIKE func
-ENTRY(stub_\func)
-	SAVE_EXTRA_REGS 8
-	jmp	sys_\func
-END(stub_\func)
-	.endm
-
-	FORK_LIKE  clone
-	FORK_LIKE  fork
-	FORK_LIKE  vfork
-
-ENTRY(stub_execve)
-	call	sys_execve
-return_from_execve:
-	testl	%eax, %eax
-	jz	1f
-	/* exec failed, can use fast SYSRET code path in this case */
-	ret
-1:
-	/* must use IRET code path (pt_regs->cs may have changed) */
-	addq	$8, %rsp
-	ZERO_EXTRA_REGS
-	movq	%rax, RAX(%rsp)
-	jmp	int_ret_from_sys_call
-END(stub_execve)
-/*
- * Remaining execve stubs are only 7 bytes long.
- * ENTRY() often aligns to 16 bytes, which in this case has no benefits.
- */
-	.align	8
-GLOBAL(stub_execveat)
-	call	sys_execveat
-	jmp	return_from_execve
-END(stub_execveat)
-
-#if defined(CONFIG_X86_X32_ABI)
-	.align	8
-GLOBAL(stub_x32_execve)
-	call	compat_sys_execve
-	jmp	return_from_execve
-END(stub_x32_execve)
-	.align	8
-GLOBAL(stub_x32_execveat)
-	call	compat_sys_execveat
-	jmp	return_from_execve
-END(stub_x32_execveat)
-#endif
-
-/*
- * sigreturn is special because it needs to restore all registers on return.
- * This cannot be done with SYSRET, so use the IRET return path instead.
- */
-ENTRY(stub_rt_sigreturn)
+ENTRY(stub_ptregs_64)
 	/*
-	 * SAVE_EXTRA_REGS result is not normally needed:
-	 * sigreturn overwrites all pt_regs->GPREGS.
-	 * But sigreturn can fail (!), and there is no easy way to detect that.
-	 * To make sure RESTORE_EXTRA_REGS doesn't restore garbage on error,
-	 * we SAVE_EXTRA_REGS here.
+	 * Syscalls marked as needing ptregs land here.
+	 * If we are on the fast path, we need to save the extra regs,
+	 * which we achieve by trying again on the slow path.  If we are on
+	 * the slow path, the extra regs are already saved.
+	 *
+	 * RAX stores a pointer to the C function implementing the syscall.
+	 * IRQs are on.
 	 */
-	SAVE_EXTRA_REGS 8
-	call	sys_rt_sigreturn
-return_from_stub:
-	addq	$8, %rsp
-	RESTORE_EXTRA_REGS
-	movq	%rax, RAX(%rsp)
-	jmp	int_ret_from_sys_call
-END(stub_rt_sigreturn)
+	cmpq	$.Lentry_SYSCALL_64_after_fastpath_call, (%rsp)
+	jne	1f
 
-#ifdef CONFIG_X86_X32_ABI
-ENTRY(stub_x32_rt_sigreturn)
-	SAVE_EXTRA_REGS 8
-	call	sys32_x32_rt_sigreturn
-	jmp	return_from_stub
-END(stub_x32_rt_sigreturn)
-#endif
+	/*
+	 * Called from fast path -- disable IRQs again, pop return address
+	 * and jump to slow path
+	 */
+	DISABLE_INTERRUPTS(CLBR_NONE)
+	TRACE_IRQS_OFF
+	popq	%rax
+	jmp	entry_SYSCALL64_slow_path
+
+1:
+	/* Called from C */
+	jmp	*%rax				/* called from C */
+END(stub_ptregs_64)
+
+.macro ptregs_stub func
+ENTRY(ptregs_\func)
+	leaq	\func(%rip), %rax
+	jmp	stub_ptregs_64
+END(ptregs_\func)
+.endm
+
+/* Instantiate ptregs_stub for each ptregs-using syscall */
+#define __SYSCALL_64_QUAL_(sym)
+#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_stub sym
+#define __SYSCALL_64(nr, sym, qual) __SYSCALL_64_QUAL_##qual(sym)
+#include <asm/syscalls_64.h>
 
 /*
  * A newly forked process directly context switches into this address.
@@ -439,7 +370,6 @@
  * rdi: prev task we switched from
  */
 ENTRY(ret_from_fork)
-
 	LOCK ; btr $TIF_FORK, TI_flags(%r8)
 
 	pushq	$0x0002
@@ -447,28 +377,32 @@
 
 	call	schedule_tail			/* rdi: 'prev' task parameter */
 
-	RESTORE_EXTRA_REGS
-
 	testb	$3, CS(%rsp)			/* from kernel_thread? */
+	jnz	1f
 
 	/*
-	 * By the time we get here, we have no idea whether our pt_regs,
-	 * ti flags, and ti status came from the 64-bit SYSCALL fast path,
-	 * the slow path, or one of the 32-bit compat paths.
-	 * Use IRET code path to return, since it can safely handle
-	 * all of the above.
+	 * We came from kernel_thread.  This code path is quite twisted, and
+	 * someone should clean it up.
+	 *
+	 * copy_thread_tls stashes the function pointer in RBX and the
+	 * parameter to be passed in RBP.  The called function is permitted
+	 * to call do_execve and thereby jump to user mode.
 	 */
-	jnz	int_ret_from_sys_call
-
-	/*
-	 * We came from kernel_thread
-	 * nb: we depend on RESTORE_EXTRA_REGS above
-	 */
-	movq	%rbp, %rdi
-	call	*%rbx
+	movq	RBP(%rsp), %rdi
+	call	*RBX(%rsp)
 	movl	$0, RAX(%rsp)
-	RESTORE_EXTRA_REGS
-	jmp	int_ret_from_sys_call
+
+	/*
+	 * Fall through as though we're exiting a syscall.  This makes a
+	 * twisted sort of sense if we just called do_execve.
+	 */
+
+1:
+	movq	%rsp, %rdi
+	call	syscall_return_slowpath	/* returns with IRQs disabled */
+	TRACE_IRQS_ON			/* user mode is traced as IRQS on */
+	SWAPGS
+	jmp	restore_regs_and_iret
 END(ret_from_fork)
 
 /*
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index ff1c6d6..847f2f0 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -19,12 +19,21 @@
 	.section .entry.text, "ax"
 
 /*
- * 32-bit SYSENTER instruction entry.
+ * 32-bit SYSENTER entry.
  *
- * SYSENTER loads ss, rsp, cs, and rip from previously programmed MSRs.
- * IF and VM in rflags are cleared (IOW: interrupts are off).
+ * 32-bit system calls through the vDSO's __kernel_vsyscall enter here
+ * on 64-bit kernels running on Intel CPUs.
+ *
+ * The SYSENTER instruction, in principle, should *only* occur in the
+ * vDSO.  In practice, a small number of Android devices were shipped
+ * with a copy of Bionic that inlined a SYSENTER instruction.  This
+ * never happened in any of Google's Bionic versions -- it only happened
+ * in a narrow range of Intel-provided versions.
+ *
+ * SYSENTER loads SS, RSP, CS, and RIP from previously programmed MSRs.
+ * IF and VM in RFLAGS are cleared (IOW: interrupts are off).
  * SYSENTER does not save anything on the stack,
- * and does not save old rip (!!!) and rflags.
+ * and does not save old RIP (!!!), RSP, or RFLAGS.
  *
  * Arguments:
  * eax  system call number
@@ -35,10 +44,6 @@
  * edi  arg5
  * ebp  user stack
  * 0(%ebp) arg6
- *
- * This is purely a fast path. For anything complicated we use the int 0x80
- * path below. We set up a complete hardware stack frame to share code
- * with the int 0x80 path.
  */
 ENTRY(entry_SYSENTER_compat)
 	/* Interrupts are off on entry. */
@@ -66,8 +71,6 @@
 	 */
 	pushfq				/* pt_regs->flags (except IF = 0) */
 	orl	$X86_EFLAGS_IF, (%rsp)	/* Fix saved flags */
-	ASM_CLAC			/* Clear AC after saving FLAGS */
-
 	pushq	$__USER32_CS		/* pt_regs->cs */
 	xorq    %r8,%r8
 	pushq	%r8			/* pt_regs->ip = 0 (placeholder) */
@@ -90,19 +93,25 @@
 	cld
 
 	/*
-	 * Sysenter doesn't filter flags, so we need to clear NT
+	 * SYSENTER doesn't filter flags, so we need to clear NT and AC
 	 * ourselves.  To save a few cycles, we can check whether
-	 * NT was set instead of doing an unconditional popfq.
+	 * either was set instead of doing an unconditional popfq.
 	 * This needs to happen before enabling interrupts so that
 	 * we don't get preempted with NT set.
 	 *
+	 * If TF is set, we will single-step all the way to here -- do_debug
+	 * will ignore all the traps.  (Yes, this is slow, but so is
+	 * single-stepping in general.  This allows us to avoid having
+	 * a more complicated code to handle the case where a user program
+	 * forces us to single-step through the SYSENTER entry code.)
+	 *
 	 * NB.: .Lsysenter_fix_flags is a label with the code under it moved
 	 * out-of-line as an optimization: NT is unlikely to be set in the
 	 * majority of the cases and instead of polluting the I$ unnecessarily,
 	 * we're keeping that code behind a branch which will predict as
 	 * not-taken and therefore its instructions won't be fetched.
 	 */
-	testl	$X86_EFLAGS_NT, EFLAGS(%rsp)
+	testl	$X86_EFLAGS_NT|X86_EFLAGS_AC|X86_EFLAGS_TF, EFLAGS(%rsp)
 	jnz	.Lsysenter_fix_flags
 .Lsysenter_flags_fixed:
 
@@ -123,20 +132,42 @@
 	pushq	$X86_EFLAGS_FIXED
 	popfq
 	jmp	.Lsysenter_flags_fixed
+GLOBAL(__end_entry_SYSENTER_compat)
 ENDPROC(entry_SYSENTER_compat)
 
 /*
- * 32-bit SYSCALL instruction entry.
+ * 32-bit SYSCALL entry.
  *
- * 32-bit SYSCALL saves rip to rcx, clears rflags.RF, then saves rflags to r11,
- * then loads new ss, cs, and rip from previously programmed MSRs.
- * rflags gets masked by a value from another MSR (so CLD and CLAC
- * are not needed). SYSCALL does not save anything on the stack
- * and does not change rsp.
+ * 32-bit system calls through the vDSO's __kernel_vsyscall enter here
+ * on 64-bit kernels running on AMD CPUs.
  *
- * Note: rflags saving+masking-with-MSR happens only in Long mode
+ * The SYSCALL instruction, in principle, should *only* occur in the
+ * vDSO.  In practice, it appears that this really is the case.
+ * As evidence:
+ *
+ *  - The calling convention for SYSCALL has changed several times without
+ *    anyone noticing.
+ *
+ *  - Prior to the in-kernel X86_BUG_SYSRET_SS_ATTRS fixup, anything
+ *    user task that did SYSCALL without immediately reloading SS
+ *    would randomly crash.
+ *
+ *  - Most programmers do not directly target AMD CPUs, and the 32-bit
+ *    SYSCALL instruction does not exist on Intel CPUs.  Even on AMD
+ *    CPUs, Linux disables the SYSCALL instruction on 32-bit kernels
+ *    because the SYSCALL instruction in legacy/native 32-bit mode (as
+ *    opposed to compat mode) is sufficiently poorly designed as to be
+ *    essentially unusable.
+ *
+ * 32-bit SYSCALL saves RIP to RCX, clears RFLAGS.RF, then saves
+ * RFLAGS to R11, then loads new SS, CS, and RIP from previously
+ * programmed MSRs.  RFLAGS gets masked by a value from another MSR
+ * (so CLD and CLAC are not needed).  SYSCALL does not save anything on
+ * the stack and does not change RSP.
+ *
+ * Note: RFLAGS saving+masking-with-MSR happens only in Long mode
  * (in legacy 32-bit mode, IF, RF and VM bits are cleared and that's it).
- * Don't get confused: rflags saving+masking depends on Long Mode Active bit
+ * Don't get confused: RFLAGS saving+masking depends on Long Mode Active bit
  * (EFER.LMA=1), NOT on bitness of userspace where SYSCALL executes
  * or target CS descriptor's L bit (SYSCALL does not read segment descriptors).
  *
@@ -236,7 +267,21 @@
 END(entry_SYSCALL_compat)
 
 /*
- * Emulated IA32 system calls via int 0x80.
+ * 32-bit legacy system call entry.
+ *
+ * 32-bit x86 Linux system calls traditionally used the INT $0x80
+ * instruction.  INT $0x80 lands here.
+ *
+ * This entry point can be used by 32-bit and 64-bit programs to perform
+ * 32-bit system calls.  Instances of INT $0x80 can be found inline in
+ * various programs and libraries.  It is also used by the vDSO's
+ * __kernel_vsyscall fallback for hardware that doesn't support a faster
+ * entry method.  Restarted 32-bit system calls also fall back to INT
+ * $0x80 regardless of what instruction was originally used to do the
+ * system call.
+ *
+ * This is considered a slow path.  It is not used by most libc
+ * implementations on modern hardware except during process startup.
  *
  * Arguments:
  * eax  system call number
@@ -245,22 +290,14 @@
  * edx  arg3
  * esi  arg4
  * edi  arg5
- * ebp  arg6	(note: not saved in the stack frame, should not be touched)
- *
- * Notes:
- * Uses the same stack frame as the x86-64 version.
- * All registers except eax must be saved (but ptrace may violate that).
- * Arguments are zero extended. For system calls that want sign extension and
- * take long arguments a wrapper is needed. Most calls can just be called
- * directly.
- * Assumes it is only called from user space and entered with interrupts off.
+ * ebp  arg6
  */
-
 ENTRY(entry_INT80_compat)
 	/*
 	 * Interrupts are off on entry.
 	 */
 	PARAVIRT_ADJUST_EXCEPTION_FRAME
+	ASM_CLAC			/* Do this early to minimize exposure */
 	SWAPGS
 
 	/*
@@ -299,7 +336,7 @@
 	TRACE_IRQS_OFF
 
 	movq	%rsp, %rdi
-	call	do_syscall_32_irqs_off
+	call	do_int80_syscall_32
 .Lsyscall_32_done:
 
 	/* Go back to user mode. */
diff --git a/arch/x86/entry/syscall_32.c b/arch/x86/entry/syscall_32.c
index 9a66498..8f895ee 100644
--- a/arch/x86/entry/syscall_32.c
+++ b/arch/x86/entry/syscall_32.c
@@ -6,17 +6,11 @@
 #include <asm/asm-offsets.h>
 #include <asm/syscall.h>
 
-#ifdef CONFIG_IA32_EMULATION
-#define SYM(sym, compat) compat
-#else
-#define SYM(sym, compat) sym
-#endif
-
-#define __SYSCALL_I386(nr, sym, compat) extern asmlinkage long SYM(sym, compat)(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
+#define __SYSCALL_I386(nr, sym, qual) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
 #include <asm/syscalls_32.h>
 #undef __SYSCALL_I386
 
-#define __SYSCALL_I386(nr, sym, compat) [nr] = SYM(sym, compat),
+#define __SYSCALL_I386(nr, sym, qual) [nr] = sym,
 
 extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
 
diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c
index 41283d2..9dbc5ab 100644
--- a/arch/x86/entry/syscall_64.c
+++ b/arch/x86/entry/syscall_64.c
@@ -6,19 +6,14 @@
 #include <asm/asm-offsets.h>
 #include <asm/syscall.h>
 
-#define __SYSCALL_COMMON(nr, sym, compat) __SYSCALL_64(nr, sym, compat)
+#define __SYSCALL_64_QUAL_(sym) sym
+#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_##sym
 
-#ifdef CONFIG_X86_X32_ABI
-# define __SYSCALL_X32(nr, sym, compat) __SYSCALL_64(nr, sym, compat)
-#else
-# define __SYSCALL_X32(nr, sym, compat) /* nothing */
-#endif
-
-#define __SYSCALL_64(nr, sym, compat) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
+#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long __SYSCALL_64_QUAL_##qual(sym)(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
 #include <asm/syscalls_64.h>
 #undef __SYSCALL_64
 
-#define __SYSCALL_64(nr, sym, compat) [nr] = sym,
+#define __SYSCALL_64(nr, sym, qual) [nr] = __SYSCALL_64_QUAL_##qual(sym),
 
 extern long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
 
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index dc1040a..2e5b565 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -21,7 +21,7 @@
 12	common	brk			sys_brk
 13	64	rt_sigaction		sys_rt_sigaction
 14	common	rt_sigprocmask		sys_rt_sigprocmask
-15	64	rt_sigreturn		stub_rt_sigreturn
+15	64	rt_sigreturn		sys_rt_sigreturn/ptregs
 16	64	ioctl			sys_ioctl
 17	common	pread64			sys_pread64
 18	common	pwrite64		sys_pwrite64
@@ -62,10 +62,10 @@
 53	common	socketpair		sys_socketpair
 54	64	setsockopt		sys_setsockopt
 55	64	getsockopt		sys_getsockopt
-56	common	clone			stub_clone
-57	common	fork			stub_fork
-58	common	vfork			stub_vfork
-59	64	execve			stub_execve
+56	common	clone			sys_clone/ptregs
+57	common	fork			sys_fork/ptregs
+58	common	vfork			sys_vfork/ptregs
+59	64	execve			sys_execve/ptregs
 60	common	exit			sys_exit
 61	common	wait4			sys_wait4
 62	common	kill			sys_kill
@@ -178,7 +178,7 @@
 169	common	reboot			sys_reboot
 170	common	sethostname		sys_sethostname
 171	common	setdomainname		sys_setdomainname
-172	common	iopl			sys_iopl
+172	common	iopl			sys_iopl/ptregs
 173	common	ioperm			sys_ioperm
 174	64	create_module
 175	common	init_module		sys_init_module
@@ -328,7 +328,7 @@
 319	common	memfd_create		sys_memfd_create
 320	common	kexec_file_load		sys_kexec_file_load
 321	common	bpf			sys_bpf
-322	64	execveat		stub_execveat
+322	64	execveat		sys_execveat/ptregs
 323	common	userfaultfd		sys_userfaultfd
 324	common	membarrier		sys_membarrier
 325	common	mlock2			sys_mlock2
@@ -339,14 +339,14 @@
 # for native 64-bit operation.
 #
 512	x32	rt_sigaction		compat_sys_rt_sigaction
-513	x32	rt_sigreturn		stub_x32_rt_sigreturn
+513	x32	rt_sigreturn		sys32_x32_rt_sigreturn
 514	x32	ioctl			compat_sys_ioctl
 515	x32	readv			compat_sys_readv
 516	x32	writev			compat_sys_writev
 517	x32	recvfrom		compat_sys_recvfrom
 518	x32	sendmsg			compat_sys_sendmsg
 519	x32	recvmsg			compat_sys_recvmsg
-520	x32	execve			stub_x32_execve
+520	x32	execve			compat_sys_execve/ptregs
 521	x32	ptrace			compat_sys_ptrace
 522	x32	rt_sigpending		compat_sys_rt_sigpending
 523	x32	rt_sigtimedwait		compat_sys_rt_sigtimedwait
@@ -371,4 +371,4 @@
 542	x32	getsockopt		compat_sys_getsockopt
 543	x32	io_setup		compat_sys_io_setup
 544	x32	io_submit		compat_sys_io_submit
-545	x32	execveat		stub_x32_execveat
+545	x32	execveat		compat_sys_execveat/ptregs
diff --git a/arch/x86/entry/syscalls/syscalltbl.sh b/arch/x86/entry/syscalls/syscalltbl.sh
index 0e7f8ec..cd3d301 100644
--- a/arch/x86/entry/syscalls/syscalltbl.sh
+++ b/arch/x86/entry/syscalls/syscalltbl.sh
@@ -3,13 +3,63 @@
 in="$1"
 out="$2"
 
+syscall_macro() {
+    abi="$1"
+    nr="$2"
+    entry="$3"
+
+    # Entry can be either just a function name or "function/qualifier"
+    real_entry="${entry%%/*}"
+    qualifier="${entry:${#real_entry}}"		# Strip the function name
+    qualifier="${qualifier:1}"			# Strip the slash, if any
+
+    echo "__SYSCALL_${abi}($nr, $real_entry, $qualifier)"
+}
+
+emit() {
+    abi="$1"
+    nr="$2"
+    entry="$3"
+    compat="$4"
+
+    if [ "$abi" == "64" -a -n "$compat" ]; then
+	echo "a compat entry for a 64-bit syscall makes no sense" >&2
+	exit 1
+    fi
+
+    if [ -z "$compat" ]; then
+	if [ -n "$entry" ]; then
+	    syscall_macro "$abi" "$nr" "$entry"
+	fi
+    else
+	echo "#ifdef CONFIG_X86_32"
+	if [ -n "$entry" ]; then
+	    syscall_macro "$abi" "$nr" "$entry"
+	fi
+	echo "#else"
+	syscall_macro "$abi" "$nr" "$compat"
+	echo "#endif"
+    fi
+}
+
 grep '^[0-9]' "$in" | sort -n | (
     while read nr abi name entry compat; do
 	abi=`echo "$abi" | tr '[a-z]' '[A-Z]'`
-	if [ -n "$compat" ]; then
-	    echo "__SYSCALL_${abi}($nr, $entry, $compat)"
-	elif [ -n "$entry" ]; then
-	    echo "__SYSCALL_${abi}($nr, $entry, $entry)"
+	if [ "$abi" == "COMMON" -o "$abi" == "64" ]; then
+	    # COMMON is the same as 64, except that we don't expect X32
+	    # programs to use it.  Our expectation has nothing to do with
+	    # any generated code, so treat them the same.
+	    emit 64 "$nr" "$entry" "$compat"
+	elif [ "$abi" == "X32" ]; then
+	    # X32 is equivalent to 64 on an X32-compatible kernel.
+	    echo "#ifdef CONFIG_X86_X32_ABI"
+	    emit 64 "$nr" "$entry" "$compat"
+	    echo "#endif"
+	elif [ "$abi" == "I386" ]; then
+	    emit "$abi" "$nr" "$entry" "$compat"
+	else
+	    echo "Unknown abi $abi" >&2
+	    exit 1
 	fi
     done
 ) > "$out"
diff --git a/arch/x86/entry/vdso/vdso2c.h b/arch/x86/entry/vdso/vdso2c.h
index 0224987..63a03bb 100644
--- a/arch/x86/entry/vdso/vdso2c.h
+++ b/arch/x86/entry/vdso/vdso2c.h
@@ -140,7 +140,7 @@
 	fprintf(outfile, "#include <asm/vdso.h>\n");
 	fprintf(outfile, "\n");
 	fprintf(outfile,
-		"static unsigned char raw_data[%lu] __page_aligned_data = {",
+		"static unsigned char raw_data[%lu] __ro_after_init __aligned(PAGE_SIZE) = {",
 		mapping_size);
 	for (j = 0; j < stripped_len; j++) {
 		if (j % 10 == 0)
@@ -150,16 +150,9 @@
 	}
 	fprintf(outfile, "\n};\n\n");
 
-	fprintf(outfile, "static struct page *pages[%lu];\n\n",
-		mapping_size / 4096);
-
 	fprintf(outfile, "const struct vdso_image %s = {\n", name);
 	fprintf(outfile, "\t.data = raw_data,\n");
 	fprintf(outfile, "\t.size = %lu,\n", mapping_size);
-	fprintf(outfile, "\t.text_mapping = {\n");
-	fprintf(outfile, "\t\t.name = \"[vdso]\",\n");
-	fprintf(outfile, "\t\t.pages = pages,\n");
-	fprintf(outfile, "\t},\n");
 	if (alt_sec) {
 		fprintf(outfile, "\t.alt = %lu,\n",
 			(unsigned long)GET_LE(&alt_sec->sh_offset));
diff --git a/arch/x86/entry/vdso/vdso32-setup.c b/arch/x86/entry/vdso/vdso32-setup.c
index 08a317a..7853b53 100644
--- a/arch/x86/entry/vdso/vdso32-setup.c
+++ b/arch/x86/entry/vdso/vdso32-setup.c
@@ -11,7 +11,6 @@
 #include <linux/kernel.h>
 #include <linux/mm_types.h>
 
-#include <asm/cpufeature.h>
 #include <asm/processor.h>
 #include <asm/vdso.h>
 
diff --git a/arch/x86/entry/vdso/vdso32/system_call.S b/arch/x86/entry/vdso/vdso32/system_call.S
index 3a1d929..0109ac6 100644
--- a/arch/x86/entry/vdso/vdso32/system_call.S
+++ b/arch/x86/entry/vdso/vdso32/system_call.S
@@ -3,7 +3,7 @@
 */
 
 #include <asm/dwarf2.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
 #include <asm/alternative-asm.h>
 
 /*
diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index b8f69e2..10f7045 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -20,6 +20,7 @@
 #include <asm/page.h>
 #include <asm/hpet.h>
 #include <asm/desc.h>
+#include <asm/cpufeature.h>
 
 #if defined(CONFIG_X86_64)
 unsigned int __read_mostly vdso64_enabled = 1;
@@ -27,13 +28,7 @@
 
 void __init init_vdso_image(const struct vdso_image *image)
 {
-	int i;
-	int npages = (image->size) / PAGE_SIZE;
-
 	BUG_ON(image->size % PAGE_SIZE != 0);
-	for (i = 0; i < npages; i++)
-		image->text_mapping.pages[i] =
-			virt_to_page(image->data + i*PAGE_SIZE);
 
 	apply_alternatives((struct alt_instr *)(image->data + image->alt),
 			   (struct alt_instr *)(image->data + image->alt +
@@ -90,18 +85,87 @@
 #endif
 }
 
+static int vdso_fault(const struct vm_special_mapping *sm,
+		      struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	const struct vdso_image *image = vma->vm_mm->context.vdso_image;
+
+	if (!image || (vmf->pgoff << PAGE_SHIFT) >= image->size)
+		return VM_FAULT_SIGBUS;
+
+	vmf->page = virt_to_page(image->data + (vmf->pgoff << PAGE_SHIFT));
+	get_page(vmf->page);
+	return 0;
+}
+
+static const struct vm_special_mapping text_mapping = {
+	.name = "[vdso]",
+	.fault = vdso_fault,
+};
+
+static int vvar_fault(const struct vm_special_mapping *sm,
+		      struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	const struct vdso_image *image = vma->vm_mm->context.vdso_image;
+	long sym_offset;
+	int ret = -EFAULT;
+
+	if (!image)
+		return VM_FAULT_SIGBUS;
+
+	sym_offset = (long)(vmf->pgoff << PAGE_SHIFT) +
+		image->sym_vvar_start;
+
+	/*
+	 * Sanity check: a symbol offset of zero means that the page
+	 * does not exist for this vdso image, not that the page is at
+	 * offset zero relative to the text mapping.  This should be
+	 * impossible here, because sym_offset should only be zero for
+	 * the page past the end of the vvar mapping.
+	 */
+	if (sym_offset == 0)
+		return VM_FAULT_SIGBUS;
+
+	if (sym_offset == image->sym_vvar_page) {
+		ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address,
+				    __pa_symbol(&__vvar_page) >> PAGE_SHIFT);
+	} else if (sym_offset == image->sym_hpet_page) {
+#ifdef CONFIG_HPET_TIMER
+		if (hpet_address && vclock_was_used(VCLOCK_HPET)) {
+			ret = vm_insert_pfn_prot(
+				vma,
+				(unsigned long)vmf->virtual_address,
+				hpet_address >> PAGE_SHIFT,
+				pgprot_noncached(PAGE_READONLY));
+		}
+#endif
+	} else if (sym_offset == image->sym_pvclock_page) {
+		struct pvclock_vsyscall_time_info *pvti =
+			pvclock_pvti_cpu0_va();
+		if (pvti && vclock_was_used(VCLOCK_PVCLOCK)) {
+			ret = vm_insert_pfn(
+				vma,
+				(unsigned long)vmf->virtual_address,
+				__pa(pvti) >> PAGE_SHIFT);
+		}
+	}
+
+	if (ret == 0 || ret == -EBUSY)
+		return VM_FAULT_NOPAGE;
+
+	return VM_FAULT_SIGBUS;
+}
+
 static int map_vdso(const struct vdso_image *image, bool calculate_addr)
 {
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma;
 	unsigned long addr, text_start;
 	int ret = 0;
-	static struct page *no_pages[] = {NULL};
-	static struct vm_special_mapping vvar_mapping = {
+	static const struct vm_special_mapping vvar_mapping = {
 		.name = "[vvar]",
-		.pages = no_pages,
+		.fault = vvar_fault,
 	};
-	struct pvclock_vsyscall_time_info *pvti;
 
 	if (calculate_addr) {
 		addr = vdso_addr(current->mm->start_stack,
@@ -121,6 +185,7 @@
 
 	text_start = addr - image->sym_vvar_start;
 	current->mm->context.vdso = (void __user *)text_start;
+	current->mm->context.vdso_image = image;
 
 	/*
 	 * MAYWRITE to allow gdb to COW and set breakpoints
@@ -130,7 +195,7 @@
 				       image->size,
 				       VM_READ|VM_EXEC|
 				       VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
-				       &image->text_mapping);
+				       &text_mapping);
 
 	if (IS_ERR(vma)) {
 		ret = PTR_ERR(vma);
@@ -140,7 +205,8 @@
 	vma = _install_special_mapping(mm,
 				       addr,
 				       -image->sym_vvar_start,
-				       VM_READ|VM_MAYREAD,
+				       VM_READ|VM_MAYREAD|VM_IO|VM_DONTDUMP|
+				       VM_PFNMAP,
 				       &vvar_mapping);
 
 	if (IS_ERR(vma)) {
@@ -148,41 +214,6 @@
 		goto up_fail;
 	}
 
-	if (image->sym_vvar_page)
-		ret = remap_pfn_range(vma,
-				      text_start + image->sym_vvar_page,
-				      __pa_symbol(&__vvar_page) >> PAGE_SHIFT,
-				      PAGE_SIZE,
-				      PAGE_READONLY);
-
-	if (ret)
-		goto up_fail;
-
-#ifdef CONFIG_HPET_TIMER
-	if (hpet_address && image->sym_hpet_page) {
-		ret = io_remap_pfn_range(vma,
-			text_start + image->sym_hpet_page,
-			hpet_address >> PAGE_SHIFT,
-			PAGE_SIZE,
-			pgprot_noncached(PAGE_READONLY));
-
-		if (ret)
-			goto up_fail;
-	}
-#endif
-
-	pvti = pvclock_pvti_cpu0_va();
-	if (pvti && image->sym_pvclock_page) {
-		ret = remap_pfn_range(vma,
-				      text_start + image->sym_pvclock_page,
-				      __pa(pvti) >> PAGE_SHIFT,
-				      PAGE_SIZE,
-				      PAGE_READONLY);
-
-		if (ret)
-			goto up_fail;
-	}
-
 up_fail:
 	if (ret)
 		current->mm->context.vdso = NULL;
@@ -254,7 +285,7 @@
 #ifdef CONFIG_NUMA
 	node = cpu_to_node(cpu);
 #endif
-	if (cpu_has(&cpu_data(cpu), X86_FEATURE_RDTSCP))
+	if (static_cpu_has(X86_FEATURE_RDTSCP))
 		write_rdtscp_aux((node << 12) | cpu);
 
 	/*
diff --git a/arch/x86/entry/vsyscall/vsyscall_gtod.c b/arch/x86/entry/vsyscall/vsyscall_gtod.c
index 51e3304..0fb3a10 100644
--- a/arch/x86/entry/vsyscall/vsyscall_gtod.c
+++ b/arch/x86/entry/vsyscall/vsyscall_gtod.c
@@ -16,6 +16,8 @@
 #include <asm/vgtod.h>
 #include <asm/vvar.h>
 
+int vclocks_used __read_mostly;
+
 DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data);
 
 void update_vsyscall_tz(void)
@@ -26,12 +28,17 @@
 
 void update_vsyscall(struct timekeeper *tk)
 {
+	int vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode;
 	struct vsyscall_gtod_data *vdata = &vsyscall_gtod_data;
 
+	/* Mark the new vclock used. */
+	BUILD_BUG_ON(VCLOCK_MAX >= 32);
+	WRITE_ONCE(vclocks_used, READ_ONCE(vclocks_used) | (1 << vclock_mode));
+
 	gtod_write_begin(vdata);
 
 	/* copy vsyscall data */
-	vdata->vclock_mode	= tk->tkr_mono.clock->archdata.vclock_mode;
+	vdata->vclock_mode	= vclock_mode;
 	vdata->cycle_last	= tk->tkr_mono.cycle_last;
 	vdata->mask		= tk->tkr_mono.mask;
 	vdata->mult		= tk->tkr_mono.mult;
diff --git a/arch/x86/events/Makefile b/arch/x86/events/Makefile
new file mode 100644
index 0000000..fdfea15
--- /dev/null
+++ b/arch/x86/events/Makefile
@@ -0,0 +1,13 @@
+obj-y			+= core.o
+
+obj-$(CONFIG_CPU_SUP_AMD)               += amd/core.o amd/uncore.o
+obj-$(CONFIG_X86_LOCAL_APIC)            += amd/ibs.o msr.o
+ifdef CONFIG_AMD_IOMMU
+obj-$(CONFIG_CPU_SUP_AMD)               += amd/iommu.o
+endif
+obj-$(CONFIG_CPU_SUP_INTEL)		+= intel/core.o intel/bts.o intel/cqm.o
+obj-$(CONFIG_CPU_SUP_INTEL)		+= intel/cstate.o intel/ds.o intel/knc.o 
+obj-$(CONFIG_CPU_SUP_INTEL)		+= intel/lbr.o intel/p4.o intel/p6.o intel/pt.o
+obj-$(CONFIG_CPU_SUP_INTEL)		+= intel/rapl.o msr.o
+obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE)	+= intel/uncore.o intel/uncore_nhmex.o
+obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE)	+= intel/uncore_snb.o intel/uncore_snbep.o
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/events/amd/core.c
similarity index 99%
rename from arch/x86/kernel/cpu/perf_event_amd.c
rename to arch/x86/events/amd/core.c
index 5861053..049ada8d 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/events/amd/core.c
@@ -5,7 +5,7 @@
 #include <linux/slab.h>
 #include <asm/apicdef.h>
 
-#include "perf_event.h"
+#include "../perf_event.h"
 
 static __initconst const u64 amd_hw_cache_event_ids
 				[PERF_COUNT_HW_CACHE_MAX]
diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/events/amd/ibs.c
similarity index 97%
rename from arch/x86/kernel/cpu/perf_event_amd_ibs.c
rename to arch/x86/events/amd/ibs.c
index 989d3c2..51087c2 100644
--- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c
+++ b/arch/x86/events/amd/ibs.c
@@ -14,7 +14,7 @@
 
 #include <asm/apic.h>
 
-#include "perf_event.h"
+#include "../perf_event.h"
 
 static u32 ibs_caps;
 
@@ -670,7 +670,7 @@
 	perf_ibs_pmu_init(&perf_ibs_op, "ibs_op");
 
 	register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs");
-	printk(KERN_INFO "perf: AMD IBS detected (0x%08x)\n", ibs_caps);
+	pr_info("perf: AMD IBS detected (0x%08x)\n", ibs_caps);
 
 	return 0;
 }
@@ -774,14 +774,14 @@
 		pci_read_config_dword(cpu_cfg, IBSCTL, &value);
 		if (value != (ibs_eilvt_off | IBSCTL_LVT_OFFSET_VALID)) {
 			pci_dev_put(cpu_cfg);
-			printk(KERN_DEBUG "Failed to setup IBS LVT offset, "
-			       "IBSCTL = 0x%08x\n", value);
+			pr_debug("Failed to setup IBS LVT offset, IBSCTL = 0x%08x\n",
+				 value);
 			return -EINVAL;
 		}
 	} while (1);
 
 	if (!nodes) {
-		printk(KERN_DEBUG "No CPU node configured for IBS\n");
+		pr_debug("No CPU node configured for IBS\n");
 		return -ENODEV;
 	}
 
@@ -810,7 +810,7 @@
 	preempt_enable();
 
 	if (offset == APIC_EILVT_NR_MAX) {
-		printk(KERN_DEBUG "No EILVT entry available\n");
+		pr_debug("No EILVT entry available\n");
 		return;
 	}
 
diff --git a/arch/x86/kernel/cpu/perf_event_amd_iommu.c b/arch/x86/events/amd/iommu.c
similarity index 98%
rename from arch/x86/kernel/cpu/perf_event_amd_iommu.c
rename to arch/x86/events/amd/iommu.c
index 97242a9..635e5eb 100644
--- a/arch/x86/kernel/cpu/perf_event_amd_iommu.c
+++ b/arch/x86/events/amd/iommu.c
@@ -16,8 +16,8 @@
 #include <linux/cpumask.h>
 #include <linux/slab.h>
 
-#include "perf_event.h"
-#include "perf_event_amd_iommu.h"
+#include "../perf_event.h"
+#include "iommu.h"
 
 #define COUNTER_SHIFT		16
 
diff --git a/arch/x86/kernel/cpu/perf_event_amd_iommu.h b/arch/x86/events/amd/iommu.h
similarity index 100%
rename from arch/x86/kernel/cpu/perf_event_amd_iommu.h
rename to arch/x86/events/amd/iommu.h
diff --git a/arch/x86/kernel/cpu/perf_event_amd_uncore.c b/arch/x86/events/amd/uncore.c
similarity index 98%
rename from arch/x86/kernel/cpu/perf_event_amd_uncore.c
rename to arch/x86/events/amd/uncore.c
index 4974274..3db9569 100644
--- a/arch/x86/kernel/cpu/perf_event_amd_uncore.c
+++ b/arch/x86/events/amd/uncore.c
@@ -323,6 +323,8 @@
 	return 0;
 
 fail:
+	if (amd_uncore_nb)
+		*per_cpu_ptr(amd_uncore_nb, cpu) = NULL;
 	kfree(uncore_nb);
 	return -ENOMEM;
 }
@@ -536,7 +538,7 @@
 		if (ret)
 			goto fail_nb;
 
-		printk(KERN_INFO "perf: AMD NB counters detected\n");
+		pr_info("perf: AMD NB counters detected\n");
 		ret = 0;
 	}
 
@@ -550,7 +552,7 @@
 		if (ret)
 			goto fail_l2;
 
-		printk(KERN_INFO "perf: AMD L2I counters detected\n");
+		pr_info("perf: AMD L2I counters detected\n");
 		ret = 0;
 	}
 
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/events/core.c
similarity index 97%
rename from arch/x86/kernel/cpu/perf_event.c
rename to arch/x86/events/core.c
index 1b443db..5e830d0 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/events/core.c
@@ -254,15 +254,16 @@
 	 * We still allow the PMU driver to operate:
 	 */
 	if (bios_fail) {
-		printk(KERN_CONT "Broken BIOS detected, complain to your hardware vendor.\n");
-		printk(KERN_ERR FW_BUG "the BIOS has corrupted hw-PMU resources (MSR %x is %Lx)\n", reg_fail, val_fail);
+		pr_cont("Broken BIOS detected, complain to your hardware vendor.\n");
+		pr_err(FW_BUG "the BIOS has corrupted hw-PMU resources (MSR %x is %Lx)\n",
+			      reg_fail, val_fail);
 	}
 
 	return true;
 
 msr_fail:
-	printk(KERN_CONT "Broken PMU hardware detected, using software events only.\n");
-	printk("%sFailed to access perfctr msr (MSR %x is %Lx)\n",
+	pr_cont("Broken PMU hardware detected, using software events only.\n");
+	pr_info("%sFailed to access perfctr msr (MSR %x is %Lx)\n",
 		boot_cpu_has(X86_FEATURE_HYPERVISOR) ? KERN_INFO : KERN_ERR,
 		reg, val_new);
 
@@ -596,6 +597,19 @@
 	}
 }
 
+/*
+ * There may be PMI landing after enabled=0. The PMI hitting could be before or
+ * after disable_all.
+ *
+ * If PMI hits before disable_all, the PMU will be disabled in the NMI handler.
+ * It will not be re-enabled in the NMI handler again, because enabled=0. After
+ * handling the NMI, disable_all will be called, which will not change the
+ * state either. If PMI hits after disable_all, the PMU is already disabled
+ * before entering NMI handler. The NMI handler will not change the state
+ * either.
+ *
+ * So either situation is harmless.
+ */
 static void x86_pmu_disable(struct pmu *pmu)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_bts.c b/arch/x86/events/intel/bts.c
similarity index 99%
rename from arch/x86/kernel/cpu/perf_event_intel_bts.c
rename to arch/x86/events/intel/bts.c
index 2cad71d..b99dc92 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_bts.c
+++ b/arch/x86/events/intel/bts.c
@@ -26,7 +26,7 @@
 #include <asm-generic/sizes.h>
 #include <asm/perf_event.h>
 
-#include "perf_event.h"
+#include "../perf_event.h"
 
 struct bts_ctx {
 	struct perf_output_handle	handle;
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/events/intel/core.c
similarity index 98%
rename from arch/x86/kernel/cpu/perf_event_intel.c
rename to arch/x86/events/intel/core.c
index a667078..68fa55b 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/events/intel/core.c
@@ -18,7 +18,7 @@
 #include <asm/hardirq.h>
 #include <asm/apic.h>
 
-#include "perf_event.h"
+#include "../perf_event.h"
 
 /*
  * Intel PerfMon, used on Core and later.
@@ -1502,7 +1502,15 @@
 };
 
 /*
- * Use from PMIs where the LBRs are already disabled.
+ * Used from PMIs where the LBRs are already disabled.
+ *
+ * This function could be called consecutively. It is required to remain in
+ * disabled state if called consecutively.
+ *
+ * During consecutive calls, the same disable value will be written to related
+ * registers, so the PMU state remains unchanged. hw.state in
+ * intel_bts_disable_local will remain PERF_HES_STOPPED too in consecutive
+ * calls.
  */
 static void __intel_pmu_disable_all(void)
 {
@@ -1884,6 +1892,16 @@
 	if (__test_and_clear_bit(62, (unsigned long *)&status)) {
 		handled++;
 		x86_pmu.drain_pebs(regs);
+		/*
+		 * There are cases where, even though, the PEBS ovfl bit is set
+		 * in GLOBAL_OVF_STATUS, the PEBS events may also have their
+		 * overflow bits set for their counters. We must clear them
+		 * here because they have been processed as exact samples in
+		 * the drain_pebs() routine. They must not be processed again
+		 * in the for_each_bit_set() loop for regular samples below.
+		 */
+		status &= ~cpuc->pebs_enabled;
+		status &= x86_pmu.intel_ctrl | GLOBAL_STATUS_TRACE_TOPAPMI;
 	}
 
 	/*
@@ -1929,7 +1947,10 @@
 		goto again;
 
 done:
-	__intel_pmu_enable_all(0, true);
+	/* Only restore PMU state when it's active. See x86_pmu_disable(). */
+	if (cpuc->enabled)
+		__intel_pmu_enable_all(0, true);
+
 	/*
 	 * Only unmask the NMI after the overflow counters
 	 * have been reset. This avoids spurious NMIs on
@@ -1960,7 +1981,8 @@
 
 static int intel_alt_er(int idx, u64 config)
 {
-	int alt_idx;
+	int alt_idx = idx;
+
 	if (!(x86_pmu.flags & PMU_FL_HAS_RSP_1))
 		return idx;
 
@@ -2897,14 +2919,12 @@
 		return;
 
 	if (!(x86_pmu.flags & PMU_FL_NO_HT_SHARING)) {
-		void **onln = &cpuc->kfree_on_online[X86_PERF_KFREE_SHARED];
-
 		for_each_cpu(i, topology_sibling_cpumask(cpu)) {
 			struct intel_shared_regs *pc;
 
 			pc = per_cpu(cpu_hw_events, i).shared_regs;
 			if (pc && pc->core_id == core_id) {
-				*onln = cpuc->shared_regs;
+				cpuc->kfree_on_online[0] = cpuc->shared_regs;
 				cpuc->shared_regs = pc;
 				break;
 			}
@@ -3397,6 +3417,7 @@
 		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
 			X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1);
 
+		intel_pmu_pebs_data_source_nhm();
 		x86_add_quirk(intel_nehalem_quirk);
 
 		pr_cont("Nehalem events, ");
@@ -3460,6 +3481,7 @@
 		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
 			X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1);
 
+		intel_pmu_pebs_data_source_nhm();
 		pr_cont("Westmere events, ");
 		break;
 
@@ -3582,7 +3604,7 @@
 		intel_pmu_lbr_init_hsw();
 
 		x86_pmu.event_constraints = intel_bdw_event_constraints;
-		x86_pmu.pebs_constraints = intel_hsw_pebs_event_constraints;
+		x86_pmu.pebs_constraints = intel_bdw_pebs_event_constraints;
 		x86_pmu.extra_regs = intel_snbep_extra_regs;
 		x86_pmu.pebs_aliases = intel_pebs_aliases_ivb;
 		x86_pmu.pebs_prec_dist = true;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_cqm.c b/arch/x86/events/intel/cqm.c
similarity index 97%
rename from arch/x86/kernel/cpu/perf_event_intel_cqm.c
rename to arch/x86/events/intel/cqm.c
index a316ca9..93cb412 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_cqm.c
+++ b/arch/x86/events/intel/cqm.c
@@ -7,7 +7,7 @@
 #include <linux/perf_event.h>
 #include <linux/slab.h>
 #include <asm/cpu_device_id.h>
-#include "perf_event.h"
+#include "../perf_event.h"
 
 #define MSR_IA32_PQR_ASSOC	0x0c8f
 #define MSR_IA32_QM_CTR		0x0c8e
@@ -1244,15 +1244,12 @@
 
 static inline void cqm_pick_event_reader(int cpu)
 {
-	int phys_id = topology_physical_package_id(cpu);
-	int i;
+	int reader;
 
-	for_each_cpu(i, &cqm_cpumask) {
-		if (phys_id == topology_physical_package_id(i))
-			return;	/* already got reader for this socket */
-	}
-
-	cpumask_set_cpu(cpu, &cqm_cpumask);
+	/* First online cpu in package becomes the reader */
+	reader = cpumask_any_and(&cqm_cpumask, topology_core_cpumask(cpu));
+	if (reader >= nr_cpu_ids)
+		cpumask_set_cpu(cpu, &cqm_cpumask);
 }
 
 static void intel_cqm_cpu_starting(unsigned int cpu)
@@ -1270,24 +1267,17 @@
 
 static void intel_cqm_cpu_exit(unsigned int cpu)
 {
-	int phys_id = topology_physical_package_id(cpu);
-	int i;
+	int target;
 
-	/*
-	 * Is @cpu a designated cqm reader?
-	 */
+	/* Is @cpu the current cqm reader for this package ? */
 	if (!cpumask_test_and_clear_cpu(cpu, &cqm_cpumask))
 		return;
 
-	for_each_online_cpu(i) {
-		if (i == cpu)
-			continue;
+	/* Find another online reader in this package */
+	target = cpumask_any_but(topology_core_cpumask(cpu), cpu);
 
-		if (phys_id == topology_physical_package_id(i)) {
-			cpumask_set_cpu(i, &cqm_cpumask);
-			break;
-		}
-	}
+	if (target < nr_cpu_ids)
+		cpumask_set_cpu(target, &cqm_cpumask);
 }
 
 static int intel_cqm_cpu_notifier(struct notifier_block *nb,
diff --git a/arch/x86/kernel/cpu/perf_event_intel_cstate.c b/arch/x86/events/intel/cstate.c
similarity index 99%
rename from arch/x86/kernel/cpu/perf_event_intel_cstate.c
rename to arch/x86/events/intel/cstate.c
index 75a38b5..7946c42 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -89,7 +89,7 @@
 #include <linux/slab.h>
 #include <linux/perf_event.h>
 #include <asm/cpu_device_id.h>
-#include "perf_event.h"
+#include "../perf_event.h"
 
 #define DEFINE_CSTATE_FORMAT_ATTR(_var, _name, _format)		\
 static ssize_t __cstate_##_var##_show(struct kobject *kobj,	\
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/events/intel/ds.c
similarity index 93%
rename from arch/x86/kernel/cpu/perf_event_intel_ds.c
rename to arch/x86/events/intel/ds.c
index 10602f0..ce7211a 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -5,7 +5,7 @@
 #include <asm/perf_event.h>
 #include <asm/insn.h>
 
-#include "perf_event.h"
+#include "../perf_event.h"
 
 /* The size of a BTS record in bytes: */
 #define BTS_RECORD_SIZE		24
@@ -51,7 +51,8 @@
 #define OP_LH (P(OP, LOAD) | P(LVL, HIT))
 #define SNOOP_NONE_MISS (P(SNOOP, NONE) | P(SNOOP, MISS))
 
-static const u64 pebs_data_source[] = {
+/* Version for Sandy Bridge and later */
+static u64 pebs_data_source[] = {
 	P(OP, LOAD) | P(LVL, MISS) | P(LVL, L3) | P(SNOOP, NA),/* 0x00:ukn L3 */
 	OP_LH | P(LVL, L1)  | P(SNOOP, NONE),	/* 0x01: L1 local */
 	OP_LH | P(LVL, LFB) | P(SNOOP, NONE),	/* 0x02: LFB hit */
@@ -70,6 +71,14 @@
 	OP_LH | P(LVL, UNC) | P(SNOOP, NONE), /* 0x0f: uncached */
 };
 
+/* Patch up minor differences in the bits */
+void __init intel_pmu_pebs_data_source_nhm(void)
+{
+	pebs_data_source[0x05] = OP_LH | P(LVL, L3)  | P(SNOOP, HIT);
+	pebs_data_source[0x06] = OP_LH | P(LVL, L3)  | P(SNOOP, HITM);
+	pebs_data_source[0x07] = OP_LH | P(LVL, L3)  | P(SNOOP, HITM);
+}
+
 static u64 precise_store_data(u64 status)
 {
 	union intel_x86_pebs_dse dse;
@@ -269,7 +278,7 @@
 	if (!x86_pmu.pebs)
 		return 0;
 
-	buffer = kzalloc_node(PEBS_BUFFER_SIZE, GFP_KERNEL, node);
+	buffer = kzalloc_node(x86_pmu.pebs_buffer_size, GFP_KERNEL, node);
 	if (unlikely(!buffer))
 		return -ENOMEM;
 
@@ -286,7 +295,7 @@
 		per_cpu(insn_buffer, cpu) = ibuffer;
 	}
 
-	max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size;
+	max = x86_pmu.pebs_buffer_size / x86_pmu.pebs_record_size;
 
 	ds->pebs_buffer_base = (u64)(unsigned long)buffer;
 	ds->pebs_index = ds->pebs_buffer_base;
@@ -722,6 +731,30 @@
 	EVENT_CONSTRAINT_END
 };
 
+struct event_constraint intel_bdw_pebs_event_constraints[] = {
+	INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
+	INTEL_PLD_CONSTRAINT(0x01cd, 0xf),    /* MEM_TRANS_RETIRED.* */
+	/* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
+	INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
+	/* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
+	INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c0, 0x2),
+	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
+	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */
+	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */
+	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */
+	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */
+	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_STORES */
+	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_STORES */
+	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */
+	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
+	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd2, 0xf),    /* MEM_LOAD_UOPS_L3_HIT_RETIRED.* */
+	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd3, 0xf),    /* MEM_LOAD_UOPS_L3_MISS_RETIRED.* */
+	/* Allow all events as PEBS with no flags */
+	INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
+	EVENT_CONSTRAINT_END
+};
+
+
 struct event_constraint intel_skl_pebs_event_constraints[] = {
 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x2),	/* INST_RETIRED.PREC_DIST */
 	/* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
@@ -1319,19 +1352,28 @@
 
 	x86_pmu.bts  = boot_cpu_has(X86_FEATURE_BTS);
 	x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS);
+	x86_pmu.pebs_buffer_size = PEBS_BUFFER_SIZE;
 	if (x86_pmu.pebs) {
 		char pebs_type = x86_pmu.intel_cap.pebs_trap ?  '+' : '-';
 		int format = x86_pmu.intel_cap.pebs_format;
 
 		switch (format) {
 		case 0:
-			printk(KERN_CONT "PEBS fmt0%c, ", pebs_type);
+			pr_cont("PEBS fmt0%c, ", pebs_type);
 			x86_pmu.pebs_record_size = sizeof(struct pebs_record_core);
+			/*
+			 * Using >PAGE_SIZE buffers makes the WRMSR to
+			 * PERF_GLOBAL_CTRL in intel_pmu_enable_all()
+			 * mysteriously hang on Core2.
+			 *
+			 * As a workaround, we don't do this.
+			 */
+			x86_pmu.pebs_buffer_size = PAGE_SIZE;
 			x86_pmu.drain_pebs = intel_pmu_drain_pebs_core;
 			break;
 
 		case 1:
-			printk(KERN_CONT "PEBS fmt1%c, ", pebs_type);
+			pr_cont("PEBS fmt1%c, ", pebs_type);
 			x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm);
 			x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
 			break;
@@ -1351,7 +1393,7 @@
 			break;
 
 		default:
-			printk(KERN_CONT "no PEBS fmt%d%c, ", format, pebs_type);
+			pr_cont("no PEBS fmt%d%c, ", format, pebs_type);
 			x86_pmu.pebs = 0;
 		}
 	}
diff --git a/arch/x86/kernel/cpu/perf_event_knc.c b/arch/x86/events/intel/knc.c
similarity index 97%
rename from arch/x86/kernel/cpu/perf_event_knc.c
rename to arch/x86/events/intel/knc.c
index 5b0c232..548d5f7 100644
--- a/arch/x86/kernel/cpu/perf_event_knc.c
+++ b/arch/x86/events/intel/knc.c
@@ -5,7 +5,7 @@
 
 #include <asm/hardirq.h>
 
-#include "perf_event.h"
+#include "../perf_event.h"
 
 static const u64 knc_perfmon_event_map[] =
 {
@@ -263,7 +263,9 @@
 		goto again;
 
 done:
-	knc_pmu_enable_all(0);
+	/* Only restore PMU state when it's active. See x86_pmu_disable(). */
+	if (cpuc->enabled)
+		knc_pmu_enable_all(0);
 
 	return handled;
 }
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/events/intel/lbr.c
similarity index 99%
rename from arch/x86/kernel/cpu/perf_event_intel_lbr.c
rename to arch/x86/events/intel/lbr.c
index 653f88d..69dd118 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -5,7 +5,7 @@
 #include <asm/msr.h>
 #include <asm/insn.h>
 
-#include "perf_event.h"
+#include "../perf_event.h"
 
 enum {
 	LBR_FORMAT_32		= 0x00,
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/events/intel/p4.c
similarity index 99%
rename from arch/x86/kernel/cpu/perf_event_p4.c
rename to arch/x86/events/intel/p4.c
index f2e5678..0a5ede1 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/events/intel/p4.c
@@ -13,7 +13,7 @@
 #include <asm/hardirq.h>
 #include <asm/apic.h>
 
-#include "perf_event.h"
+#include "../perf_event.h"
 
 #define P4_CNTR_LIMIT 3
 /*
diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/events/intel/p6.c
similarity index 98%
rename from arch/x86/kernel/cpu/perf_event_p6.c
rename to arch/x86/events/intel/p6.c
index 7c1a0c0..1f5c47a 100644
--- a/arch/x86/kernel/cpu/perf_event_p6.c
+++ b/arch/x86/events/intel/p6.c
@@ -1,7 +1,7 @@
 #include <linux/perf_event.h>
 #include <linux/types.h>
 
-#include "perf_event.h"
+#include "../perf_event.h"
 
 /*
  * Not sure about some of these
diff --git a/arch/x86/kernel/cpu/perf_event_intel_pt.c b/arch/x86/events/intel/pt.c
similarity index 99%
rename from arch/x86/kernel/cpu/perf_event_intel_pt.c
rename to arch/x86/events/intel/pt.c
index c0bbd10..6af7cf7 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_pt.c
+++ b/arch/x86/events/intel/pt.c
@@ -29,8 +29,8 @@
 #include <asm/io.h>
 #include <asm/intel_pt.h>
 
-#include "perf_event.h"
-#include "intel_pt.h"
+#include "../perf_event.h"
+#include "pt.h"
 
 static DEFINE_PER_CPU(struct pt, pt_ctx);
 
diff --git a/arch/x86/kernel/cpu/intel_pt.h b/arch/x86/events/intel/pt.h
similarity index 100%
rename from arch/x86/kernel/cpu/intel_pt.h
rename to arch/x86/events/intel/pt.h
diff --git a/arch/x86/kernel/cpu/perf_event_intel_rapl.c b/arch/x86/events/intel/rapl.c
similarity index 73%
rename from arch/x86/kernel/cpu/perf_event_intel_rapl.c
rename to arch/x86/events/intel/rapl.c
index 24a351a..b834a3f 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_rapl.c
+++ b/arch/x86/events/intel/rapl.c
@@ -44,11 +44,14 @@
  * the duration of the measurement. Tools may use a function such as
  * ldexp(raw_count, -32);
  */
+
+#define pr_fmt(fmt) "RAPL PMU: " fmt
+
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/perf_event.h>
 #include <asm/cpu_device_id.h>
-#include "perf_event.h"
+#include "../perf_event.h"
 
 /*
  * RAPL energy status counters
@@ -107,7 +110,7 @@
 static struct kobj_attribute format_attr_##_var =		\
 	__ATTR(_name, 0444, __rapl_##_var##_show, NULL)
 
-#define RAPL_CNTR_WIDTH 32 /* 32-bit rapl counters */
+#define RAPL_CNTR_WIDTH 32
 
 #define RAPL_EVENT_ATTR_STR(_name, v, str)					\
 static struct perf_pmu_events_attr event_attr_##v = {				\
@@ -117,23 +120,33 @@
 };
 
 struct rapl_pmu {
-	spinlock_t	 lock;
-	int		 n_active; /* number of active events */
-	struct list_head active_list;
-	struct pmu	 *pmu; /* pointer to rapl_pmu_class */
-	ktime_t		 timer_interval; /* in ktime_t unit */
-	struct hrtimer   hrtimer;
+	raw_spinlock_t		lock;
+	int			n_active;
+	int			cpu;
+	struct list_head	active_list;
+	struct pmu		*pmu;
+	ktime_t			timer_interval;
+	struct hrtimer		hrtimer;
 };
 
-static int rapl_hw_unit[NR_RAPL_DOMAINS] __read_mostly;  /* 1/2^hw_unit Joule */
-static struct pmu rapl_pmu_class;
+struct rapl_pmus {
+	struct pmu		pmu;
+	unsigned int		maxpkg;
+	struct rapl_pmu		*pmus[];
+};
+
+ /* 1/2^hw_unit Joule */
+static int rapl_hw_unit[NR_RAPL_DOMAINS] __read_mostly;
+static struct rapl_pmus *rapl_pmus;
 static cpumask_t rapl_cpu_mask;
-static int rapl_cntr_mask;
+static unsigned int rapl_cntr_mask;
+static u64 rapl_timer_ms;
 
-static DEFINE_PER_CPU(struct rapl_pmu *, rapl_pmu);
-static DEFINE_PER_CPU(struct rapl_pmu *, rapl_pmu_to_free);
+static inline struct rapl_pmu *cpu_to_rapl_pmu(unsigned int cpu)
+{
+	return rapl_pmus->pmus[topology_logical_package_id(cpu)];
+}
 
-static struct x86_pmu_quirk *rapl_quirks;
 static inline u64 rapl_read_counter(struct perf_event *event)
 {
 	u64 raw;
@@ -141,19 +154,10 @@
 	return raw;
 }
 
-#define rapl_add_quirk(func_)						\
-do {									\
-	static struct x86_pmu_quirk __quirk __initdata = {		\
-		.func = func_,						\
-	};								\
-	__quirk.next = rapl_quirks;					\
-	rapl_quirks = &__quirk;						\
-} while (0)
-
 static inline u64 rapl_scale(u64 v, int cfg)
 {
 	if (cfg > NR_RAPL_DOMAINS) {
-		pr_warn("invalid domain %d, failed to scale data\n", cfg);
+		pr_warn("Invalid domain %d, failed to scale data\n", cfg);
 		return v;
 	}
 	/*
@@ -206,27 +210,21 @@
 		     HRTIMER_MODE_REL_PINNED);
 }
 
-static void rapl_stop_hrtimer(struct rapl_pmu *pmu)
-{
-	hrtimer_cancel(&pmu->hrtimer);
-}
-
 static enum hrtimer_restart rapl_hrtimer_handle(struct hrtimer *hrtimer)
 {
-	struct rapl_pmu *pmu = __this_cpu_read(rapl_pmu);
+	struct rapl_pmu *pmu = container_of(hrtimer, struct rapl_pmu, hrtimer);
 	struct perf_event *event;
 	unsigned long flags;
 
 	if (!pmu->n_active)
 		return HRTIMER_NORESTART;
 
-	spin_lock_irqsave(&pmu->lock, flags);
+	raw_spin_lock_irqsave(&pmu->lock, flags);
 
-	list_for_each_entry(event, &pmu->active_list, active_entry) {
+	list_for_each_entry(event, &pmu->active_list, active_entry)
 		rapl_event_update(event);
-	}
 
-	spin_unlock_irqrestore(&pmu->lock, flags);
+	raw_spin_unlock_irqrestore(&pmu->lock, flags);
 
 	hrtimer_forward_now(hrtimer, pmu->timer_interval);
 
@@ -260,28 +258,28 @@
 
 static void rapl_pmu_event_start(struct perf_event *event, int mode)
 {
-	struct rapl_pmu *pmu = __this_cpu_read(rapl_pmu);
+	struct rapl_pmu *pmu = event->pmu_private;
 	unsigned long flags;
 
-	spin_lock_irqsave(&pmu->lock, flags);
+	raw_spin_lock_irqsave(&pmu->lock, flags);
 	__rapl_pmu_event_start(pmu, event);
-	spin_unlock_irqrestore(&pmu->lock, flags);
+	raw_spin_unlock_irqrestore(&pmu->lock, flags);
 }
 
 static void rapl_pmu_event_stop(struct perf_event *event, int mode)
 {
-	struct rapl_pmu *pmu = __this_cpu_read(rapl_pmu);
+	struct rapl_pmu *pmu = event->pmu_private;
 	struct hw_perf_event *hwc = &event->hw;
 	unsigned long flags;
 
-	spin_lock_irqsave(&pmu->lock, flags);
+	raw_spin_lock_irqsave(&pmu->lock, flags);
 
 	/* mark event as deactivated and stopped */
 	if (!(hwc->state & PERF_HES_STOPPED)) {
 		WARN_ON_ONCE(pmu->n_active <= 0);
 		pmu->n_active--;
 		if (pmu->n_active == 0)
-			rapl_stop_hrtimer(pmu);
+			hrtimer_cancel(&pmu->hrtimer);
 
 		list_del(&event->active_entry);
 
@@ -299,23 +297,23 @@
 		hwc->state |= PERF_HES_UPTODATE;
 	}
 
-	spin_unlock_irqrestore(&pmu->lock, flags);
+	raw_spin_unlock_irqrestore(&pmu->lock, flags);
 }
 
 static int rapl_pmu_event_add(struct perf_event *event, int mode)
 {
-	struct rapl_pmu *pmu = __this_cpu_read(rapl_pmu);
+	struct rapl_pmu *pmu = event->pmu_private;
 	struct hw_perf_event *hwc = &event->hw;
 	unsigned long flags;
 
-	spin_lock_irqsave(&pmu->lock, flags);
+	raw_spin_lock_irqsave(&pmu->lock, flags);
 
 	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
 
 	if (mode & PERF_EF_START)
 		__rapl_pmu_event_start(pmu, event);
 
-	spin_unlock_irqrestore(&pmu->lock, flags);
+	raw_spin_unlock_irqrestore(&pmu->lock, flags);
 
 	return 0;
 }
@@ -329,15 +327,19 @@
 {
 	u64 cfg = event->attr.config & RAPL_EVENT_MASK;
 	int bit, msr, ret = 0;
+	struct rapl_pmu *pmu;
 
 	/* only look at RAPL events */
-	if (event->attr.type != rapl_pmu_class.type)
+	if (event->attr.type != rapl_pmus->pmu.type)
 		return -ENOENT;
 
 	/* check only supported bits are set */
 	if (event->attr.config & ~RAPL_EVENT_MASK)
 		return -EINVAL;
 
+	if (event->cpu < 0)
+		return -EINVAL;
+
 	/*
 	 * check event is known (determines counter)
 	 */
@@ -376,6 +378,9 @@
 		return -EINVAL;
 
 	/* must be done before validate_group */
+	pmu = cpu_to_rapl_pmu(event->cpu);
+	event->cpu = pmu->cpu;
+	event->pmu_private = pmu;
 	event->hw.event_base = msr;
 	event->hw.config = cfg;
 	event->hw.idx = bit;
@@ -506,139 +511,62 @@
 	NULL,
 };
 
-static struct pmu rapl_pmu_class = {
-	.attr_groups	= rapl_attr_groups,
-	.task_ctx_nr	= perf_invalid_context, /* system-wide only */
-	.event_init	= rapl_pmu_event_init,
-	.add		= rapl_pmu_event_add, /* must have */
-	.del		= rapl_pmu_event_del, /* must have */
-	.start		= rapl_pmu_event_start,
-	.stop		= rapl_pmu_event_stop,
-	.read		= rapl_pmu_event_read,
-};
-
 static void rapl_cpu_exit(int cpu)
 {
-	struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu);
-	int i, phys_id = topology_physical_package_id(cpu);
-	int target = -1;
+	struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu);
+	int target;
 
-	/* find a new cpu on same package */
-	for_each_online_cpu(i) {
-		if (i == cpu)
-			continue;
-		if (phys_id == topology_physical_package_id(i)) {
-			target = i;
-			break;
-		}
-	}
-	/*
-	 * clear cpu from cpumask
-	 * if was set in cpumask and still some cpu on package,
-	 * then move to new cpu
-	 */
-	if (cpumask_test_and_clear_cpu(cpu, &rapl_cpu_mask) && target >= 0)
+	/* Check if exiting cpu is used for collecting rapl events */
+	if (!cpumask_test_and_clear_cpu(cpu, &rapl_cpu_mask))
+		return;
+
+	pmu->cpu = -1;
+	/* Find a new cpu to collect rapl events */
+	target = cpumask_any_but(topology_core_cpumask(cpu), cpu);
+
+	/* Migrate rapl events to the new target */
+	if (target < nr_cpu_ids) {
 		cpumask_set_cpu(target, &rapl_cpu_mask);
-
-	WARN_ON(cpumask_empty(&rapl_cpu_mask));
-	/*
-	 * migrate events and context to new cpu
-	 */
-	if (target >= 0)
+		pmu->cpu = target;
 		perf_pmu_migrate_context(pmu->pmu, cpu, target);
-
-	/* cancel overflow polling timer for CPU */
-	rapl_stop_hrtimer(pmu);
+	}
 }
 
 static void rapl_cpu_init(int cpu)
 {
-	int i, phys_id = topology_physical_package_id(cpu);
+	struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu);
+	int target;
 
-	/* check if phys_is is already covered */
-	for_each_cpu(i, &rapl_cpu_mask) {
-		if (phys_id == topology_physical_package_id(i))
-			return;
-	}
-	/* was not found, so add it */
-	cpumask_set_cpu(cpu, &rapl_cpu_mask);
-}
-
-static __init void rapl_hsw_server_quirk(void)
-{
 	/*
-	 * DRAM domain on HSW server has fixed energy unit which can be
-	 * different than the unit from power unit MSR.
-	 * "Intel Xeon Processor E5-1600 and E5-2600 v3 Product Families, V2
-	 * of 2. Datasheet, September 2014, Reference Number: 330784-001 "
+	 * Check if there is an online cpu in the package which collects rapl
+	 * events already.
 	 */
-	rapl_hw_unit[RAPL_IDX_RAM_NRG_STAT] = 16;
+	target = cpumask_any_and(&rapl_cpu_mask, topology_core_cpumask(cpu));
+	if (target < nr_cpu_ids)
+		return;
+
+	cpumask_set_cpu(cpu, &rapl_cpu_mask);
+	pmu->cpu = cpu;
 }
 
 static int rapl_cpu_prepare(int cpu)
 {
-	struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu);
-	int phys_id = topology_physical_package_id(cpu);
-	u64 ms;
+	struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu);
 
 	if (pmu)
 		return 0;
 
-	if (phys_id < 0)
-		return -1;
-
 	pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu));
 	if (!pmu)
-		return -1;
-	spin_lock_init(&pmu->lock);
+		return -ENOMEM;
 
+	raw_spin_lock_init(&pmu->lock);
 	INIT_LIST_HEAD(&pmu->active_list);
-
-	pmu->pmu = &rapl_pmu_class;
-
-	/*
-	 * use reference of 200W for scaling the timeout
-	 * to avoid missing counter overflows.
-	 * 200W = 200 Joules/sec
-	 * divide interval by 2 to avoid lockstep (2 * 100)
-	 * if hw unit is 32, then we use 2 ms 1/200/2
-	 */
-	if (rapl_hw_unit[0] < 32)
-		ms = (1000 / (2 * 100)) * (1ULL << (32 - rapl_hw_unit[0] - 1));
-	else
-		ms = 2;
-
-	pmu->timer_interval = ms_to_ktime(ms);
-
+	pmu->pmu = &rapl_pmus->pmu;
+	pmu->timer_interval = ms_to_ktime(rapl_timer_ms);
+	pmu->cpu = -1;
 	rapl_hrtimer_init(pmu);
-
-	/* set RAPL pmu for this cpu for now */
-	per_cpu(rapl_pmu, cpu) = pmu;
-	per_cpu(rapl_pmu_to_free, cpu) = NULL;
-
-	return 0;
-}
-
-static void rapl_cpu_kfree(int cpu)
-{
-	struct rapl_pmu *pmu = per_cpu(rapl_pmu_to_free, cpu);
-
-	kfree(pmu);
-
-	per_cpu(rapl_pmu_to_free, cpu) = NULL;
-}
-
-static int rapl_cpu_dying(int cpu)
-{
-	struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu);
-
-	if (!pmu)
-		return 0;
-
-	per_cpu(rapl_pmu, cpu) = NULL;
-
-	per_cpu(rapl_pmu_to_free, cpu) = pmu;
-
+	rapl_pmus->pmus[topology_logical_package_id(cpu)] = pmu;
 	return 0;
 }
 
@@ -651,28 +579,20 @@
 	case CPU_UP_PREPARE:
 		rapl_cpu_prepare(cpu);
 		break;
-	case CPU_STARTING:
+
+	case CPU_DOWN_FAILED:
+	case CPU_ONLINE:
 		rapl_cpu_init(cpu);
 		break;
-	case CPU_UP_CANCELED:
-	case CPU_DYING:
-		rapl_cpu_dying(cpu);
-		break;
-	case CPU_ONLINE:
-	case CPU_DEAD:
-		rapl_cpu_kfree(cpu);
-		break;
+
 	case CPU_DOWN_PREPARE:
 		rapl_cpu_exit(cpu);
 		break;
-	default:
-		break;
 	}
-
 	return NOTIFY_OK;
 }
 
-static int rapl_check_hw_unit(void)
+static int rapl_check_hw_unit(bool apply_quirk)
 {
 	u64 msr_rapl_power_unit_bits;
 	int i;
@@ -683,28 +603,107 @@
 	for (i = 0; i < NR_RAPL_DOMAINS; i++)
 		rapl_hw_unit[i] = (msr_rapl_power_unit_bits >> 8) & 0x1FULL;
 
+	/*
+	 * DRAM domain on HSW server and KNL has fixed energy unit which can be
+	 * different than the unit from power unit MSR. See
+	 * "Intel Xeon Processor E5-1600 and E5-2600 v3 Product Families, V2
+	 * of 2. Datasheet, September 2014, Reference Number: 330784-001 "
+	 */
+	if (apply_quirk)
+		rapl_hw_unit[RAPL_IDX_RAM_NRG_STAT] = 16;
+
+	/*
+	 * Calculate the timer rate:
+	 * Use reference of 200W for scaling the timeout to avoid counter
+	 * overflows. 200W = 200 Joules/sec
+	 * Divide interval by 2 to avoid lockstep (2 * 100)
+	 * if hw unit is 32, then we use 2 ms 1/200/2
+	 */
+	rapl_timer_ms = 2;
+	if (rapl_hw_unit[0] < 32) {
+		rapl_timer_ms = (1000 / (2 * 100));
+		rapl_timer_ms *= (1ULL << (32 - rapl_hw_unit[0] - 1));
+	}
 	return 0;
 }
 
-static const struct x86_cpu_id rapl_cpu_match[] = {
+static void __init rapl_advertise(void)
+{
+	int i;
+
+	pr_info("API unit is 2^-32 Joules, %d fixed counters, %llu ms ovfl timer\n",
+		hweight32(rapl_cntr_mask), rapl_timer_ms);
+
+	for (i = 0; i < NR_RAPL_DOMAINS; i++) {
+		if (rapl_cntr_mask & (1 << i)) {
+			pr_info("hw unit of domain %s 2^-%d Joules\n",
+				rapl_domain_names[i], rapl_hw_unit[i]);
+		}
+	}
+}
+
+static int __init rapl_prepare_cpus(void)
+{
+	unsigned int cpu, pkg;
+	int ret;
+
+	for_each_online_cpu(cpu) {
+		pkg = topology_logical_package_id(cpu);
+		if (rapl_pmus->pmus[pkg])
+			continue;
+
+		ret = rapl_cpu_prepare(cpu);
+		if (ret)
+			return ret;
+		rapl_cpu_init(cpu);
+	}
+	return 0;
+}
+
+static void __init cleanup_rapl_pmus(void)
+{
+	int i;
+
+	for (i = 0; i < rapl_pmus->maxpkg; i++)
+		kfree(rapl_pmus->pmus + i);
+	kfree(rapl_pmus);
+}
+
+static int __init init_rapl_pmus(void)
+{
+	int maxpkg = topology_max_packages();
+	size_t size;
+
+	size = sizeof(*rapl_pmus) + maxpkg * sizeof(struct rapl_pmu *);
+	rapl_pmus = kzalloc(size, GFP_KERNEL);
+	if (!rapl_pmus)
+		return -ENOMEM;
+
+	rapl_pmus->maxpkg		= maxpkg;
+	rapl_pmus->pmu.attr_groups	= rapl_attr_groups;
+	rapl_pmus->pmu.task_ctx_nr	= perf_invalid_context;
+	rapl_pmus->pmu.event_init	= rapl_pmu_event_init;
+	rapl_pmus->pmu.add		= rapl_pmu_event_add;
+	rapl_pmus->pmu.del		= rapl_pmu_event_del;
+	rapl_pmus->pmu.start		= rapl_pmu_event_start;
+	rapl_pmus->pmu.stop		= rapl_pmu_event_stop;
+	rapl_pmus->pmu.read		= rapl_pmu_event_read;
+	return 0;
+}
+
+static const struct x86_cpu_id rapl_cpu_match[] __initconst = {
 	[0] = { .vendor = X86_VENDOR_INTEL, .family = 6 },
 	[1] = {},
 };
 
 static int __init rapl_pmu_init(void)
 {
-	struct rapl_pmu *pmu;
-	int cpu, ret;
-	struct x86_pmu_quirk *quirk;
-	int i;
+	bool apply_quirk = false;
+	int ret;
 
-	/*
-	 * check for Intel processor family 6
-	 */
 	if (!x86_match_cpu(rapl_cpu_match))
-		return 0;
+		return -ENODEV;
 
-	/* check supported CPU */
 	switch (boot_cpu_data.x86_model) {
 	case 42: /* Sandy Bridge */
 	case 58: /* Ivy Bridge */
@@ -712,7 +711,7 @@
 		rapl_pmu_events_group.attrs = rapl_events_cln_attr;
 		break;
 	case 63: /* Haswell-Server */
-		rapl_add_quirk(rapl_hsw_server_quirk);
+		apply_quirk = true;
 		rapl_cntr_mask = RAPL_IDX_SRV;
 		rapl_pmu_events_group.attrs = rapl_events_srv_attr;
 		break;
@@ -728,56 +727,41 @@
 		rapl_pmu_events_group.attrs = rapl_events_srv_attr;
 		break;
 	case 87: /* Knights Landing */
-		rapl_add_quirk(rapl_hsw_server_quirk);
+		apply_quirk = true;
 		rapl_cntr_mask = RAPL_IDX_KNL;
 		rapl_pmu_events_group.attrs = rapl_events_knl_attr;
-
+		break;
 	default:
-		/* unsupported */
-		return 0;
+		return -ENODEV;
 	}
-	ret = rapl_check_hw_unit();
+
+	ret = rapl_check_hw_unit(apply_quirk);
 	if (ret)
 		return ret;
 
-	/* run cpu model quirks */
-	for (quirk = rapl_quirks; quirk; quirk = quirk->next)
-		quirk->func();
+	ret = init_rapl_pmus();
+	if (ret)
+		return ret;
+
 	cpu_notifier_register_begin();
 
-	for_each_online_cpu(cpu) {
-		ret = rapl_cpu_prepare(cpu);
-		if (ret)
-			goto out;
-		rapl_cpu_init(cpu);
-	}
+	ret = rapl_prepare_cpus();
+	if (ret)
+		goto out;
+
+	ret = perf_pmu_register(&rapl_pmus->pmu, "power", -1);
+	if (ret)
+		goto out;
 
 	__perf_cpu_notifier(rapl_cpu_notifier);
-
-	ret = perf_pmu_register(&rapl_pmu_class, "power", -1);
-	if (WARN_ON(ret)) {
-		pr_info("RAPL PMU detected, registration failed (%d), RAPL PMU disabled\n", ret);
-		cpu_notifier_register_done();
-		return -1;
-	}
-
-	pmu = __this_cpu_read(rapl_pmu);
-
-	pr_info("RAPL PMU detected,"
-		" API unit is 2^-32 Joules,"
-		" %d fixed counters"
-		" %llu ms ovfl timer\n",
-		hweight32(rapl_cntr_mask),
-		ktime_to_ms(pmu->timer_interval));
-	for (i = 0; i < NR_RAPL_DOMAINS; i++) {
-		if (rapl_cntr_mask & (1 << i)) {
-			pr_info("hw unit of domain %s 2^-%d Joules\n",
-				rapl_domain_names[i], rapl_hw_unit[i]);
-		}
-	}
-out:
 	cpu_notifier_register_done();
-
+	rapl_advertise();
 	return 0;
+
+out:
+	pr_warn("Initialization failed (%d), disabled\n", ret);
+	cleanup_rapl_pmus();
+	cpu_notifier_register_done();
+	return ret;
 }
 device_initcall(rapl_pmu_init);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/events/intel/uncore.c
similarity index 74%
rename from arch/x86/kernel/cpu/perf_event_intel_uncore.c
rename to arch/x86/events/intel/uncore.c
index f97f807..7012d18 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -1,4 +1,4 @@
-#include "perf_event_intel_uncore.h"
+#include "uncore.h"
 
 static struct intel_uncore_type *empty_uncore[] = { NULL, };
 struct intel_uncore_type **uncore_msr_uncores = empty_uncore;
@@ -9,9 +9,9 @@
 /* pci bus to socket mapping */
 DEFINE_RAW_SPINLOCK(pci2phy_map_lock);
 struct list_head pci2phy_map_head = LIST_HEAD_INIT(pci2phy_map_head);
-struct pci_dev *uncore_extra_pci_dev[UNCORE_SOCKET_MAX][UNCORE_EXTRA_PCI_DEV_MAX];
+struct pci_extra_dev *uncore_extra_pci_dev;
+static int max_packages;
 
-static DEFINE_RAW_SPINLOCK(uncore_box_lock);
 /* mask of cpus that collect uncore events */
 static cpumask_t uncore_cpu_mask;
 
@@ -21,7 +21,7 @@
 struct event_constraint uncore_constraint_empty =
 	EVENT_CONSTRAINT(0, 0, 0);
 
-int uncore_pcibus_to_physid(struct pci_bus *bus)
+static int uncore_pcibus_to_physid(struct pci_bus *bus)
 {
 	struct pci2phy_map *map;
 	int phys_id = -1;
@@ -38,6 +38,16 @@
 	return phys_id;
 }
 
+static void uncore_free_pcibus_map(void)
+{
+	struct pci2phy_map *map, *tmp;
+
+	list_for_each_entry_safe(map, tmp, &pci2phy_map_head, list) {
+		list_del(&map->list);
+		kfree(map);
+	}
+}
+
 struct pci2phy_map *__find_pci2phy_map(int segment)
 {
 	struct pci2phy_map *map, *alloc = NULL;
@@ -82,43 +92,9 @@
 	return sprintf(buf, "%s", event->config);
 }
 
-struct intel_uncore_pmu *uncore_event_to_pmu(struct perf_event *event)
-{
-	return container_of(event->pmu, struct intel_uncore_pmu, pmu);
-}
-
 struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu)
 {
-	struct intel_uncore_box *box;
-
-	box = *per_cpu_ptr(pmu->box, cpu);
-	if (box)
-		return box;
-
-	raw_spin_lock(&uncore_box_lock);
-	/* Recheck in lock to handle races. */
-	if (*per_cpu_ptr(pmu->box, cpu))
-		goto out;
-	list_for_each_entry(box, &pmu->box_list, list) {
-		if (box->phys_id == topology_physical_package_id(cpu)) {
-			atomic_inc(&box->refcnt);
-			*per_cpu_ptr(pmu->box, cpu) = box;
-			break;
-		}
-	}
-out:
-	raw_spin_unlock(&uncore_box_lock);
-
-	return *per_cpu_ptr(pmu->box, cpu);
-}
-
-struct intel_uncore_box *uncore_event_to_box(struct perf_event *event)
-{
-	/*
-	 * perf core schedules event on the basis of cpu, uncore events are
-	 * collected by one of the cpus inside a physical package.
-	 */
-	return uncore_pmu_to_box(uncore_event_to_pmu(event), smp_processor_id());
+	return pmu->boxes[topology_logical_package_id(cpu)];
 }
 
 u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event)
@@ -207,7 +183,8 @@
 	return config;
 }
 
-static void uncore_assign_hw_event(struct intel_uncore_box *box, struct perf_event *event, int idx)
+static void uncore_assign_hw_event(struct intel_uncore_box *box,
+				   struct perf_event *event, int idx)
 {
 	struct hw_perf_event *hwc = &event->hw;
 
@@ -302,24 +279,25 @@
 	box->hrtimer.function = uncore_pmu_hrtimer;
 }
 
-static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, int node)
+static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type,
+						 int node)
 {
+	int i, size, numshared = type->num_shared_regs ;
 	struct intel_uncore_box *box;
-	int i, size;
 
-	size = sizeof(*box) + type->num_shared_regs * sizeof(struct intel_uncore_extra_reg);
+	size = sizeof(*box) + numshared * sizeof(struct intel_uncore_extra_reg);
 
 	box = kzalloc_node(size, GFP_KERNEL, node);
 	if (!box)
 		return NULL;
 
-	for (i = 0; i < type->num_shared_regs; i++)
+	for (i = 0; i < numshared; i++)
 		raw_spin_lock_init(&box->shared_regs[i].lock);
 
 	uncore_pmu_init_hrtimer(box);
-	atomic_set(&box->refcnt, 1);
 	box->cpu = -1;
-	box->phys_id = -1;
+	box->pci_phys_id = -1;
+	box->pkgid = -1;
 
 	/* set default hrtimer timeout */
 	box->hrtimer_duration = UNCORE_PMU_HRTIMER_INTERVAL;
@@ -341,7 +319,8 @@
 }
 
 static int
-uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader, bool dogrp)
+uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader,
+		      bool dogrp)
 {
 	struct perf_event *event;
 	int n, max_count;
@@ -402,7 +381,8 @@
 	return &type->unconstrainted;
 }
 
-static void uncore_put_event_constraint(struct intel_uncore_box *box, struct perf_event *event)
+static void uncore_put_event_constraint(struct intel_uncore_box *box,
+					struct perf_event *event)
 {
 	if (box->pmu->type->ops->put_constraint)
 		box->pmu->type->ops->put_constraint(box, event);
@@ -582,7 +562,7 @@
 		if (event == box->event_list[i]) {
 			uncore_put_event_constraint(box, event);
 
-			while (++i < box->n_events)
+			for (++i; i < box->n_events; i++)
 				box->event_list[i - 1] = box->event_list[i];
 
 			--box->n_events;
@@ -676,6 +656,7 @@
 	if (!box || box->cpu < 0)
 		return -EINVAL;
 	event->cpu = box->cpu;
+	event->pmu_private = box;
 
 	event->hw.idx = -1;
 	event->hw.last_tag = ~0ULL;
@@ -760,64 +741,110 @@
 	}
 
 	ret = perf_pmu_register(&pmu->pmu, pmu->name, -1);
+	if (!ret)
+		pmu->registered = true;
 	return ret;
 }
 
+static void uncore_pmu_unregister(struct intel_uncore_pmu *pmu)
+{
+	if (!pmu->registered)
+		return;
+	perf_pmu_unregister(&pmu->pmu);
+	pmu->registered = false;
+}
+
+static void __init __uncore_exit_boxes(struct intel_uncore_type *type, int cpu)
+{
+	struct intel_uncore_pmu *pmu = type->pmus;
+	struct intel_uncore_box *box;
+	int i, pkg;
+
+	if (pmu) {
+		pkg = topology_physical_package_id(cpu);
+		for (i = 0; i < type->num_boxes; i++, pmu++) {
+			box = pmu->boxes[pkg];
+			if (box)
+				uncore_box_exit(box);
+		}
+	}
+}
+
+static void __init uncore_exit_boxes(void *dummy)
+{
+	struct intel_uncore_type **types;
+
+	for (types = uncore_msr_uncores; *types; types++)
+		__uncore_exit_boxes(*types++, smp_processor_id());
+}
+
+static void uncore_free_boxes(struct intel_uncore_pmu *pmu)
+{
+	int pkg;
+
+	for (pkg = 0; pkg < max_packages; pkg++)
+		kfree(pmu->boxes[pkg]);
+	kfree(pmu->boxes);
+}
+
 static void __init uncore_type_exit(struct intel_uncore_type *type)
 {
+	struct intel_uncore_pmu *pmu = type->pmus;
 	int i;
 
-	for (i = 0; i < type->num_boxes; i++)
-		free_percpu(type->pmus[i].box);
-	kfree(type->pmus);
-	type->pmus = NULL;
+	if (pmu) {
+		for (i = 0; i < type->num_boxes; i++, pmu++) {
+			uncore_pmu_unregister(pmu);
+			uncore_free_boxes(pmu);
+		}
+		kfree(type->pmus);
+		type->pmus = NULL;
+	}
 	kfree(type->events_group);
 	type->events_group = NULL;
 }
 
 static void __init uncore_types_exit(struct intel_uncore_type **types)
 {
-	int i;
-	for (i = 0; types[i]; i++)
-		uncore_type_exit(types[i]);
+	for (; *types; types++)
+		uncore_type_exit(*types);
 }
 
-static int __init uncore_type_init(struct intel_uncore_type *type)
+static int __init uncore_type_init(struct intel_uncore_type *type, bool setid)
 {
 	struct intel_uncore_pmu *pmus;
 	struct attribute_group *attr_group;
 	struct attribute **attrs;
+	size_t size;
 	int i, j;
 
 	pmus = kzalloc(sizeof(*pmus) * type->num_boxes, GFP_KERNEL);
 	if (!pmus)
 		return -ENOMEM;
 
-	type->pmus = pmus;
+	size = max_packages * sizeof(struct intel_uncore_box *);
 
+	for (i = 0; i < type->num_boxes; i++) {
+		pmus[i].func_id	= setid ? i : -1;
+		pmus[i].pmu_idx	= i;
+		pmus[i].type	= type;
+		pmus[i].boxes	= kzalloc(size, GFP_KERNEL);
+		if (!pmus[i].boxes)
+			return -ENOMEM;
+	}
+
+	type->pmus = pmus;
 	type->unconstrainted = (struct event_constraint)
 		__EVENT_CONSTRAINT(0, (1ULL << type->num_counters) - 1,
 				0, type->num_counters, 0, 0);
 
-	for (i = 0; i < type->num_boxes; i++) {
-		pmus[i].func_id = -1;
-		pmus[i].pmu_idx = i;
-		pmus[i].type = type;
-		INIT_LIST_HEAD(&pmus[i].box_list);
-		pmus[i].box = alloc_percpu(struct intel_uncore_box *);
-		if (!pmus[i].box)
-			goto fail;
-	}
-
 	if (type->event_descs) {
-		i = 0;
-		while (type->event_descs[i].attr.attr.name)
-			i++;
+		for (i = 0; type->event_descs[i].attr.attr.name; i++);
 
 		attr_group = kzalloc(sizeof(struct attribute *) * (i + 1) +
 					sizeof(*attr_group), GFP_KERNEL);
 		if (!attr_group)
-			goto fail;
+			return -ENOMEM;
 
 		attrs = (struct attribute **)(attr_group + 1);
 		attr_group->name = "events";
@@ -831,25 +858,19 @@
 
 	type->pmu_group = &uncore_pmu_attr_group;
 	return 0;
-fail:
-	uncore_type_exit(type);
-	return -ENOMEM;
 }
 
-static int __init uncore_types_init(struct intel_uncore_type **types)
+static int __init
+uncore_types_init(struct intel_uncore_type **types, bool setid)
 {
-	int i, ret;
+	int ret;
 
-	for (i = 0; types[i]; i++) {
-		ret = uncore_type_init(types[i]);
+	for (; *types; types++) {
+		ret = uncore_type_init(*types, setid);
 		if (ret)
-			goto fail;
+			return ret;
 	}
 	return 0;
-fail:
-	while (--i >= 0)
-		uncore_type_exit(types[i]);
-	return ret;
 }
 
 /*
@@ -857,28 +878,28 @@
  */
 static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 {
+	struct intel_uncore_type *type;
 	struct intel_uncore_pmu *pmu;
 	struct intel_uncore_box *box;
-	struct intel_uncore_type *type;
-	int phys_id;
-	bool first_box = false;
+	int phys_id, pkg, ret;
 
 	phys_id = uncore_pcibus_to_physid(pdev->bus);
 	if (phys_id < 0)
 		return -ENODEV;
 
+	pkg = topology_phys_to_logical_pkg(phys_id);
+	if (WARN_ON_ONCE(pkg < 0))
+		return -EINVAL;
+
 	if (UNCORE_PCI_DEV_TYPE(id->driver_data) == UNCORE_EXTRA_PCI_DEV) {
 		int idx = UNCORE_PCI_DEV_IDX(id->driver_data);
-		uncore_extra_pci_dev[phys_id][idx] = pdev;
+
+		uncore_extra_pci_dev[pkg].dev[idx] = pdev;
 		pci_set_drvdata(pdev, NULL);
 		return 0;
 	}
 
 	type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)];
-	box = uncore_alloc_box(type, NUMA_NO_NODE);
-	if (!box)
-		return -ENOMEM;
-
 	/*
 	 * for performance monitoring unit with multiple boxes,
 	 * each box has a different function id.
@@ -890,44 +911,60 @@
 	 * some device types. Hence PCI device idx would be 0 for all devices.
 	 * So increment pmu pointer to point to an unused array element.
 	 */
-	if (boot_cpu_data.x86_model == 87)
+	if (boot_cpu_data.x86_model == 87) {
 		while (pmu->func_id >= 0)
 			pmu++;
+	}
+
+	if (WARN_ON_ONCE(pmu->boxes[pkg] != NULL))
+		return -EINVAL;
+
+	box = uncore_alloc_box(type, NUMA_NO_NODE);
+	if (!box)
+		return -ENOMEM;
+
 	if (pmu->func_id < 0)
 		pmu->func_id = pdev->devfn;
 	else
 		WARN_ON_ONCE(pmu->func_id != pdev->devfn);
 
-	box->phys_id = phys_id;
+	atomic_inc(&box->refcnt);
+	box->pci_phys_id = phys_id;
+	box->pkgid = pkg;
 	box->pci_dev = pdev;
 	box->pmu = pmu;
 	uncore_box_init(box);
 	pci_set_drvdata(pdev, box);
 
-	raw_spin_lock(&uncore_box_lock);
-	if (list_empty(&pmu->box_list))
-		first_box = true;
-	list_add_tail(&box->list, &pmu->box_list);
-	raw_spin_unlock(&uncore_box_lock);
+	pmu->boxes[pkg] = box;
+	if (atomic_inc_return(&pmu->activeboxes) > 1)
+		return 0;
 
-	if (first_box)
-		uncore_pmu_register(pmu);
-	return 0;
+	/* First active box registers the pmu */
+	ret = uncore_pmu_register(pmu);
+	if (ret) {
+		pci_set_drvdata(pdev, NULL);
+		pmu->boxes[pkg] = NULL;
+		uncore_box_exit(box);
+		kfree(box);
+	}
+	return ret;
 }
 
 static void uncore_pci_remove(struct pci_dev *pdev)
 {
 	struct intel_uncore_box *box = pci_get_drvdata(pdev);
 	struct intel_uncore_pmu *pmu;
-	int i, cpu, phys_id;
-	bool last_box = false;
+	int i, phys_id, pkg;
 
 	phys_id = uncore_pcibus_to_physid(pdev->bus);
+	pkg = topology_phys_to_logical_pkg(phys_id);
+
 	box = pci_get_drvdata(pdev);
 	if (!box) {
 		for (i = 0; i < UNCORE_EXTRA_PCI_DEV_MAX; i++) {
-			if (uncore_extra_pci_dev[phys_id][i] == pdev) {
-				uncore_extra_pci_dev[phys_id][i] = NULL;
+			if (uncore_extra_pci_dev[pkg].dev[i] == pdev) {
+				uncore_extra_pci_dev[pkg].dev[i] = NULL;
 				break;
 			}
 		}
@@ -936,33 +973,20 @@
 	}
 
 	pmu = box->pmu;
-	if (WARN_ON_ONCE(phys_id != box->phys_id))
+	if (WARN_ON_ONCE(phys_id != box->pci_phys_id))
 		return;
 
 	pci_set_drvdata(pdev, NULL);
-
-	raw_spin_lock(&uncore_box_lock);
-	list_del(&box->list);
-	if (list_empty(&pmu->box_list))
-		last_box = true;
-	raw_spin_unlock(&uncore_box_lock);
-
-	for_each_possible_cpu(cpu) {
-		if (*per_cpu_ptr(pmu->box, cpu) == box) {
-			*per_cpu_ptr(pmu->box, cpu) = NULL;
-			atomic_dec(&box->refcnt);
-		}
-	}
-
-	WARN_ON_ONCE(atomic_read(&box->refcnt) != 1);
+	pmu->boxes[pkg] = NULL;
+	if (atomic_dec_return(&pmu->activeboxes) == 0)
+		uncore_pmu_unregister(pmu);
+	uncore_box_exit(box);
 	kfree(box);
-
-	if (last_box)
-		perf_pmu_unregister(&pmu->pmu);
 }
 
 static int __init uncore_pci_init(void)
 {
+	size_t size;
 	int ret;
 
 	switch (boot_cpu_data.x86_model) {
@@ -995,26 +1019,44 @@
 	case 87: /* Knights Landing */
 		ret = knl_uncore_pci_init();
 		break;
+	case 94: /* SkyLake */
+		ret = skl_uncore_pci_init();
+		break;
 	default:
-		return 0;
+		return -ENODEV;
 	}
 
 	if (ret)
 		return ret;
 
-	ret = uncore_types_init(uncore_pci_uncores);
+	size = max_packages * sizeof(struct pci_extra_dev);
+	uncore_extra_pci_dev = kzalloc(size, GFP_KERNEL);
+	if (!uncore_extra_pci_dev) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	ret = uncore_types_init(uncore_pci_uncores, false);
 	if (ret)
-		return ret;
+		goto errtype;
 
 	uncore_pci_driver->probe = uncore_pci_probe;
 	uncore_pci_driver->remove = uncore_pci_remove;
 
 	ret = pci_register_driver(uncore_pci_driver);
-	if (ret == 0)
-		pcidrv_registered = true;
-	else
-		uncore_types_exit(uncore_pci_uncores);
+	if (ret)
+		goto errtype;
 
+	pcidrv_registered = true;
+	return 0;
+
+errtype:
+	uncore_types_exit(uncore_pci_uncores);
+	kfree(uncore_extra_pci_dev);
+	uncore_extra_pci_dev = NULL;
+	uncore_free_pcibus_map();
+err:
+	uncore_pci_uncores = empty_uncore;
 	return ret;
 }
 
@@ -1024,173 +1066,139 @@
 		pcidrv_registered = false;
 		pci_unregister_driver(uncore_pci_driver);
 		uncore_types_exit(uncore_pci_uncores);
-	}
-}
-
-/* CPU hot plug/unplug are serialized by cpu_add_remove_lock mutex */
-static LIST_HEAD(boxes_to_free);
-
-static void uncore_kfree_boxes(void)
-{
-	struct intel_uncore_box *box;
-
-	while (!list_empty(&boxes_to_free)) {
-		box = list_entry(boxes_to_free.next,
-				 struct intel_uncore_box, list);
-		list_del(&box->list);
-		kfree(box);
+		kfree(uncore_extra_pci_dev);
+		uncore_free_pcibus_map();
 	}
 }
 
 static void uncore_cpu_dying(int cpu)
 {
-	struct intel_uncore_type *type;
+	struct intel_uncore_type *type, **types = uncore_msr_uncores;
 	struct intel_uncore_pmu *pmu;
 	struct intel_uncore_box *box;
-	int i, j;
+	int i, pkg;
 
-	for (i = 0; uncore_msr_uncores[i]; i++) {
-		type = uncore_msr_uncores[i];
-		for (j = 0; j < type->num_boxes; j++) {
-			pmu = &type->pmus[j];
-			box = *per_cpu_ptr(pmu->box, cpu);
-			*per_cpu_ptr(pmu->box, cpu) = NULL;
-			if (box && atomic_dec_and_test(&box->refcnt))
-				list_add(&box->list, &boxes_to_free);
+	pkg = topology_logical_package_id(cpu);
+	for (; *types; types++) {
+		type = *types;
+		pmu = type->pmus;
+		for (i = 0; i < type->num_boxes; i++, pmu++) {
+			box = pmu->boxes[pkg];
+			if (box && atomic_dec_return(&box->refcnt) == 0)
+				uncore_box_exit(box);
 		}
 	}
 }
 
-static int uncore_cpu_starting(int cpu)
+static void uncore_cpu_starting(int cpu, bool init)
 {
-	struct intel_uncore_type *type;
+	struct intel_uncore_type *type, **types = uncore_msr_uncores;
 	struct intel_uncore_pmu *pmu;
-	struct intel_uncore_box *box, *exist;
-	int i, j, k, phys_id;
+	struct intel_uncore_box *box;
+	int i, pkg, ncpus = 1;
 
-	phys_id = topology_physical_package_id(cpu);
+	if (init) {
+		/*
+		 * On init we get the number of online cpus in the package
+		 * and set refcount for all of them.
+		 */
+		ncpus = cpumask_weight(topology_core_cpumask(cpu));
+	}
 
-	for (i = 0; uncore_msr_uncores[i]; i++) {
-		type = uncore_msr_uncores[i];
-		for (j = 0; j < type->num_boxes; j++) {
-			pmu = &type->pmus[j];
-			box = *per_cpu_ptr(pmu->box, cpu);
-			/* called by uncore_cpu_init? */
-			if (box && box->phys_id >= 0) {
-				uncore_box_init(box);
+	pkg = topology_logical_package_id(cpu);
+	for (; *types; types++) {
+		type = *types;
+		pmu = type->pmus;
+		for (i = 0; i < type->num_boxes; i++, pmu++) {
+			box = pmu->boxes[pkg];
+			if (!box)
 				continue;
-			}
-
-			for_each_online_cpu(k) {
-				exist = *per_cpu_ptr(pmu->box, k);
-				if (exist && exist->phys_id == phys_id) {
-					atomic_inc(&exist->refcnt);
-					*per_cpu_ptr(pmu->box, cpu) = exist;
-					if (box) {
-						list_add(&box->list,
-							 &boxes_to_free);
-						box = NULL;
-					}
-					break;
-				}
-			}
-
-			if (box) {
-				box->phys_id = phys_id;
+			/* The first cpu on a package activates the box */
+			if (atomic_add_return(ncpus, &box->refcnt) == ncpus)
 				uncore_box_init(box);
-			}
 		}
 	}
-	return 0;
 }
 
-static int uncore_cpu_prepare(int cpu, int phys_id)
+static int uncore_cpu_prepare(int cpu)
 {
-	struct intel_uncore_type *type;
+	struct intel_uncore_type *type, **types = uncore_msr_uncores;
 	struct intel_uncore_pmu *pmu;
 	struct intel_uncore_box *box;
-	int i, j;
+	int i, pkg;
 
-	for (i = 0; uncore_msr_uncores[i]; i++) {
-		type = uncore_msr_uncores[i];
-		for (j = 0; j < type->num_boxes; j++) {
-			pmu = &type->pmus[j];
-			if (pmu->func_id < 0)
-				pmu->func_id = j;
-
+	pkg = topology_logical_package_id(cpu);
+	for (; *types; types++) {
+		type = *types;
+		pmu = type->pmus;
+		for (i = 0; i < type->num_boxes; i++, pmu++) {
+			if (pmu->boxes[pkg])
+				continue;
+			/* First cpu of a package allocates the box */
 			box = uncore_alloc_box(type, cpu_to_node(cpu));
 			if (!box)
 				return -ENOMEM;
-
 			box->pmu = pmu;
-			box->phys_id = phys_id;
-			*per_cpu_ptr(pmu->box, cpu) = box;
+			box->pkgid = pkg;
+			pmu->boxes[pkg] = box;
 		}
 	}
 	return 0;
 }
 
-static void
-uncore_change_context(struct intel_uncore_type **uncores, int old_cpu, int new_cpu)
+static void uncore_change_type_ctx(struct intel_uncore_type *type, int old_cpu,
+				   int new_cpu)
 {
-	struct intel_uncore_type *type;
-	struct intel_uncore_pmu *pmu;
+	struct intel_uncore_pmu *pmu = type->pmus;
 	struct intel_uncore_box *box;
-	int i, j;
+	int i, pkg;
 
-	for (i = 0; uncores[i]; i++) {
-		type = uncores[i];
-		for (j = 0; j < type->num_boxes; j++) {
-			pmu = &type->pmus[j];
-			if (old_cpu < 0)
-				box = uncore_pmu_to_box(pmu, new_cpu);
-			else
-				box = uncore_pmu_to_box(pmu, old_cpu);
-			if (!box)
-				continue;
+	pkg = topology_logical_package_id(old_cpu < 0 ? new_cpu : old_cpu);
+	for (i = 0; i < type->num_boxes; i++, pmu++) {
+		box = pmu->boxes[pkg];
+		if (!box)
+			continue;
 
-			if (old_cpu < 0) {
-				WARN_ON_ONCE(box->cpu != -1);
-				box->cpu = new_cpu;
-				continue;
-			}
-
-			WARN_ON_ONCE(box->cpu != old_cpu);
-			if (new_cpu >= 0) {
-				uncore_pmu_cancel_hrtimer(box);
-				perf_pmu_migrate_context(&pmu->pmu,
-						old_cpu, new_cpu);
-				box->cpu = new_cpu;
-			} else {
-				box->cpu = -1;
-			}
+		if (old_cpu < 0) {
+			WARN_ON_ONCE(box->cpu != -1);
+			box->cpu = new_cpu;
+			continue;
 		}
+
+		WARN_ON_ONCE(box->cpu != old_cpu);
+		box->cpu = -1;
+		if (new_cpu < 0)
+			continue;
+
+		uncore_pmu_cancel_hrtimer(box);
+		perf_pmu_migrate_context(&pmu->pmu, old_cpu, new_cpu);
+		box->cpu = new_cpu;
 	}
 }
 
+static void uncore_change_context(struct intel_uncore_type **uncores,
+				  int old_cpu, int new_cpu)
+{
+	for (; *uncores; uncores++)
+		uncore_change_type_ctx(*uncores, old_cpu, new_cpu);
+}
+
 static void uncore_event_exit_cpu(int cpu)
 {
-	int i, phys_id, target;
+	int target;
 
-	/* if exiting cpu is used for collecting uncore events */
+	/* Check if exiting cpu is used for collecting uncore events */
 	if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask))
 		return;
 
-	/* find a new cpu to collect uncore events */
-	phys_id = topology_physical_package_id(cpu);
-	target = -1;
-	for_each_online_cpu(i) {
-		if (i == cpu)
-			continue;
-		if (phys_id == topology_physical_package_id(i)) {
-			target = i;
-			break;
-		}
-	}
+	/* Find a new cpu to collect uncore events */
+	target = cpumask_any_but(topology_core_cpumask(cpu), cpu);
 
-	/* migrate uncore events to the new cpu */
-	if (target >= 0)
+	/* Migrate uncore events to the new target */
+	if (target < nr_cpu_ids)
 		cpumask_set_cpu(target, &uncore_cpu_mask);
+	else
+		target = -1;
 
 	uncore_change_context(uncore_msr_uncores, cpu, target);
 	uncore_change_context(uncore_pci_uncores, cpu, target);
@@ -1198,13 +1206,15 @@
 
 static void uncore_event_init_cpu(int cpu)
 {
-	int i, phys_id;
+	int target;
 
-	phys_id = topology_physical_package_id(cpu);
-	for_each_cpu(i, &uncore_cpu_mask) {
-		if (phys_id == topology_physical_package_id(i))
-			return;
-	}
+	/*
+	 * Check if there is an online cpu in the package
+	 * which collects uncore events already.
+	 */
+	target = cpumask_any_and(&uncore_cpu_mask, topology_core_cpumask(cpu));
+	if (target < nr_cpu_ids)
+		return;
 
 	cpumask_set_cpu(cpu, &uncore_cpu_mask);
 
@@ -1217,39 +1227,25 @@
 {
 	unsigned int cpu = (long)hcpu;
 
-	/* allocate/free data structure for uncore box */
 	switch (action & ~CPU_TASKS_FROZEN) {
 	case CPU_UP_PREPARE:
-		uncore_cpu_prepare(cpu, -1);
-		break;
+		return notifier_from_errno(uncore_cpu_prepare(cpu));
+
 	case CPU_STARTING:
-		uncore_cpu_starting(cpu);
+		uncore_cpu_starting(cpu, false);
+	case CPU_DOWN_FAILED:
+		uncore_event_init_cpu(cpu);
 		break;
+
 	case CPU_UP_CANCELED:
 	case CPU_DYING:
 		uncore_cpu_dying(cpu);
 		break;
-	case CPU_ONLINE:
-	case CPU_DEAD:
-		uncore_kfree_boxes();
-		break;
-	default:
-		break;
-	}
 
-	/* select the cpu that collects uncore events */
-	switch (action & ~CPU_TASKS_FROZEN) {
-	case CPU_DOWN_FAILED:
-	case CPU_STARTING:
-		uncore_event_init_cpu(cpu);
-		break;
 	case CPU_DOWN_PREPARE:
 		uncore_event_exit_cpu(cpu);
 		break;
-	default:
-		break;
 	}
-
 	return NOTIFY_OK;
 }
 
@@ -1262,9 +1258,29 @@
 	.priority	= CPU_PRI_PERF + 1,
 };
 
-static void __init uncore_cpu_setup(void *dummy)
+static int __init type_pmu_register(struct intel_uncore_type *type)
 {
-	uncore_cpu_starting(smp_processor_id());
+	int i, ret;
+
+	for (i = 0; i < type->num_boxes; i++) {
+		ret = uncore_pmu_register(&type->pmus[i]);
+		if (ret)
+			return ret;
+	}
+	return 0;
+}
+
+static int __init uncore_msr_pmus_register(void)
+{
+	struct intel_uncore_type **types = uncore_msr_uncores;
+	int ret;
+
+	for (; *types; types++) {
+		ret = type_pmu_register(*types);
+		if (ret)
+			return ret;
+	}
+	return 0;
 }
 
 static int __init uncore_cpu_init(void)
@@ -1308,71 +1324,61 @@
 		knl_uncore_cpu_init();
 		break;
 	default:
-		return 0;
+		return -ENODEV;
 	}
 
-	ret = uncore_types_init(uncore_msr_uncores);
+	ret = uncore_types_init(uncore_msr_uncores, true);
 	if (ret)
-		return ret;
+		goto err;
 
+	ret = uncore_msr_pmus_register();
+	if (ret)
+		goto err;
 	return 0;
+err:
+	uncore_types_exit(uncore_msr_uncores);
+	uncore_msr_uncores = empty_uncore;
+	return ret;
 }
 
-static int __init uncore_pmus_register(void)
+static void __init uncore_cpu_setup(void *dummy)
 {
-	struct intel_uncore_pmu *pmu;
-	struct intel_uncore_type *type;
-	int i, j;
-
-	for (i = 0; uncore_msr_uncores[i]; i++) {
-		type = uncore_msr_uncores[i];
-		for (j = 0; j < type->num_boxes; j++) {
-			pmu = &type->pmus[j];
-			uncore_pmu_register(pmu);
-		}
-	}
-
-	return 0;
+	uncore_cpu_starting(smp_processor_id(), true);
 }
 
-static void __init uncore_cpumask_init(void)
+/* Lazy to avoid allocation of a few bytes for the normal case */
+static __initdata DECLARE_BITMAP(packages, MAX_LOCAL_APIC);
+
+static int __init uncore_cpumask_init(bool msr)
 {
-	int cpu;
-
-	/*
-	 * ony invoke once from msr or pci init code
-	 */
-	if (!cpumask_empty(&uncore_cpu_mask))
-		return;
-
-	cpu_notifier_register_begin();
+	unsigned int cpu;
 
 	for_each_online_cpu(cpu) {
-		int i, phys_id = topology_physical_package_id(cpu);
+		unsigned int pkg = topology_logical_package_id(cpu);
+		int ret;
 
-		for_each_cpu(i, &uncore_cpu_mask) {
-			if (phys_id == topology_physical_package_id(i)) {
-				phys_id = -1;
-				break;
-			}
-		}
-		if (phys_id < 0)
+		if (test_and_set_bit(pkg, packages))
 			continue;
-
-		uncore_cpu_prepare(cpu, phys_id);
+		/*
+		 * The first online cpu of each package allocates and takes
+		 * the refcounts for all other online cpus in that package.
+		 * If msrs are not enabled no allocation is required.
+		 */
+		if (msr) {
+			ret = uncore_cpu_prepare(cpu);
+			if (ret)
+				return ret;
+		}
 		uncore_event_init_cpu(cpu);
+		smp_call_function_single(cpu, uncore_cpu_setup, NULL, 1);
 	}
-	on_each_cpu(uncore_cpu_setup, NULL, 1);
-
 	__register_cpu_notifier(&uncore_cpu_nb);
-
-	cpu_notifier_register_done();
+	return 0;
 }
 
-
 static int __init intel_uncore_init(void)
 {
-	int ret;
+	int pret, cret, ret;
 
 	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
 		return -ENODEV;
@@ -1380,19 +1386,27 @@
 	if (cpu_has_hypervisor)
 		return -ENODEV;
 
-	ret = uncore_pci_init();
-	if (ret)
-		goto fail;
-	ret = uncore_cpu_init();
-	if (ret) {
-		uncore_pci_exit();
-		goto fail;
-	}
-	uncore_cpumask_init();
+	max_packages = topology_max_packages();
 
-	uncore_pmus_register();
+	pret = uncore_pci_init();
+	cret = uncore_cpu_init();
+
+	if (cret && pret)
+		return -ENODEV;
+
+	cpu_notifier_register_begin();
+	ret = uncore_cpumask_init(!cret);
+	if (ret)
+		goto err;
+	cpu_notifier_register_done();
 	return 0;
-fail:
+
+err:
+	/* Undo box->init_box() */
+	on_each_cpu_mask(&uncore_cpu_mask, uncore_exit_boxes, NULL, 1);
+	uncore_types_exit(uncore_msr_uncores);
+	uncore_pci_exit();
+	cpu_notifier_register_done();
 	return ret;
 }
 device_initcall(intel_uncore_init);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/events/intel/uncore.h
similarity index 89%
rename from arch/x86/kernel/cpu/perf_event_intel_uncore.h
rename to arch/x86/events/intel/uncore.h
index 07aa2d6..79766b9 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h
+++ b/arch/x86/events/intel/uncore.h
@@ -1,8 +1,10 @@
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/pci.h>
+#include <asm/apicdef.h>
+
 #include <linux/perf_event.h>
-#include "perf_event.h"
+#include "../perf_event.h"
 
 #define UNCORE_PMU_NAME_LEN		32
 #define UNCORE_PMU_HRTIMER_INTERVAL	(60LL * NSEC_PER_SEC)
@@ -19,11 +21,12 @@
 #define UNCORE_EXTRA_PCI_DEV		0xff
 #define UNCORE_EXTRA_PCI_DEV_MAX	3
 
-/* support up to 8 sockets */
-#define UNCORE_SOCKET_MAX		8
-
 #define UNCORE_EVENT_CONSTRAINT(c, n) EVENT_CONSTRAINT(c, n, 0xff)
 
+struct pci_extra_dev {
+	struct pci_dev *dev[UNCORE_EXTRA_PCI_DEV_MAX];
+};
+
 struct intel_uncore_ops;
 struct intel_uncore_pmu;
 struct intel_uncore_box;
@@ -61,6 +64,7 @@
 
 struct intel_uncore_ops {
 	void (*init_box)(struct intel_uncore_box *);
+	void (*exit_box)(struct intel_uncore_box *);
 	void (*disable_box)(struct intel_uncore_box *);
 	void (*enable_box)(struct intel_uncore_box *);
 	void (*disable_event)(struct intel_uncore_box *, struct perf_event *);
@@ -73,13 +77,14 @@
 };
 
 struct intel_uncore_pmu {
-	struct pmu pmu;
-	char name[UNCORE_PMU_NAME_LEN];
-	int pmu_idx;
-	int func_id;
-	struct intel_uncore_type *type;
-	struct intel_uncore_box ** __percpu box;
-	struct list_head box_list;
+	struct pmu			pmu;
+	char				name[UNCORE_PMU_NAME_LEN];
+	int				pmu_idx;
+	int				func_id;
+	bool				registered;
+	atomic_t			activeboxes;
+	struct intel_uncore_type	*type;
+	struct intel_uncore_box		**boxes;
 };
 
 struct intel_uncore_extra_reg {
@@ -89,7 +94,8 @@
 };
 
 struct intel_uncore_box {
-	int phys_id;
+	int pci_phys_id;
+	int pkgid;
 	int n_active;	/* number of active events */
 	int n_events;
 	int cpu;	/* cpu to collect events */
@@ -123,7 +129,6 @@
 	int pbus_to_physid[256];
 };
 
-int uncore_pcibus_to_physid(struct pci_bus *bus);
 struct pci2phy_map *__find_pci2phy_map(int segment);
 
 ssize_t uncore_event_show(struct kobject *kobj,
@@ -305,14 +310,30 @@
 	}
 }
 
-static inline bool uncore_box_is_fake(struct intel_uncore_box *box)
+static inline void uncore_box_exit(struct intel_uncore_box *box)
 {
-	return (box->phys_id < 0);
+	if (test_and_clear_bit(UNCORE_BOX_FLAG_INITIATED, &box->flags)) {
+		if (box->pmu->type->ops->exit_box)
+			box->pmu->type->ops->exit_box(box);
+	}
 }
 
-struct intel_uncore_pmu *uncore_event_to_pmu(struct perf_event *event);
+static inline bool uncore_box_is_fake(struct intel_uncore_box *box)
+{
+	return (box->pkgid < 0);
+}
+
+static inline struct intel_uncore_pmu *uncore_event_to_pmu(struct perf_event *event)
+{
+	return container_of(event->pmu, struct intel_uncore_pmu, pmu);
+}
+
+static inline struct intel_uncore_box *uncore_event_to_box(struct perf_event *event)
+{
+	return event->pmu_private;
+}
+
 struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu);
-struct intel_uncore_box *uncore_event_to_box(struct perf_event *event);
 u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event);
 void uncore_pmu_start_hrtimer(struct intel_uncore_box *box);
 void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box);
@@ -328,7 +349,7 @@
 extern struct pci_driver *uncore_pci_driver;
 extern raw_spinlock_t pci2phy_map_lock;
 extern struct list_head pci2phy_map_head;
-extern struct pci_dev *uncore_extra_pci_dev[UNCORE_SOCKET_MAX][UNCORE_EXTRA_PCI_DEV_MAX];
+extern struct pci_extra_dev *uncore_extra_pci_dev;
 extern struct event_constraint uncore_constraint_empty;
 
 /* perf_event_intel_uncore_snb.c */
@@ -336,6 +357,7 @@
 int ivb_uncore_pci_init(void);
 int hsw_uncore_pci_init(void);
 int bdw_uncore_pci_init(void);
+int skl_uncore_pci_init(void);
 void snb_uncore_cpu_init(void);
 void nhm_uncore_cpu_init(void);
 int snb_pci2phy_map_init(int devid);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_nhmex.c b/arch/x86/events/intel/uncore_nhmex.c
similarity index 98%
rename from arch/x86/kernel/cpu/perf_event_intel_uncore_nhmex.c
rename to arch/x86/events/intel/uncore_nhmex.c
index 2749965..cda5693 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore_nhmex.c
+++ b/arch/x86/events/intel/uncore_nhmex.c
@@ -1,5 +1,5 @@
 /* Nehalem-EX/Westmere-EX uncore support */
-#include "perf_event_intel_uncore.h"
+#include "uncore.h"
 
 /* NHM-EX event control */
 #define NHMEX_PMON_CTL_EV_SEL_MASK	0x000000ff
@@ -201,6 +201,11 @@
 	wrmsrl(NHMEX_U_MSR_PMON_GLOBAL_CTL, NHMEX_U_PMON_GLOBAL_EN_ALL);
 }
 
+static void nhmex_uncore_msr_exit_box(struct intel_uncore_box *box)
+{
+	wrmsrl(NHMEX_U_MSR_PMON_GLOBAL_CTL, 0);
+}
+
 static void nhmex_uncore_msr_disable_box(struct intel_uncore_box *box)
 {
 	unsigned msr = uncore_msr_box_ctl(box);
@@ -250,6 +255,7 @@
 
 #define NHMEX_UNCORE_OPS_COMMON_INIT()				\
 	.init_box	= nhmex_uncore_msr_init_box,		\
+	.exit_box	= nhmex_uncore_msr_exit_box,		\
 	.disable_box	= nhmex_uncore_msr_disable_box,		\
 	.enable_box	= nhmex_uncore_msr_enable_box,		\
 	.disable_event	= nhmex_uncore_msr_disable_event,	\
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c b/arch/x86/events/intel/uncore_snb.c
similarity index 94%
rename from arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c
rename to arch/x86/events/intel/uncore_snb.c
index 0b93482..96531d2 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c
+++ b/arch/x86/events/intel/uncore_snb.c
@@ -1,5 +1,5 @@
 /* Nehalem/SandBridge/Haswell uncore support */
-#include "perf_event_intel_uncore.h"
+#include "uncore.h"
 
 /* Uncore IMC PCI IDs */
 #define PCI_DEVICE_ID_INTEL_SNB_IMC	0x0100
@@ -8,6 +8,7 @@
 #define PCI_DEVICE_ID_INTEL_HSW_IMC	0x0c00
 #define PCI_DEVICE_ID_INTEL_HSW_U_IMC	0x0a04
 #define PCI_DEVICE_ID_INTEL_BDW_IMC	0x1604
+#define PCI_DEVICE_ID_INTEL_SKL_IMC	0x191f
 
 /* SNB event control */
 #define SNB_UNC_CTL_EV_SEL_MASK			0x000000ff
@@ -94,6 +95,12 @@
 	}
 }
 
+static void snb_uncore_msr_exit_box(struct intel_uncore_box *box)
+{
+	if (box->pmu->pmu_idx == 0)
+		wrmsrl(SNB_UNC_PERF_GLOBAL_CTL, 0);
+}
+
 static struct uncore_event_desc snb_uncore_events[] = {
 	INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0x00"),
 	{ /* end: all zeroes */ },
@@ -115,6 +122,7 @@
 
 static struct intel_uncore_ops snb_uncore_msr_ops = {
 	.init_box	= snb_uncore_msr_init_box,
+	.exit_box	= snb_uncore_msr_exit_box,
 	.disable_event	= snb_uncore_msr_disable_event,
 	.enable_event	= snb_uncore_msr_enable_event,
 	.read_counter	= uncore_msr_read_counter,
@@ -230,6 +238,11 @@
 	box->hrtimer_duration = UNCORE_SNB_IMC_HRTIMER_INTERVAL;
 }
 
+static void snb_uncore_imc_exit_box(struct intel_uncore_box *box)
+{
+	iounmap(box->io_addr);
+}
+
 static void snb_uncore_imc_enable_box(struct intel_uncore_box *box)
 {}
 
@@ -300,6 +313,7 @@
 		return -EINVAL;
 
 	event->cpu = box->cpu;
+	event->pmu_private = box;
 
 	event->hw.idx = -1;
 	event->hw.last_tag = ~0ULL;
@@ -457,6 +471,7 @@
 
 static struct intel_uncore_ops snb_uncore_imc_ops = {
 	.init_box	= snb_uncore_imc_init_box,
+	.exit_box	= snb_uncore_imc_exit_box,
 	.enable_box	= snb_uncore_imc_enable_box,
 	.disable_box	= snb_uncore_imc_disable_box,
 	.disable_event	= snb_uncore_imc_disable_event,
@@ -524,6 +539,14 @@
 	{ /* end: all zeroes */ },
 };
 
+static const struct pci_device_id skl_uncore_pci_ids[] = {
+	{ /* IMC */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_IMC),
+		.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+	},
+	{ /* end: all zeroes */ },
+};
+
 static struct pci_driver snb_uncore_pci_driver = {
 	.name		= "snb_uncore",
 	.id_table	= snb_uncore_pci_ids,
@@ -544,6 +567,11 @@
 	.id_table	= bdw_uncore_pci_ids,
 };
 
+static struct pci_driver skl_uncore_pci_driver = {
+	.name		= "skl_uncore",
+	.id_table	= skl_uncore_pci_ids,
+};
+
 struct imc_uncore_pci_dev {
 	__u32 pci_id;
 	struct pci_driver *driver;
@@ -558,6 +586,7 @@
 	IMC_DEV(HSW_IMC, &hsw_uncore_pci_driver),    /* 4th Gen Core Processor */
 	IMC_DEV(HSW_U_IMC, &hsw_uncore_pci_driver),  /* 4th Gen Core ULT Mobile Processor */
 	IMC_DEV(BDW_IMC, &bdw_uncore_pci_driver),    /* 5th Gen Core U */
+	IMC_DEV(SKL_IMC, &skl_uncore_pci_driver),    /* 6th Gen Core */
 	{  /* end marker */ }
 };
 
@@ -610,6 +639,11 @@
 	return imc_uncore_pci_init();
 }
 
+int skl_uncore_pci_init(void)
+{
+	return imc_uncore_pci_init();
+}
+
 /* end of Sandy Bridge uncore support */
 
 /* Nehalem uncore support */
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
similarity index 98%
rename from arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c
rename to arch/x86/events/intel/uncore_snbep.c
index 33acb88..93f6bd9 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -1,6 +1,5 @@
 /* SandyBridge-EP/IvyTown uncore support */
-#include "perf_event_intel_uncore.h"
-
+#include "uncore.h"
 
 /* SNB-EP Box level control */
 #define SNBEP_PMON_BOX_CTL_RST_CTRL	(1 << 0)
@@ -987,7 +986,9 @@
 
 	if (reg1->idx != EXTRA_REG_NONE) {
 		int idx = box->pmu->pmu_idx + SNBEP_PCI_QPI_PORT0_FILTER;
-		struct pci_dev *filter_pdev = uncore_extra_pci_dev[box->phys_id][idx];
+		int pkg = topology_phys_to_logical_pkg(box->pci_phys_id);
+		struct pci_dev *filter_pdev = uncore_extra_pci_dev[pkg].dev[idx];
+
 		if (filter_pdev) {
 			pci_write_config_dword(filter_pdev, reg1->reg,
 						(u32)reg1->config);
@@ -2521,14 +2522,16 @@
 
 void hswep_uncore_cpu_init(void)
 {
+	int pkg = topology_phys_to_logical_pkg(0);
+
 	if (hswep_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
 		hswep_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
 
 	/* Detect 6-8 core systems with only two SBOXes */
-	if (uncore_extra_pci_dev[0][HSWEP_PCI_PCU_3]) {
+	if (uncore_extra_pci_dev[pkg].dev[HSWEP_PCI_PCU_3]) {
 		u32 capid4;
 
-		pci_read_config_dword(uncore_extra_pci_dev[0][HSWEP_PCI_PCU_3],
+		pci_read_config_dword(uncore_extra_pci_dev[pkg].dev[HSWEP_PCI_PCU_3],
 				      0x94, &capid4);
 		if (((capid4 >> 6) & 0x3) == 0)
 			hswep_uncore_sbox.num_boxes = 2;
@@ -2875,11 +2878,13 @@
 	.format_group		= &hswep_uncore_sbox_format_group,
 };
 
+#define BDX_MSR_UNCORE_SBOX	3
+
 static struct intel_uncore_type *bdx_msr_uncores[] = {
 	&bdx_uncore_ubox,
 	&bdx_uncore_cbox,
-	&bdx_uncore_sbox,
 	&hswep_uncore_pcu,
+	&bdx_uncore_sbox,
 	NULL,
 };
 
@@ -2888,6 +2893,10 @@
 	if (bdx_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
 		bdx_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
 	uncore_msr_uncores = bdx_msr_uncores;
+
+	/* BDX-DE doesn't have SBOX */
+	if (boot_cpu_data.x86_model == 86)
+		uncore_msr_uncores[BDX_MSR_UNCORE_SBOX] = NULL;
 }
 
 static struct intel_uncore_type bdx_uncore_ha = {
diff --git a/arch/x86/kernel/cpu/perf_event_msr.c b/arch/x86/events/msr.c
similarity index 100%
rename from arch/x86/kernel/cpu/perf_event_msr.c
rename to arch/x86/events/msr.c
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/events/perf_event.h
similarity index 99%
rename from arch/x86/kernel/cpu/perf_event.h
rename to arch/x86/events/perf_event.h
index 7bb61e3..68155ca 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -586,6 +586,7 @@
 			pebs_broken	:1,
 			pebs_prec_dist	:1;
 	int		pebs_record_size;
+	int		pebs_buffer_size;
 	void		(*drain_pebs)(struct pt_regs *regs);
 	struct event_constraint *pebs_constraints;
 	void		(*pebs_aliases)(struct perf_event *event);
@@ -860,6 +861,8 @@
 
 extern struct event_constraint intel_hsw_pebs_event_constraints[];
 
+extern struct event_constraint intel_bdw_pebs_event_constraints[];
+
 extern struct event_constraint intel_skl_pebs_event_constraints[];
 
 struct event_constraint *intel_pebs_constraints(struct perf_event *event);
@@ -904,6 +907,8 @@
 
 void intel_pmu_lbr_init_knl(void);
 
+void intel_pmu_pebs_data_source_nhm(void);
+
 int intel_pmu_setup_lbr_filter(struct perf_event *event);
 
 void intel_pt_interrupt(void);
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 7bfc85b..99afb66 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -152,12 +152,6 @@
 	".popsection"
 
 /*
- * This must be included *after* the definition of ALTERNATIVE due to
- * <asm/arch_hweight.h>
- */
-#include <asm/cpufeature.h>
-
-/*
  * Alternative instructions for different CPU types or capabilities.
  *
  * This allows to use optimized instructions even on generic binary
diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h
index 3c56ef1..5e828da 100644
--- a/arch/x86/include/asm/amd_nb.h
+++ b/arch/x86/include/asm/amd_nb.h
@@ -27,15 +27,23 @@
 };
 
 struct threshold_block {
-	unsigned int		block;
-	unsigned int		bank;
-	unsigned int		cpu;
-	u32			address;
-	u16			interrupt_enable;
-	bool			interrupt_capable;
-	u16			threshold_limit;
-	struct kobject		kobj;
-	struct list_head	miscj;
+	unsigned int	 block;			/* Number within bank */
+	unsigned int	 bank;			/* MCA bank the block belongs to */
+	unsigned int	 cpu;			/* CPU which controls MCA bank */
+	u32		 address;		/* MSR address for the block */
+	u16		 interrupt_enable;	/* Enable/Disable APIC interrupt */
+	bool		 interrupt_capable;	/* Bank can generate an interrupt. */
+
+	u16		 threshold_limit;	/*
+						 * Value upon which threshold
+						 * interrupt is generated.
+						 */
+
+	struct kobject	 kobj;			/* sysfs object */
+	struct list_head miscj;			/*
+						 * List of threshold blocks
+						 * within a bank.
+						 */
 };
 
 struct threshold_bank {
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index c80f6b6..0899cfc 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -6,7 +6,6 @@
 
 #include <asm/alternative.h>
 #include <asm/cpufeature.h>
-#include <asm/processor.h>
 #include <asm/apicdef.h>
 #include <linux/atomic.h>
 #include <asm/fixmap.h>
diff --git a/arch/x86/include/asm/arch_hweight.h b/arch/x86/include/asm/arch_hweight.h
index 259a7c1..02e799f 100644
--- a/arch/x86/include/asm/arch_hweight.h
+++ b/arch/x86/include/asm/arch_hweight.h
@@ -1,6 +1,8 @@
 #ifndef _ASM_X86_HWEIGHT_H
 #define _ASM_X86_HWEIGHT_H
 
+#include <asm/cpufeatures.h>
+
 #ifdef CONFIG_64BIT
 /* popcnt %edi, %eax -- redundant REX prefix for alignment */
 #define POPCNT32 ".byte 0xf3,0x40,0x0f,0xb8,0xc7"
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index 189679a..f5063b6 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -44,19 +44,22 @@
 
 /* Exception table entry */
 #ifdef __ASSEMBLY__
-# define _ASM_EXTABLE(from,to)					\
+# define _ASM_EXTABLE_HANDLE(from, to, handler)			\
 	.pushsection "__ex_table","a" ;				\
-	.balign 8 ;						\
+	.balign 4 ;						\
 	.long (from) - . ;					\
 	.long (to) - . ;					\
+	.long (handler) - . ;					\
 	.popsection
 
-# define _ASM_EXTABLE_EX(from,to)				\
-	.pushsection "__ex_table","a" ;				\
-	.balign 8 ;						\
-	.long (from) - . ;					\
-	.long (to) - . + 0x7ffffff0 ;				\
-	.popsection
+# define _ASM_EXTABLE(from, to)					\
+	_ASM_EXTABLE_HANDLE(from, to, ex_handler_default)
+
+# define _ASM_EXTABLE_FAULT(from, to)				\
+	_ASM_EXTABLE_HANDLE(from, to, ex_handler_fault)
+
+# define _ASM_EXTABLE_EX(from, to)				\
+	_ASM_EXTABLE_HANDLE(from, to, ex_handler_ext)
 
 # define _ASM_NOKPROBE(entry)					\
 	.pushsection "_kprobe_blacklist","aw" ;			\
@@ -89,19 +92,24 @@
 	.endm
 
 #else
-# define _ASM_EXTABLE(from,to)					\
+# define _EXPAND_EXTABLE_HANDLE(x) #x
+# define _ASM_EXTABLE_HANDLE(from, to, handler)			\
 	" .pushsection \"__ex_table\",\"a\"\n"			\
-	" .balign 8\n"						\
+	" .balign 4\n"						\
 	" .long (" #from ") - .\n"				\
 	" .long (" #to ") - .\n"				\
+	" .long (" _EXPAND_EXTABLE_HANDLE(handler) ") - .\n"	\
 	" .popsection\n"
 
-# define _ASM_EXTABLE_EX(from,to)				\
-	" .pushsection \"__ex_table\",\"a\"\n"			\
-	" .balign 8\n"						\
-	" .long (" #from ") - .\n"				\
-	" .long (" #to ") - . + 0x7ffffff0\n"			\
-	" .popsection\n"
+# define _ASM_EXTABLE(from, to)					\
+	_ASM_EXTABLE_HANDLE(from, to, ex_handler_default)
+
+# define _ASM_EXTABLE_FAULT(from, to)				\
+	_ASM_EXTABLE_HANDLE(from, to, ex_handler_fault)
+
+# define _ASM_EXTABLE_EX(from, to)				\
+	_ASM_EXTABLE_HANDLE(from, to, ex_handler_ext)
+
 /* For C file, we already have NOKPROBE_SYMBOL macro */
 #endif
 
diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
index a584e1c..bfb28ca 100644
--- a/arch/x86/include/asm/barrier.h
+++ b/arch/x86/include/asm/barrier.h
@@ -6,18 +6,17 @@
 
 /*
  * Force strict CPU ordering.
- * And yes, this is required on UP too when we're talking
+ * And yes, this might be required on UP too when we're talking
  * to devices.
  */
 
 #ifdef CONFIG_X86_32
-/*
- * Some non-Intel clones support out of order store. wmb() ceases to be a
- * nop for these.
- */
-#define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2)
-#define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2)
-#define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM)
+#define mb() asm volatile(ALTERNATIVE("lock; addl $0,0(%%esp)", "mfence", \
+				      X86_FEATURE_XMM2) ::: "memory", "cc")
+#define rmb() asm volatile(ALTERNATIVE("lock; addl $0,0(%%esp)", "lfence", \
+				       X86_FEATURE_XMM2) ::: "memory", "cc")
+#define wmb() asm volatile(ALTERNATIVE("lock; addl $0,0(%%esp)", "sfence", \
+				       X86_FEATURE_XMM2) ::: "memory", "cc")
 #else
 #define mb() 	asm volatile("mfence":::"memory")
 #define rmb()	asm volatile("lfence":::"memory")
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index cfe3b95..7766d1c 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -91,7 +91,7 @@
  * If it's called on the same region of memory simultaneously, the effect
  * may be that only one operation succeeds.
  */
-static inline void __set_bit(long nr, volatile unsigned long *addr)
+static __always_inline void __set_bit(long nr, volatile unsigned long *addr)
 {
 	asm volatile("bts %1,%0" : ADDR : "Ir" (nr) : "memory");
 }
@@ -128,13 +128,13 @@
  * clear_bit() is atomic and implies release semantics before the memory
  * operation. It can be used for an unlock.
  */
-static inline void clear_bit_unlock(long nr, volatile unsigned long *addr)
+static __always_inline void clear_bit_unlock(long nr, volatile unsigned long *addr)
 {
 	barrier();
 	clear_bit(nr, addr);
 }
 
-static inline void __clear_bit(long nr, volatile unsigned long *addr)
+static __always_inline void __clear_bit(long nr, volatile unsigned long *addr)
 {
 	asm volatile("btr %1,%0" : ADDR : "Ir" (nr));
 }
@@ -151,7 +151,7 @@
  * No memory barrier is required here, because x86 cannot reorder stores past
  * older loads. Same principle as spin_unlock.
  */
-static inline void __clear_bit_unlock(long nr, volatile unsigned long *addr)
+static __always_inline void __clear_bit_unlock(long nr, volatile unsigned long *addr)
 {
 	barrier();
 	__clear_bit(nr, addr);
@@ -166,7 +166,7 @@
  * If it's called on the same region of memory simultaneously, the effect
  * may be that only one operation succeeds.
  */
-static inline void __change_bit(long nr, volatile unsigned long *addr)
+static __always_inline void __change_bit(long nr, volatile unsigned long *addr)
 {
 	asm volatile("btc %1,%0" : ADDR : "Ir" (nr));
 }
@@ -180,7 +180,7 @@
  * Note that @nr may be almost arbitrarily large; this function is not
  * restricted to acting on a single-word quantity.
  */
-static inline void change_bit(long nr, volatile unsigned long *addr)
+static __always_inline void change_bit(long nr, volatile unsigned long *addr)
 {
 	if (IS_IMMEDIATE(nr)) {
 		asm volatile(LOCK_PREFIX "xorb %1,%0"
@@ -201,7 +201,7 @@
  * This operation is atomic and cannot be reordered.
  * It also implies a memory barrier.
  */
-static inline int test_and_set_bit(long nr, volatile unsigned long *addr)
+static __always_inline int test_and_set_bit(long nr, volatile unsigned long *addr)
 {
 	GEN_BINARY_RMWcc(LOCK_PREFIX "bts", *addr, "Ir", nr, "%0", "c");
 }
@@ -228,7 +228,7 @@
  * If two examples of this operation race, one can appear to succeed
  * but actually fail.  You must protect multiple accesses with a lock.
  */
-static inline int __test_and_set_bit(long nr, volatile unsigned long *addr)
+static __always_inline int __test_and_set_bit(long nr, volatile unsigned long *addr)
 {
 	int oldbit;
 
@@ -247,7 +247,7 @@
  * This operation is atomic and cannot be reordered.
  * It also implies a memory barrier.
  */
-static inline int test_and_clear_bit(long nr, volatile unsigned long *addr)
+static __always_inline int test_and_clear_bit(long nr, volatile unsigned long *addr)
 {
 	GEN_BINARY_RMWcc(LOCK_PREFIX "btr", *addr, "Ir", nr, "%0", "c");
 }
@@ -268,7 +268,7 @@
  * accessed from a hypervisor on the same CPU if running in a VM: don't change
  * this without also updating arch/x86/kernel/kvm.c
  */
-static inline int __test_and_clear_bit(long nr, volatile unsigned long *addr)
+static __always_inline int __test_and_clear_bit(long nr, volatile unsigned long *addr)
 {
 	int oldbit;
 
@@ -280,7 +280,7 @@
 }
 
 /* WARNING: non atomic and it can be reordered! */
-static inline int __test_and_change_bit(long nr, volatile unsigned long *addr)
+static __always_inline int __test_and_change_bit(long nr, volatile unsigned long *addr)
 {
 	int oldbit;
 
@@ -300,7 +300,7 @@
  * This operation is atomic and cannot be reordered.
  * It also implies a memory barrier.
  */
-static inline int test_and_change_bit(long nr, volatile unsigned long *addr)
+static __always_inline int test_and_change_bit(long nr, volatile unsigned long *addr)
 {
 	GEN_BINARY_RMWcc(LOCK_PREFIX "btc", *addr, "Ir", nr, "%0", "c");
 }
@@ -311,7 +311,7 @@
 		(addr[nr >> _BITOPS_LONG_SHIFT])) != 0;
 }
 
-static inline int variable_test_bit(long nr, volatile const unsigned long *addr)
+static __always_inline int variable_test_bit(long nr, volatile const unsigned long *addr)
 {
 	int oldbit;
 
@@ -343,7 +343,7 @@
  *
  * Undefined if no bit exists, so code should check against 0 first.
  */
-static inline unsigned long __ffs(unsigned long word)
+static __always_inline unsigned long __ffs(unsigned long word)
 {
 	asm("rep; bsf %1,%0"
 		: "=r" (word)
@@ -357,7 +357,7 @@
  *
  * Undefined if no zero exists, so code should check against ~0UL first.
  */
-static inline unsigned long ffz(unsigned long word)
+static __always_inline unsigned long ffz(unsigned long word)
 {
 	asm("rep; bsf %1,%0"
 		: "=r" (word)
@@ -371,7 +371,7 @@
  *
  * Undefined if no set bit exists, so code should check against 0 first.
  */
-static inline unsigned long __fls(unsigned long word)
+static __always_inline unsigned long __fls(unsigned long word)
 {
 	asm("bsr %1,%0"
 	    : "=r" (word)
@@ -393,7 +393,7 @@
  * set bit if value is nonzero. The first (least significant) bit
  * is at position 1.
  */
-static inline int ffs(int x)
+static __always_inline int ffs(int x)
 {
 	int r;
 
@@ -434,7 +434,7 @@
  * set bit if value is nonzero. The last (most significant) bit is
  * at position 32.
  */
-static inline int fls(int x)
+static __always_inline int fls(int x)
 {
 	int r;
 
diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h
index e63aa38..61518cf 100644
--- a/arch/x86/include/asm/cacheflush.h
+++ b/arch/x86/include/asm/cacheflush.h
@@ -91,16 +91,10 @@
 
 #define mmio_flush_range(addr, size) clflush_cache_range(addr, size)
 
-#ifdef CONFIG_DEBUG_RODATA
-void mark_rodata_ro(void);
 extern const int rodata_test_data;
 extern int kernel_set_to_readonly;
 void set_kernel_text_rw(void);
 void set_kernel_text_ro(void);
-#else
-static inline void set_kernel_text_rw(void) { }
-static inline void set_kernel_text_ro(void) { }
-#endif
 
 #ifdef CONFIG_DEBUG_RODATA_TEST
 int rodata_test(void);
diff --git a/arch/x86/include/asm/clocksource.h b/arch/x86/include/asm/clocksource.h
index eda81dc..d194266 100644
--- a/arch/x86/include/asm/clocksource.h
+++ b/arch/x86/include/asm/clocksource.h
@@ -3,10 +3,11 @@
 #ifndef _ASM_X86_CLOCKSOURCE_H
 #define _ASM_X86_CLOCKSOURCE_H
 
-#define VCLOCK_NONE 0  /* No vDSO clock available.	*/
-#define VCLOCK_TSC  1  /* vDSO should use vread_tsc.	*/
-#define VCLOCK_HPET 2  /* vDSO should use vread_hpet.	*/
-#define VCLOCK_PVCLOCK 3 /* vDSO should use vread_pvclock. */
+#define VCLOCK_NONE	0  /* No vDSO clock available.	*/
+#define VCLOCK_TSC	1  /* vDSO should use vread_tsc.	*/
+#define VCLOCK_HPET	2  /* vDSO should use vread_hpet.	*/
+#define VCLOCK_PVCLOCK	3 /* vDSO should use vread_pvclock. */
+#define VCLOCK_MAX	3
 
 struct arch_clocksource_data {
 	int vclock_mode;
diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h
index ad19841..9733361 100644
--- a/arch/x86/include/asm/cmpxchg.h
+++ b/arch/x86/include/asm/cmpxchg.h
@@ -2,6 +2,7 @@
 #define ASM_X86_CMPXCHG_H
 
 #include <linux/compiler.h>
+#include <asm/cpufeatures.h>
 #include <asm/alternative.h> /* Provides LOCK_PREFIX */
 
 /*
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index ff557b4..68e4e82 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -1,288 +1,7 @@
-/*
- * Defines x86 CPU feature bits
- */
 #ifndef _ASM_X86_CPUFEATURE_H
 #define _ASM_X86_CPUFEATURE_H
 
-#ifndef _ASM_X86_REQUIRED_FEATURES_H
-#include <asm/required-features.h>
-#endif
-
-#ifndef _ASM_X86_DISABLED_FEATURES_H
-#include <asm/disabled-features.h>
-#endif
-
-#define NCAPINTS	16	/* N 32-bit words worth of info */
-#define NBUGINTS	1	/* N 32-bit bug flags */
-
-/*
- * Note: If the comment begins with a quoted string, that string is used
- * in /proc/cpuinfo instead of the macro name.  If the string is "",
- * this feature bit is not displayed in /proc/cpuinfo at all.
- */
-
-/* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */
-#define X86_FEATURE_FPU		( 0*32+ 0) /* Onboard FPU */
-#define X86_FEATURE_VME		( 0*32+ 1) /* Virtual Mode Extensions */
-#define X86_FEATURE_DE		( 0*32+ 2) /* Debugging Extensions */
-#define X86_FEATURE_PSE		( 0*32+ 3) /* Page Size Extensions */
-#define X86_FEATURE_TSC		( 0*32+ 4) /* Time Stamp Counter */
-#define X86_FEATURE_MSR		( 0*32+ 5) /* Model-Specific Registers */
-#define X86_FEATURE_PAE		( 0*32+ 6) /* Physical Address Extensions */
-#define X86_FEATURE_MCE		( 0*32+ 7) /* Machine Check Exception */
-#define X86_FEATURE_CX8		( 0*32+ 8) /* CMPXCHG8 instruction */
-#define X86_FEATURE_APIC	( 0*32+ 9) /* Onboard APIC */
-#define X86_FEATURE_SEP		( 0*32+11) /* SYSENTER/SYSEXIT */
-#define X86_FEATURE_MTRR	( 0*32+12) /* Memory Type Range Registers */
-#define X86_FEATURE_PGE		( 0*32+13) /* Page Global Enable */
-#define X86_FEATURE_MCA		( 0*32+14) /* Machine Check Architecture */
-#define X86_FEATURE_CMOV	( 0*32+15) /* CMOV instructions */
-					  /* (plus FCMOVcc, FCOMI with FPU) */
-#define X86_FEATURE_PAT		( 0*32+16) /* Page Attribute Table */
-#define X86_FEATURE_PSE36	( 0*32+17) /* 36-bit PSEs */
-#define X86_FEATURE_PN		( 0*32+18) /* Processor serial number */
-#define X86_FEATURE_CLFLUSH	( 0*32+19) /* CLFLUSH instruction */
-#define X86_FEATURE_DS		( 0*32+21) /* "dts" Debug Store */
-#define X86_FEATURE_ACPI	( 0*32+22) /* ACPI via MSR */
-#define X86_FEATURE_MMX		( 0*32+23) /* Multimedia Extensions */
-#define X86_FEATURE_FXSR	( 0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */
-#define X86_FEATURE_XMM		( 0*32+25) /* "sse" */
-#define X86_FEATURE_XMM2	( 0*32+26) /* "sse2" */
-#define X86_FEATURE_SELFSNOOP	( 0*32+27) /* "ss" CPU self snoop */
-#define X86_FEATURE_HT		( 0*32+28) /* Hyper-Threading */
-#define X86_FEATURE_ACC		( 0*32+29) /* "tm" Automatic clock control */
-#define X86_FEATURE_IA64	( 0*32+30) /* IA-64 processor */
-#define X86_FEATURE_PBE		( 0*32+31) /* Pending Break Enable */
-
-/* AMD-defined CPU features, CPUID level 0x80000001, word 1 */
-/* Don't duplicate feature flags which are redundant with Intel! */
-#define X86_FEATURE_SYSCALL	( 1*32+11) /* SYSCALL/SYSRET */
-#define X86_FEATURE_MP		( 1*32+19) /* MP Capable. */
-#define X86_FEATURE_NX		( 1*32+20) /* Execute Disable */
-#define X86_FEATURE_MMXEXT	( 1*32+22) /* AMD MMX extensions */
-#define X86_FEATURE_FXSR_OPT	( 1*32+25) /* FXSAVE/FXRSTOR optimizations */
-#define X86_FEATURE_GBPAGES	( 1*32+26) /* "pdpe1gb" GB pages */
-#define X86_FEATURE_RDTSCP	( 1*32+27) /* RDTSCP */
-#define X86_FEATURE_LM		( 1*32+29) /* Long Mode (x86-64) */
-#define X86_FEATURE_3DNOWEXT	( 1*32+30) /* AMD 3DNow! extensions */
-#define X86_FEATURE_3DNOW	( 1*32+31) /* 3DNow! */
-
-/* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */
-#define X86_FEATURE_RECOVERY	( 2*32+ 0) /* CPU in recovery mode */
-#define X86_FEATURE_LONGRUN	( 2*32+ 1) /* Longrun power control */
-#define X86_FEATURE_LRTI	( 2*32+ 3) /* LongRun table interface */
-
-/* Other features, Linux-defined mapping, word 3 */
-/* This range is used for feature bits which conflict or are synthesized */
-#define X86_FEATURE_CXMMX	( 3*32+ 0) /* Cyrix MMX extensions */
-#define X86_FEATURE_K6_MTRR	( 3*32+ 1) /* AMD K6 nonstandard MTRRs */
-#define X86_FEATURE_CYRIX_ARR	( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */
-#define X86_FEATURE_CENTAUR_MCR	( 3*32+ 3) /* Centaur MCRs (= MTRRs) */
-/* cpu types for specific tunings: */
-#define X86_FEATURE_K8		( 3*32+ 4) /* "" Opteron, Athlon64 */
-#define X86_FEATURE_K7		( 3*32+ 5) /* "" Athlon */
-#define X86_FEATURE_P3		( 3*32+ 6) /* "" P3 */
-#define X86_FEATURE_P4		( 3*32+ 7) /* "" P4 */
-#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */
-#define X86_FEATURE_UP		( 3*32+ 9) /* smp kernel running on up */
-#define X86_FEATURE_ART		(3*32+10) /* Platform has always running timer (ART) */
-#define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* Intel Architectural PerfMon */
-#define X86_FEATURE_PEBS	( 3*32+12) /* Precise-Event Based Sampling */
-#define X86_FEATURE_BTS		( 3*32+13) /* Branch Trace Store */
-#define X86_FEATURE_SYSCALL32	( 3*32+14) /* "" syscall in ia32 userspace */
-#define X86_FEATURE_SYSENTER32	( 3*32+15) /* "" sysenter in ia32 userspace */
-#define X86_FEATURE_REP_GOOD	( 3*32+16) /* rep microcode works well */
-#define X86_FEATURE_MFENCE_RDTSC ( 3*32+17) /* "" Mfence synchronizes RDTSC */
-#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" Lfence synchronizes RDTSC */
-/* free, was #define X86_FEATURE_11AP	( 3*32+19) * "" Bad local APIC aka 11AP */
-#define X86_FEATURE_NOPL	( 3*32+20) /* The NOPL (0F 1F) instructions */
-#define X86_FEATURE_ALWAYS	( 3*32+21) /* "" Always-present feature */
-#define X86_FEATURE_XTOPOLOGY	( 3*32+22) /* cpu topology enum extensions */
-#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */
-#define X86_FEATURE_NONSTOP_TSC	( 3*32+24) /* TSC does not stop in C states */
-/* free, was #define X86_FEATURE_CLFLUSH_MONITOR ( 3*32+25) * "" clflush reqd with monitor */
-#define X86_FEATURE_EXTD_APICID	( 3*32+26) /* has extended APICID (8 bits) */
-#define X86_FEATURE_AMD_DCM     ( 3*32+27) /* multi-node processor */
-#define X86_FEATURE_APERFMPERF	( 3*32+28) /* APERFMPERF */
-#define X86_FEATURE_EAGER_FPU	( 3*32+29) /* "eagerfpu" Non lazy FPU restore */
-#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */
-
-/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
-#define X86_FEATURE_XMM3	( 4*32+ 0) /* "pni" SSE-3 */
-#define X86_FEATURE_PCLMULQDQ	( 4*32+ 1) /* PCLMULQDQ instruction */
-#define X86_FEATURE_DTES64	( 4*32+ 2) /* 64-bit Debug Store */
-#define X86_FEATURE_MWAIT	( 4*32+ 3) /* "monitor" Monitor/Mwait support */
-#define X86_FEATURE_DSCPL	( 4*32+ 4) /* "ds_cpl" CPL Qual. Debug Store */
-#define X86_FEATURE_VMX		( 4*32+ 5) /* Hardware virtualization */
-#define X86_FEATURE_SMX		( 4*32+ 6) /* Safer mode */
-#define X86_FEATURE_EST		( 4*32+ 7) /* Enhanced SpeedStep */
-#define X86_FEATURE_TM2		( 4*32+ 8) /* Thermal Monitor 2 */
-#define X86_FEATURE_SSSE3	( 4*32+ 9) /* Supplemental SSE-3 */
-#define X86_FEATURE_CID		( 4*32+10) /* Context ID */
-#define X86_FEATURE_SDBG	( 4*32+11) /* Silicon Debug */
-#define X86_FEATURE_FMA		( 4*32+12) /* Fused multiply-add */
-#define X86_FEATURE_CX16	( 4*32+13) /* CMPXCHG16B */
-#define X86_FEATURE_XTPR	( 4*32+14) /* Send Task Priority Messages */
-#define X86_FEATURE_PDCM	( 4*32+15) /* Performance Capabilities */
-#define X86_FEATURE_PCID	( 4*32+17) /* Process Context Identifiers */
-#define X86_FEATURE_DCA		( 4*32+18) /* Direct Cache Access */
-#define X86_FEATURE_XMM4_1	( 4*32+19) /* "sse4_1" SSE-4.1 */
-#define X86_FEATURE_XMM4_2	( 4*32+20) /* "sse4_2" SSE-4.2 */
-#define X86_FEATURE_X2APIC	( 4*32+21) /* x2APIC */
-#define X86_FEATURE_MOVBE	( 4*32+22) /* MOVBE instruction */
-#define X86_FEATURE_POPCNT      ( 4*32+23) /* POPCNT instruction */
-#define X86_FEATURE_TSC_DEADLINE_TIMER	( 4*32+24) /* Tsc deadline timer */
-#define X86_FEATURE_AES		( 4*32+25) /* AES instructions */
-#define X86_FEATURE_XSAVE	( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */
-#define X86_FEATURE_OSXSAVE	( 4*32+27) /* "" XSAVE enabled in the OS */
-#define X86_FEATURE_AVX		( 4*32+28) /* Advanced Vector Extensions */
-#define X86_FEATURE_F16C	( 4*32+29) /* 16-bit fp conversions */
-#define X86_FEATURE_RDRAND	( 4*32+30) /* The RDRAND instruction */
-#define X86_FEATURE_HYPERVISOR	( 4*32+31) /* Running on a hypervisor */
-
-/* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */
-#define X86_FEATURE_XSTORE	( 5*32+ 2) /* "rng" RNG present (xstore) */
-#define X86_FEATURE_XSTORE_EN	( 5*32+ 3) /* "rng_en" RNG enabled */
-#define X86_FEATURE_XCRYPT	( 5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */
-#define X86_FEATURE_XCRYPT_EN	( 5*32+ 7) /* "ace_en" on-CPU crypto enabled */
-#define X86_FEATURE_ACE2	( 5*32+ 8) /* Advanced Cryptography Engine v2 */
-#define X86_FEATURE_ACE2_EN	( 5*32+ 9) /* ACE v2 enabled */
-#define X86_FEATURE_PHE		( 5*32+10) /* PadLock Hash Engine */
-#define X86_FEATURE_PHE_EN	( 5*32+11) /* PHE enabled */
-#define X86_FEATURE_PMM		( 5*32+12) /* PadLock Montgomery Multiplier */
-#define X86_FEATURE_PMM_EN	( 5*32+13) /* PMM enabled */
-
-/* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */
-#define X86_FEATURE_LAHF_LM	( 6*32+ 0) /* LAHF/SAHF in long mode */
-#define X86_FEATURE_CMP_LEGACY	( 6*32+ 1) /* If yes HyperThreading not valid */
-#define X86_FEATURE_SVM		( 6*32+ 2) /* Secure virtual machine */
-#define X86_FEATURE_EXTAPIC	( 6*32+ 3) /* Extended APIC space */
-#define X86_FEATURE_CR8_LEGACY	( 6*32+ 4) /* CR8 in 32-bit mode */
-#define X86_FEATURE_ABM		( 6*32+ 5) /* Advanced bit manipulation */
-#define X86_FEATURE_SSE4A	( 6*32+ 6) /* SSE-4A */
-#define X86_FEATURE_MISALIGNSSE ( 6*32+ 7) /* Misaligned SSE mode */
-#define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* 3DNow prefetch instructions */
-#define X86_FEATURE_OSVW	( 6*32+ 9) /* OS Visible Workaround */
-#define X86_FEATURE_IBS		( 6*32+10) /* Instruction Based Sampling */
-#define X86_FEATURE_XOP		( 6*32+11) /* extended AVX instructions */
-#define X86_FEATURE_SKINIT	( 6*32+12) /* SKINIT/STGI instructions */
-#define X86_FEATURE_WDT		( 6*32+13) /* Watchdog timer */
-#define X86_FEATURE_LWP		( 6*32+15) /* Light Weight Profiling */
-#define X86_FEATURE_FMA4	( 6*32+16) /* 4 operands MAC instructions */
-#define X86_FEATURE_TCE		( 6*32+17) /* translation cache extension */
-#define X86_FEATURE_NODEID_MSR	( 6*32+19) /* NodeId MSR */
-#define X86_FEATURE_TBM		( 6*32+21) /* trailing bit manipulations */
-#define X86_FEATURE_TOPOEXT	( 6*32+22) /* topology extensions CPUID leafs */
-#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* core performance counter extensions */
-#define X86_FEATURE_PERFCTR_NB  ( 6*32+24) /* NB performance counter extensions */
-#define X86_FEATURE_BPEXT	(6*32+26) /* data breakpoint extension */
-#define X86_FEATURE_PERFCTR_L2	( 6*32+28) /* L2 performance counter extensions */
-#define X86_FEATURE_MWAITX	( 6*32+29) /* MWAIT extension (MONITORX/MWAITX) */
-
-/*
- * Auxiliary flags: Linux defined - For features scattered in various
- * CPUID levels like 0x6, 0xA etc, word 7.
- *
- * Reuse free bits when adding new feature flags!
- */
-
-#define X86_FEATURE_CPB		( 7*32+ 2) /* AMD Core Performance Boost */
-#define X86_FEATURE_EPB		( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
-
-#define X86_FEATURE_HW_PSTATE	( 7*32+ 8) /* AMD HW-PState */
-#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
-
-#define X86_FEATURE_INTEL_PT	( 7*32+15) /* Intel Processor Trace */
-
-/* Virtualization flags: Linux defined, word 8 */
-#define X86_FEATURE_TPR_SHADOW  ( 8*32+ 0) /* Intel TPR Shadow */
-#define X86_FEATURE_VNMI        ( 8*32+ 1) /* Intel Virtual NMI */
-#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 2) /* Intel FlexPriority */
-#define X86_FEATURE_EPT         ( 8*32+ 3) /* Intel Extended Page Table */
-#define X86_FEATURE_VPID        ( 8*32+ 4) /* Intel Virtual Processor ID */
-
-#define X86_FEATURE_VMMCALL     ( 8*32+15) /* Prefer vmmcall to vmcall */
-#define X86_FEATURE_XENPV       ( 8*32+16) /* "" Xen paravirtual guest */
-
-
-/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */
-#define X86_FEATURE_FSGSBASE	( 9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/
-#define X86_FEATURE_TSC_ADJUST	( 9*32+ 1) /* TSC adjustment MSR 0x3b */
-#define X86_FEATURE_BMI1	( 9*32+ 3) /* 1st group bit manipulation extensions */
-#define X86_FEATURE_HLE		( 9*32+ 4) /* Hardware Lock Elision */
-#define X86_FEATURE_AVX2	( 9*32+ 5) /* AVX2 instructions */
-#define X86_FEATURE_SMEP	( 9*32+ 7) /* Supervisor Mode Execution Protection */
-#define X86_FEATURE_BMI2	( 9*32+ 8) /* 2nd group bit manipulation extensions */
-#define X86_FEATURE_ERMS	( 9*32+ 9) /* Enhanced REP MOVSB/STOSB */
-#define X86_FEATURE_INVPCID	( 9*32+10) /* Invalidate Processor Context ID */
-#define X86_FEATURE_RTM		( 9*32+11) /* Restricted Transactional Memory */
-#define X86_FEATURE_CQM		( 9*32+12) /* Cache QoS Monitoring */
-#define X86_FEATURE_MPX		( 9*32+14) /* Memory Protection Extension */
-#define X86_FEATURE_AVX512F	( 9*32+16) /* AVX-512 Foundation */
-#define X86_FEATURE_RDSEED	( 9*32+18) /* The RDSEED instruction */
-#define X86_FEATURE_ADX		( 9*32+19) /* The ADCX and ADOX instructions */
-#define X86_FEATURE_SMAP	( 9*32+20) /* Supervisor Mode Access Prevention */
-#define X86_FEATURE_PCOMMIT	( 9*32+22) /* PCOMMIT instruction */
-#define X86_FEATURE_CLFLUSHOPT	( 9*32+23) /* CLFLUSHOPT instruction */
-#define X86_FEATURE_CLWB	( 9*32+24) /* CLWB instruction */
-#define X86_FEATURE_AVX512PF	( 9*32+26) /* AVX-512 Prefetch */
-#define X86_FEATURE_AVX512ER	( 9*32+27) /* AVX-512 Exponential and Reciprocal */
-#define X86_FEATURE_AVX512CD	( 9*32+28) /* AVX-512 Conflict Detection */
-#define X86_FEATURE_SHA_NI	( 9*32+29) /* SHA1/SHA256 Instruction Extensions */
-
-/* Extended state features, CPUID level 0x0000000d:1 (eax), word 10 */
-#define X86_FEATURE_XSAVEOPT	(10*32+ 0) /* XSAVEOPT */
-#define X86_FEATURE_XSAVEC	(10*32+ 1) /* XSAVEC */
-#define X86_FEATURE_XGETBV1	(10*32+ 2) /* XGETBV with ECX = 1 */
-#define X86_FEATURE_XSAVES	(10*32+ 3) /* XSAVES/XRSTORS */
-
-/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (edx), word 11 */
-#define X86_FEATURE_CQM_LLC	(11*32+ 1) /* LLC QoS if 1 */
-
-/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (edx), word 12 */
-#define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring if 1 */
-
-/* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */
-#define X86_FEATURE_CLZERO	(13*32+0) /* CLZERO instruction */
-
-/* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */
-#define X86_FEATURE_DTHERM	(14*32+ 0) /* Digital Thermal Sensor */
-#define X86_FEATURE_IDA		(14*32+ 1) /* Intel Dynamic Acceleration */
-#define X86_FEATURE_ARAT	(14*32+ 2) /* Always Running APIC Timer */
-#define X86_FEATURE_PLN		(14*32+ 4) /* Intel Power Limit Notification */
-#define X86_FEATURE_PTS		(14*32+ 6) /* Intel Package Thermal Status */
-#define X86_FEATURE_HWP		(14*32+ 7) /* Intel Hardware P-states */
-#define X86_FEATURE_HWP_NOTIFY	(14*32+ 8) /* HWP Notification */
-#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */
-#define X86_FEATURE_HWP_EPP	(14*32+10) /* HWP Energy Perf. Preference */
-#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */
-
-/* AMD SVM Feature Identification, CPUID level 0x8000000a (edx), word 15 */
-#define X86_FEATURE_NPT		(15*32+ 0) /* Nested Page Table support */
-#define X86_FEATURE_LBRV	(15*32+ 1) /* LBR Virtualization support */
-#define X86_FEATURE_SVML	(15*32+ 2) /* "svm_lock" SVM locking MSR */
-#define X86_FEATURE_NRIPS	(15*32+ 3) /* "nrip_save" SVM next_rip save */
-#define X86_FEATURE_TSCRATEMSR  (15*32+ 4) /* "tsc_scale" TSC scaling support */
-#define X86_FEATURE_VMCBCLEAN   (15*32+ 5) /* "vmcb_clean" VMCB clean bits support */
-#define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* flush-by-ASID support */
-#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* Decode Assists support */
-#define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */
-#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */
-
-/*
- * BUG word(s)
- */
-#define X86_BUG(x)		(NCAPINTS*32 + (x))
-
-#define X86_BUG_F00F		X86_BUG(0) /* Intel F00F */
-#define X86_BUG_FDIV		X86_BUG(1) /* FPU FDIV */
-#define X86_BUG_COMA		X86_BUG(2) /* Cyrix 6x86 coma */
-#define X86_BUG_AMD_TLB_MMATCH	X86_BUG(3) /* "tlb_mmatch" AMD Erratum 383 */
-#define X86_BUG_AMD_APIC_C1E	X86_BUG(4) /* "apic_c1e" AMD Erratum 400 */
-#define X86_BUG_11AP		X86_BUG(5) /* Bad local APIC aka 11AP */
-#define X86_BUG_FXSAVE_LEAK	X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */
-#define X86_BUG_CLFLUSH_MONITOR	X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */
-#define X86_BUG_SYSRET_SS_ATTRS	X86_BUG(8) /* SYSRET doesn't fix up SS attrs */
+#include <asm/processor.h>
 
 #if defined(__KERNEL__) && !defined(__ASSEMBLY__)
 
@@ -369,8 +88,7 @@
  * is not relevant.
  */
 #define cpu_feature_enabled(bit)	\
-	(__builtin_constant_p(bit) && DISABLED_MASK_BIT_SET(bit) ? 0 :	\
-	 cpu_has(&boot_cpu_data, bit))
+	(__builtin_constant_p(bit) && DISABLED_MASK_BIT_SET(bit) ? 0 : static_cpu_has(bit))
 
 #define boot_cpu_has(bit)	cpu_has(&boot_cpu_data, bit)
 
@@ -406,106 +124,19 @@
 #define cpu_has_osxsave		boot_cpu_has(X86_FEATURE_OSXSAVE)
 #define cpu_has_hypervisor	boot_cpu_has(X86_FEATURE_HYPERVISOR)
 /*
- * Do not add any more of those clumsy macros - use static_cpu_has_safe() for
+ * Do not add any more of those clumsy macros - use static_cpu_has() for
  * fast paths and boot_cpu_has() otherwise!
  */
 
-#if __GNUC__ >= 4 && defined(CONFIG_X86_FAST_FEATURE_TESTS)
-extern void warn_pre_alternatives(void);
-extern bool __static_cpu_has_safe(u16 bit);
-
+#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_X86_FAST_FEATURE_TESTS)
 /*
  * Static testing of CPU features.  Used the same as boot_cpu_has().
- * These are only valid after alternatives have run, but will statically
- * patch the target code for additional performance.
+ * These will statically patch the target code for additional
+ * performance.
  */
-static __always_inline __pure bool __static_cpu_has(u16 bit)
+static __always_inline __pure bool _static_cpu_has(u16 bit)
 {
-#ifdef CC_HAVE_ASM_GOTO
-
-#ifdef CONFIG_X86_DEBUG_STATIC_CPU_HAS
-
-		/*
-		 * Catch too early usage of this before alternatives
-		 * have run.
-		 */
-		asm_volatile_goto("1: jmp %l[t_warn]\n"
-			 "2:\n"
-			 ".section .altinstructions,\"a\"\n"
-			 " .long 1b - .\n"
-			 " .long 0\n"		/* no replacement */
-			 " .word %P0\n"		/* 1: do replace */
-			 " .byte 2b - 1b\n"	/* source len */
-			 " .byte 0\n"		/* replacement len */
-			 " .byte 0\n"		/* pad len */
-			 ".previous\n"
-			 /* skipping size check since replacement size = 0 */
-			 : : "i" (X86_FEATURE_ALWAYS) : : t_warn);
-
-#endif
-
-		asm_volatile_goto("1: jmp %l[t_no]\n"
-			 "2:\n"
-			 ".section .altinstructions,\"a\"\n"
-			 " .long 1b - .\n"
-			 " .long 0\n"		/* no replacement */
-			 " .word %P0\n"		/* feature bit */
-			 " .byte 2b - 1b\n"	/* source len */
-			 " .byte 0\n"		/* replacement len */
-			 " .byte 0\n"		/* pad len */
-			 ".previous\n"
-			 /* skipping size check since replacement size = 0 */
-			 : : "i" (bit) : : t_no);
-		return true;
-	t_no:
-		return false;
-
-#ifdef CONFIG_X86_DEBUG_STATIC_CPU_HAS
-	t_warn:
-		warn_pre_alternatives();
-		return false;
-#endif
-
-#else /* CC_HAVE_ASM_GOTO */
-
-		u8 flag;
-		/* Open-coded due to __stringify() in ALTERNATIVE() */
-		asm volatile("1: movb $0,%0\n"
-			     "2:\n"
-			     ".section .altinstructions,\"a\"\n"
-			     " .long 1b - .\n"
-			     " .long 3f - .\n"
-			     " .word %P1\n"		/* feature bit */
-			     " .byte 2b - 1b\n"		/* source len */
-			     " .byte 4f - 3f\n"		/* replacement len */
-			     " .byte 0\n"		/* pad len */
-			     ".previous\n"
-			     ".section .discard,\"aw\",@progbits\n"
-			     " .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */
-			     ".previous\n"
-			     ".section .altinstr_replacement,\"ax\"\n"
-			     "3: movb $1,%0\n"
-			     "4:\n"
-			     ".previous\n"
-			     : "=qm" (flag) : "i" (bit));
-		return flag;
-
-#endif /* CC_HAVE_ASM_GOTO */
-}
-
-#define static_cpu_has(bit)					\
-(								\
-	__builtin_constant_p(boot_cpu_has(bit)) ?		\
-		boot_cpu_has(bit) :				\
-	__builtin_constant_p(bit) ?				\
-		__static_cpu_has(bit) :				\
-		boot_cpu_has(bit)				\
-)
-
-static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
-{
-#ifdef CC_HAVE_ASM_GOTO
-		asm_volatile_goto("1: jmp %l[t_dynamic]\n"
+		asm_volatile_goto("1: jmp 6f\n"
 			 "2:\n"
 			 ".skip -(((5f-4f) - (2b-1b)) > 0) * "
 			         "((5f-4f) - (2b-1b)),0x90\n"
@@ -530,66 +161,34 @@
 			 " .byte 0\n"			/* repl len */
 			 " .byte 0\n"			/* pad len */
 			 ".previous\n"
-			 : : "i" (bit), "i" (X86_FEATURE_ALWAYS)
-			 : : t_dynamic, t_no);
+			 ".section .altinstr_aux,\"ax\"\n"
+			 "6:\n"
+			 " testb %[bitnum],%[cap_byte]\n"
+			 " jnz %l[t_yes]\n"
+			 " jmp %l[t_no]\n"
+			 ".previous\n"
+			 : : "i" (bit), "i" (X86_FEATURE_ALWAYS),
+			     [bitnum] "i" (1 << (bit & 7)),
+			     [cap_byte] "m" (((const char *)boot_cpu_data.x86_capability)[bit >> 3])
+			 : : t_yes, t_no);
+	t_yes:
 		return true;
 	t_no:
 		return false;
-	t_dynamic:
-		return __static_cpu_has_safe(bit);
-#else
-		u8 flag;
-		/* Open-coded due to __stringify() in ALTERNATIVE() */
-		asm volatile("1: movb $2,%0\n"
-			     "2:\n"
-			     ".section .altinstructions,\"a\"\n"
-			     " .long 1b - .\n"		/* src offset */
-			     " .long 3f - .\n"		/* repl offset */
-			     " .word %P2\n"		/* always replace */
-			     " .byte 2b - 1b\n"		/* source len */
-			     " .byte 4f - 3f\n"		/* replacement len */
-			     " .byte 0\n"		/* pad len */
-			     ".previous\n"
-			     ".section .discard,\"aw\",@progbits\n"
-			     " .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */
-			     ".previous\n"
-			     ".section .altinstr_replacement,\"ax\"\n"
-			     "3: movb $0,%0\n"
-			     "4:\n"
-			     ".previous\n"
-			     ".section .altinstructions,\"a\"\n"
-			     " .long 1b - .\n"		/* src offset */
-			     " .long 5f - .\n"		/* repl offset */
-			     " .word %P1\n"		/* feature bit */
-			     " .byte 4b - 3b\n"		/* src len */
-			     " .byte 6f - 5f\n"		/* repl len */
-			     " .byte 0\n"		/* pad len */
-			     ".previous\n"
-			     ".section .discard,\"aw\",@progbits\n"
-			     " .byte 0xff + (6f-5f) - (4b-3b)\n" /* size check */
-			     ".previous\n"
-			     ".section .altinstr_replacement,\"ax\"\n"
-			     "5: movb $1,%0\n"
-			     "6:\n"
-			     ".previous\n"
-			     : "=qm" (flag)
-			     : "i" (bit), "i" (X86_FEATURE_ALWAYS));
-		return (flag == 2 ? __static_cpu_has_safe(bit) : flag);
-#endif /* CC_HAVE_ASM_GOTO */
 }
 
-#define static_cpu_has_safe(bit)				\
+#define static_cpu_has(bit)					\
 (								\
 	__builtin_constant_p(boot_cpu_has(bit)) ?		\
 		boot_cpu_has(bit) :				\
-		_static_cpu_has_safe(bit)			\
+		_static_cpu_has(bit)				\
 )
 #else
 /*
- * gcc 3.x is too stupid to do the static test; fall back to dynamic.
+ * Fall back to dynamic for gcc versions which don't support asm goto. Should be
+ * a minority now anyway.
  */
 #define static_cpu_has(bit)		boot_cpu_has(bit)
-#define static_cpu_has_safe(bit)	boot_cpu_has(bit)
 #endif
 
 #define cpu_has_bug(c, bit)		cpu_has(c, (bit))
@@ -597,7 +196,6 @@
 #define clear_cpu_bug(c, bit)		clear_cpu_cap(c, (bit))
 
 #define static_cpu_has_bug(bit)		static_cpu_has((bit))
-#define static_cpu_has_bug_safe(bit)	static_cpu_has_safe((bit))
 #define boot_cpu_has_bug(bit)		cpu_has_bug(&boot_cpu_data, (bit))
 
 #define MAX_CPU_FEATURES		(NCAPINTS * 32)
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
new file mode 100644
index 0000000..074b760
--- /dev/null
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -0,0 +1,300 @@
+#ifndef _ASM_X86_CPUFEATURES_H
+#define _ASM_X86_CPUFEATURES_H
+
+#ifndef _ASM_X86_REQUIRED_FEATURES_H
+#include <asm/required-features.h>
+#endif
+
+#ifndef _ASM_X86_DISABLED_FEATURES_H
+#include <asm/disabled-features.h>
+#endif
+
+/*
+ * Defines x86 CPU feature bits
+ */
+#define NCAPINTS	16	/* N 32-bit words worth of info */
+#define NBUGINTS	1	/* N 32-bit bug flags */
+
+/*
+ * Note: If the comment begins with a quoted string, that string is used
+ * in /proc/cpuinfo instead of the macro name.  If the string is "",
+ * this feature bit is not displayed in /proc/cpuinfo at all.
+ */
+
+/* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */
+#define X86_FEATURE_FPU		( 0*32+ 0) /* Onboard FPU */
+#define X86_FEATURE_VME		( 0*32+ 1) /* Virtual Mode Extensions */
+#define X86_FEATURE_DE		( 0*32+ 2) /* Debugging Extensions */
+#define X86_FEATURE_PSE		( 0*32+ 3) /* Page Size Extensions */
+#define X86_FEATURE_TSC		( 0*32+ 4) /* Time Stamp Counter */
+#define X86_FEATURE_MSR		( 0*32+ 5) /* Model-Specific Registers */
+#define X86_FEATURE_PAE		( 0*32+ 6) /* Physical Address Extensions */
+#define X86_FEATURE_MCE		( 0*32+ 7) /* Machine Check Exception */
+#define X86_FEATURE_CX8		( 0*32+ 8) /* CMPXCHG8 instruction */
+#define X86_FEATURE_APIC	( 0*32+ 9) /* Onboard APIC */
+#define X86_FEATURE_SEP		( 0*32+11) /* SYSENTER/SYSEXIT */
+#define X86_FEATURE_MTRR	( 0*32+12) /* Memory Type Range Registers */
+#define X86_FEATURE_PGE		( 0*32+13) /* Page Global Enable */
+#define X86_FEATURE_MCA		( 0*32+14) /* Machine Check Architecture */
+#define X86_FEATURE_CMOV	( 0*32+15) /* CMOV instructions */
+					  /* (plus FCMOVcc, FCOMI with FPU) */
+#define X86_FEATURE_PAT		( 0*32+16) /* Page Attribute Table */
+#define X86_FEATURE_PSE36	( 0*32+17) /* 36-bit PSEs */
+#define X86_FEATURE_PN		( 0*32+18) /* Processor serial number */
+#define X86_FEATURE_CLFLUSH	( 0*32+19) /* CLFLUSH instruction */
+#define X86_FEATURE_DS		( 0*32+21) /* "dts" Debug Store */
+#define X86_FEATURE_ACPI	( 0*32+22) /* ACPI via MSR */
+#define X86_FEATURE_MMX		( 0*32+23) /* Multimedia Extensions */
+#define X86_FEATURE_FXSR	( 0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */
+#define X86_FEATURE_XMM		( 0*32+25) /* "sse" */
+#define X86_FEATURE_XMM2	( 0*32+26) /* "sse2" */
+#define X86_FEATURE_SELFSNOOP	( 0*32+27) /* "ss" CPU self snoop */
+#define X86_FEATURE_HT		( 0*32+28) /* Hyper-Threading */
+#define X86_FEATURE_ACC		( 0*32+29) /* "tm" Automatic clock control */
+#define X86_FEATURE_IA64	( 0*32+30) /* IA-64 processor */
+#define X86_FEATURE_PBE		( 0*32+31) /* Pending Break Enable */
+
+/* AMD-defined CPU features, CPUID level 0x80000001, word 1 */
+/* Don't duplicate feature flags which are redundant with Intel! */
+#define X86_FEATURE_SYSCALL	( 1*32+11) /* SYSCALL/SYSRET */
+#define X86_FEATURE_MP		( 1*32+19) /* MP Capable. */
+#define X86_FEATURE_NX		( 1*32+20) /* Execute Disable */
+#define X86_FEATURE_MMXEXT	( 1*32+22) /* AMD MMX extensions */
+#define X86_FEATURE_FXSR_OPT	( 1*32+25) /* FXSAVE/FXRSTOR optimizations */
+#define X86_FEATURE_GBPAGES	( 1*32+26) /* "pdpe1gb" GB pages */
+#define X86_FEATURE_RDTSCP	( 1*32+27) /* RDTSCP */
+#define X86_FEATURE_LM		( 1*32+29) /* Long Mode (x86-64) */
+#define X86_FEATURE_3DNOWEXT	( 1*32+30) /* AMD 3DNow! extensions */
+#define X86_FEATURE_3DNOW	( 1*32+31) /* 3DNow! */
+
+/* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */
+#define X86_FEATURE_RECOVERY	( 2*32+ 0) /* CPU in recovery mode */
+#define X86_FEATURE_LONGRUN	( 2*32+ 1) /* Longrun power control */
+#define X86_FEATURE_LRTI	( 2*32+ 3) /* LongRun table interface */
+
+/* Other features, Linux-defined mapping, word 3 */
+/* This range is used for feature bits which conflict or are synthesized */
+#define X86_FEATURE_CXMMX	( 3*32+ 0) /* Cyrix MMX extensions */
+#define X86_FEATURE_K6_MTRR	( 3*32+ 1) /* AMD K6 nonstandard MTRRs */
+#define X86_FEATURE_CYRIX_ARR	( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */
+#define X86_FEATURE_CENTAUR_MCR	( 3*32+ 3) /* Centaur MCRs (= MTRRs) */
+/* cpu types for specific tunings: */
+#define X86_FEATURE_K8		( 3*32+ 4) /* "" Opteron, Athlon64 */
+#define X86_FEATURE_K7		( 3*32+ 5) /* "" Athlon */
+#define X86_FEATURE_P3		( 3*32+ 6) /* "" P3 */
+#define X86_FEATURE_P4		( 3*32+ 7) /* "" P4 */
+#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */
+#define X86_FEATURE_UP		( 3*32+ 9) /* smp kernel running on up */
+#define X86_FEATURE_ART		( 3*32+10) /* Platform has always running timer (ART) */
+#define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* Intel Architectural PerfMon */
+#define X86_FEATURE_PEBS	( 3*32+12) /* Precise-Event Based Sampling */
+#define X86_FEATURE_BTS		( 3*32+13) /* Branch Trace Store */
+#define X86_FEATURE_SYSCALL32	( 3*32+14) /* "" syscall in ia32 userspace */
+#define X86_FEATURE_SYSENTER32	( 3*32+15) /* "" sysenter in ia32 userspace */
+#define X86_FEATURE_REP_GOOD	( 3*32+16) /* rep microcode works well */
+#define X86_FEATURE_MFENCE_RDTSC ( 3*32+17) /* "" Mfence synchronizes RDTSC */
+#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" Lfence synchronizes RDTSC */
+/* free, was #define X86_FEATURE_11AP	( 3*32+19) * "" Bad local APIC aka 11AP */
+#define X86_FEATURE_NOPL	( 3*32+20) /* The NOPL (0F 1F) instructions */
+#define X86_FEATURE_ALWAYS	( 3*32+21) /* "" Always-present feature */
+#define X86_FEATURE_XTOPOLOGY	( 3*32+22) /* cpu topology enum extensions */
+#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */
+#define X86_FEATURE_NONSTOP_TSC	( 3*32+24) /* TSC does not stop in C states */
+/* free, was #define X86_FEATURE_CLFLUSH_MONITOR ( 3*32+25) * "" clflush reqd with monitor */
+#define X86_FEATURE_EXTD_APICID	( 3*32+26) /* has extended APICID (8 bits) */
+#define X86_FEATURE_AMD_DCM     ( 3*32+27) /* multi-node processor */
+#define X86_FEATURE_APERFMPERF	( 3*32+28) /* APERFMPERF */
+#define X86_FEATURE_EAGER_FPU	( 3*32+29) /* "eagerfpu" Non lazy FPU restore */
+#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */
+#define X86_FEATURE_MCE_RECOVERY ( 3*32+31) /* cpu has recoverable machine checks */
+
+/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
+#define X86_FEATURE_XMM3	( 4*32+ 0) /* "pni" SSE-3 */
+#define X86_FEATURE_PCLMULQDQ	( 4*32+ 1) /* PCLMULQDQ instruction */
+#define X86_FEATURE_DTES64	( 4*32+ 2) /* 64-bit Debug Store */
+#define X86_FEATURE_MWAIT	( 4*32+ 3) /* "monitor" Monitor/Mwait support */
+#define X86_FEATURE_DSCPL	( 4*32+ 4) /* "ds_cpl" CPL Qual. Debug Store */
+#define X86_FEATURE_VMX		( 4*32+ 5) /* Hardware virtualization */
+#define X86_FEATURE_SMX		( 4*32+ 6) /* Safer mode */
+#define X86_FEATURE_EST		( 4*32+ 7) /* Enhanced SpeedStep */
+#define X86_FEATURE_TM2		( 4*32+ 8) /* Thermal Monitor 2 */
+#define X86_FEATURE_SSSE3	( 4*32+ 9) /* Supplemental SSE-3 */
+#define X86_FEATURE_CID		( 4*32+10) /* Context ID */
+#define X86_FEATURE_SDBG	( 4*32+11) /* Silicon Debug */
+#define X86_FEATURE_FMA		( 4*32+12) /* Fused multiply-add */
+#define X86_FEATURE_CX16	( 4*32+13) /* CMPXCHG16B */
+#define X86_FEATURE_XTPR	( 4*32+14) /* Send Task Priority Messages */
+#define X86_FEATURE_PDCM	( 4*32+15) /* Performance Capabilities */
+#define X86_FEATURE_PCID	( 4*32+17) /* Process Context Identifiers */
+#define X86_FEATURE_DCA		( 4*32+18) /* Direct Cache Access */
+#define X86_FEATURE_XMM4_1	( 4*32+19) /* "sse4_1" SSE-4.1 */
+#define X86_FEATURE_XMM4_2	( 4*32+20) /* "sse4_2" SSE-4.2 */
+#define X86_FEATURE_X2APIC	( 4*32+21) /* x2APIC */
+#define X86_FEATURE_MOVBE	( 4*32+22) /* MOVBE instruction */
+#define X86_FEATURE_POPCNT      ( 4*32+23) /* POPCNT instruction */
+#define X86_FEATURE_TSC_DEADLINE_TIMER	( 4*32+24) /* Tsc deadline timer */
+#define X86_FEATURE_AES		( 4*32+25) /* AES instructions */
+#define X86_FEATURE_XSAVE	( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */
+#define X86_FEATURE_OSXSAVE	( 4*32+27) /* "" XSAVE enabled in the OS */
+#define X86_FEATURE_AVX		( 4*32+28) /* Advanced Vector Extensions */
+#define X86_FEATURE_F16C	( 4*32+29) /* 16-bit fp conversions */
+#define X86_FEATURE_RDRAND	( 4*32+30) /* The RDRAND instruction */
+#define X86_FEATURE_HYPERVISOR	( 4*32+31) /* Running on a hypervisor */
+
+/* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */
+#define X86_FEATURE_XSTORE	( 5*32+ 2) /* "rng" RNG present (xstore) */
+#define X86_FEATURE_XSTORE_EN	( 5*32+ 3) /* "rng_en" RNG enabled */
+#define X86_FEATURE_XCRYPT	( 5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */
+#define X86_FEATURE_XCRYPT_EN	( 5*32+ 7) /* "ace_en" on-CPU crypto enabled */
+#define X86_FEATURE_ACE2	( 5*32+ 8) /* Advanced Cryptography Engine v2 */
+#define X86_FEATURE_ACE2_EN	( 5*32+ 9) /* ACE v2 enabled */
+#define X86_FEATURE_PHE		( 5*32+10) /* PadLock Hash Engine */
+#define X86_FEATURE_PHE_EN	( 5*32+11) /* PHE enabled */
+#define X86_FEATURE_PMM		( 5*32+12) /* PadLock Montgomery Multiplier */
+#define X86_FEATURE_PMM_EN	( 5*32+13) /* PMM enabled */
+
+/* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */
+#define X86_FEATURE_LAHF_LM	( 6*32+ 0) /* LAHF/SAHF in long mode */
+#define X86_FEATURE_CMP_LEGACY	( 6*32+ 1) /* If yes HyperThreading not valid */
+#define X86_FEATURE_SVM		( 6*32+ 2) /* Secure virtual machine */
+#define X86_FEATURE_EXTAPIC	( 6*32+ 3) /* Extended APIC space */
+#define X86_FEATURE_CR8_LEGACY	( 6*32+ 4) /* CR8 in 32-bit mode */
+#define X86_FEATURE_ABM		( 6*32+ 5) /* Advanced bit manipulation */
+#define X86_FEATURE_SSE4A	( 6*32+ 6) /* SSE-4A */
+#define X86_FEATURE_MISALIGNSSE ( 6*32+ 7) /* Misaligned SSE mode */
+#define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* 3DNow prefetch instructions */
+#define X86_FEATURE_OSVW	( 6*32+ 9) /* OS Visible Workaround */
+#define X86_FEATURE_IBS		( 6*32+10) /* Instruction Based Sampling */
+#define X86_FEATURE_XOP		( 6*32+11) /* extended AVX instructions */
+#define X86_FEATURE_SKINIT	( 6*32+12) /* SKINIT/STGI instructions */
+#define X86_FEATURE_WDT		( 6*32+13) /* Watchdog timer */
+#define X86_FEATURE_LWP		( 6*32+15) /* Light Weight Profiling */
+#define X86_FEATURE_FMA4	( 6*32+16) /* 4 operands MAC instructions */
+#define X86_FEATURE_TCE		( 6*32+17) /* translation cache extension */
+#define X86_FEATURE_NODEID_MSR	( 6*32+19) /* NodeId MSR */
+#define X86_FEATURE_TBM		( 6*32+21) /* trailing bit manipulations */
+#define X86_FEATURE_TOPOEXT	( 6*32+22) /* topology extensions CPUID leafs */
+#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* core performance counter extensions */
+#define X86_FEATURE_PERFCTR_NB  ( 6*32+24) /* NB performance counter extensions */
+#define X86_FEATURE_BPEXT	(6*32+26) /* data breakpoint extension */
+#define X86_FEATURE_PERFCTR_L2	( 6*32+28) /* L2 performance counter extensions */
+#define X86_FEATURE_MWAITX	( 6*32+29) /* MWAIT extension (MONITORX/MWAITX) */
+
+/*
+ * Auxiliary flags: Linux defined - For features scattered in various
+ * CPUID levels like 0x6, 0xA etc, word 7.
+ *
+ * Reuse free bits when adding new feature flags!
+ */
+
+#define X86_FEATURE_CPB		( 7*32+ 2) /* AMD Core Performance Boost */
+#define X86_FEATURE_EPB		( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
+
+#define X86_FEATURE_HW_PSTATE	( 7*32+ 8) /* AMD HW-PState */
+#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
+
+#define X86_FEATURE_INTEL_PT	( 7*32+15) /* Intel Processor Trace */
+
+/* Virtualization flags: Linux defined, word 8 */
+#define X86_FEATURE_TPR_SHADOW  ( 8*32+ 0) /* Intel TPR Shadow */
+#define X86_FEATURE_VNMI        ( 8*32+ 1) /* Intel Virtual NMI */
+#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 2) /* Intel FlexPriority */
+#define X86_FEATURE_EPT         ( 8*32+ 3) /* Intel Extended Page Table */
+#define X86_FEATURE_VPID        ( 8*32+ 4) /* Intel Virtual Processor ID */
+
+#define X86_FEATURE_VMMCALL     ( 8*32+15) /* Prefer vmmcall to vmcall */
+#define X86_FEATURE_XENPV       ( 8*32+16) /* "" Xen paravirtual guest */
+
+
+/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */
+#define X86_FEATURE_FSGSBASE	( 9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/
+#define X86_FEATURE_TSC_ADJUST	( 9*32+ 1) /* TSC adjustment MSR 0x3b */
+#define X86_FEATURE_BMI1	( 9*32+ 3) /* 1st group bit manipulation extensions */
+#define X86_FEATURE_HLE		( 9*32+ 4) /* Hardware Lock Elision */
+#define X86_FEATURE_AVX2	( 9*32+ 5) /* AVX2 instructions */
+#define X86_FEATURE_SMEP	( 9*32+ 7) /* Supervisor Mode Execution Protection */
+#define X86_FEATURE_BMI2	( 9*32+ 8) /* 2nd group bit manipulation extensions */
+#define X86_FEATURE_ERMS	( 9*32+ 9) /* Enhanced REP MOVSB/STOSB */
+#define X86_FEATURE_INVPCID	( 9*32+10) /* Invalidate Processor Context ID */
+#define X86_FEATURE_RTM		( 9*32+11) /* Restricted Transactional Memory */
+#define X86_FEATURE_CQM		( 9*32+12) /* Cache QoS Monitoring */
+#define X86_FEATURE_MPX		( 9*32+14) /* Memory Protection Extension */
+#define X86_FEATURE_AVX512F	( 9*32+16) /* AVX-512 Foundation */
+#define X86_FEATURE_AVX512DQ	( 9*32+17) /* AVX-512 DQ (Double/Quad granular) Instructions */
+#define X86_FEATURE_RDSEED	( 9*32+18) /* The RDSEED instruction */
+#define X86_FEATURE_ADX		( 9*32+19) /* The ADCX and ADOX instructions */
+#define X86_FEATURE_SMAP	( 9*32+20) /* Supervisor Mode Access Prevention */
+#define X86_FEATURE_PCOMMIT	( 9*32+22) /* PCOMMIT instruction */
+#define X86_FEATURE_CLFLUSHOPT	( 9*32+23) /* CLFLUSHOPT instruction */
+#define X86_FEATURE_CLWB	( 9*32+24) /* CLWB instruction */
+#define X86_FEATURE_AVX512PF	( 9*32+26) /* AVX-512 Prefetch */
+#define X86_FEATURE_AVX512ER	( 9*32+27) /* AVX-512 Exponential and Reciprocal */
+#define X86_FEATURE_AVX512CD	( 9*32+28) /* AVX-512 Conflict Detection */
+#define X86_FEATURE_SHA_NI	( 9*32+29) /* SHA1/SHA256 Instruction Extensions */
+#define X86_FEATURE_AVX512BW	( 9*32+30) /* AVX-512 BW (Byte/Word granular) Instructions */
+#define X86_FEATURE_AVX512VL	( 9*32+31) /* AVX-512 VL (128/256 Vector Length) Extensions */
+
+/* Extended state features, CPUID level 0x0000000d:1 (eax), word 10 */
+#define X86_FEATURE_XSAVEOPT	(10*32+ 0) /* XSAVEOPT */
+#define X86_FEATURE_XSAVEC	(10*32+ 1) /* XSAVEC */
+#define X86_FEATURE_XGETBV1	(10*32+ 2) /* XGETBV with ECX = 1 */
+#define X86_FEATURE_XSAVES	(10*32+ 3) /* XSAVES/XRSTORS */
+
+/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (edx), word 11 */
+#define X86_FEATURE_CQM_LLC	(11*32+ 1) /* LLC QoS if 1 */
+
+/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (edx), word 12 */
+#define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring if 1 */
+
+/* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */
+#define X86_FEATURE_CLZERO	(13*32+0) /* CLZERO instruction */
+
+/* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */
+#define X86_FEATURE_DTHERM	(14*32+ 0) /* Digital Thermal Sensor */
+#define X86_FEATURE_IDA		(14*32+ 1) /* Intel Dynamic Acceleration */
+#define X86_FEATURE_ARAT	(14*32+ 2) /* Always Running APIC Timer */
+#define X86_FEATURE_PLN		(14*32+ 4) /* Intel Power Limit Notification */
+#define X86_FEATURE_PTS		(14*32+ 6) /* Intel Package Thermal Status */
+#define X86_FEATURE_HWP		(14*32+ 7) /* Intel Hardware P-states */
+#define X86_FEATURE_HWP_NOTIFY	(14*32+ 8) /* HWP Notification */
+#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */
+#define X86_FEATURE_HWP_EPP	(14*32+10) /* HWP Energy Perf. Preference */
+#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */
+
+/* AMD SVM Feature Identification, CPUID level 0x8000000a (edx), word 15 */
+#define X86_FEATURE_NPT		(15*32+ 0) /* Nested Page Table support */
+#define X86_FEATURE_LBRV	(15*32+ 1) /* LBR Virtualization support */
+#define X86_FEATURE_SVML	(15*32+ 2) /* "svm_lock" SVM locking MSR */
+#define X86_FEATURE_NRIPS	(15*32+ 3) /* "nrip_save" SVM next_rip save */
+#define X86_FEATURE_TSCRATEMSR  (15*32+ 4) /* "tsc_scale" TSC scaling support */
+#define X86_FEATURE_VMCBCLEAN   (15*32+ 5) /* "vmcb_clean" VMCB clean bits support */
+#define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* flush-by-ASID support */
+#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* Decode Assists support */
+#define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */
+#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */
+#define X86_FEATURE_AVIC	(15*32+13) /* Virtual Interrupt Controller */
+
+/*
+ * BUG word(s)
+ */
+#define X86_BUG(x)		(NCAPINTS*32 + (x))
+
+#define X86_BUG_F00F		X86_BUG(0) /* Intel F00F */
+#define X86_BUG_FDIV		X86_BUG(1) /* FPU FDIV */
+#define X86_BUG_COMA		X86_BUG(2) /* Cyrix 6x86 coma */
+#define X86_BUG_AMD_TLB_MMATCH	X86_BUG(3) /* "tlb_mmatch" AMD Erratum 383 */
+#define X86_BUG_AMD_APIC_C1E	X86_BUG(4) /* "apic_c1e" AMD Erratum 400 */
+#define X86_BUG_11AP		X86_BUG(5) /* Bad local APIC aka 11AP */
+#define X86_BUG_FXSAVE_LEAK	X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */
+#define X86_BUG_CLFLUSH_MONITOR	X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */
+#define X86_BUG_SYSRET_SS_ATTRS	X86_BUG(8) /* SYSRET doesn't fix up SS attrs */
+
+#ifdef CONFIG_X86_32
+/*
+ * 64-bit kernels don't use X86_BUG_ESPFIX.  Make the define conditional
+ * to avoid confusion.
+ */
+#define X86_BUG_ESPFIX		X86_BUG(9) /* "" IRET to 16-bit SS corrupts ESP/RSP high bits */
+#endif
+
+#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/arch/x86/include/asm/desc_defs.h b/arch/x86/include/asm/desc_defs.h
index 278441f..eb5deb4 100644
--- a/arch/x86/include/asm/desc_defs.h
+++ b/arch/x86/include/asm/desc_defs.h
@@ -98,4 +98,27 @@
 
 #endif /* !__ASSEMBLY__ */
 
+/* Access rights as returned by LAR */
+#define AR_TYPE_RODATA		(0 * (1 << 9))
+#define AR_TYPE_RWDATA		(1 * (1 << 9))
+#define AR_TYPE_RODATA_EXPDOWN	(2 * (1 << 9))
+#define AR_TYPE_RWDATA_EXPDOWN	(3 * (1 << 9))
+#define AR_TYPE_XOCODE		(4 * (1 << 9))
+#define AR_TYPE_XRCODE		(5 * (1 << 9))
+#define AR_TYPE_XOCODE_CONF	(6 * (1 << 9))
+#define AR_TYPE_XRCODE_CONF	(7 * (1 << 9))
+#define AR_TYPE_MASK		(7 * (1 << 9))
+
+#define AR_DPL0			(0 * (1 << 13))
+#define AR_DPL3			(3 * (1 << 13))
+#define AR_DPL_MASK		(3 * (1 << 13))
+
+#define AR_A			(1 << 8)   /* "Accessed" */
+#define AR_S			(1 << 12)  /* If clear, "System" segment */
+#define AR_P			(1 << 15)  /* "Present" */
+#define AR_AVL			(1 << 20)  /* "AVaiLable" (no HW effect) */
+#define AR_L			(1 << 21)  /* "Long mode" for code segments */
+#define AR_DB			(1 << 22)  /* D/B, effect depends on type */
+#define AR_G			(1 << 23)  /* "Granularity" (limit in pages) */
+
 #endif /* _ASM_X86_DESC_DEFS_H */
diff --git a/arch/x86/include/asm/dmi.h b/arch/x86/include/asm/dmi.h
index 535192f..3c69fed 100644
--- a/arch/x86/include/asm/dmi.h
+++ b/arch/x86/include/asm/dmi.h
@@ -15,7 +15,7 @@
 /* Use early IO mappings for DMI because it's initialized early */
 #define dmi_early_remap		early_ioremap
 #define dmi_early_unmap		early_iounmap
-#define dmi_remap		ioremap
+#define dmi_remap		ioremap_cache
 #define dmi_unmap		iounmap
 
 #endif /* _ASM_X86_DMI_H */
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index 1514753..15340e3 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -256,7 +256,7 @@
    instruction set this CPU supports.  This could be done in user space,
    but it's not easy, and we've already done it here.  */
 
-#define ELF_HWCAP		(boot_cpu_data.x86_capability[0])
+#define ELF_HWCAP		(boot_cpu_data.x86_capability[CPUID_1_EDX])
 
 /* This yields a string that ld.so will use to load implementation
    specific libraries for optimization.  This is more specific in
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index 6d7d0e5..8554f96 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -138,7 +138,7 @@
 extern int fixmaps_set;
 
 extern pte_t *kmap_pte;
-extern pgprot_t kmap_prot;
+#define kmap_prot PAGE_KERNEL
 extern pte_t *pkmap_page_table;
 
 void __native_set_fixmap(enum fixed_addresses idx, pte_t pte);
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 0fd440d..a212434 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -17,6 +17,7 @@
 #include <asm/user.h>
 #include <asm/fpu/api.h>
 #include <asm/fpu/xstate.h>
+#include <asm/cpufeature.h>
 
 /*
  * High level FPU state handling functions:
@@ -58,22 +59,22 @@
  */
 static __always_inline __pure bool use_eager_fpu(void)
 {
-	return static_cpu_has_safe(X86_FEATURE_EAGER_FPU);
+	return static_cpu_has(X86_FEATURE_EAGER_FPU);
 }
 
 static __always_inline __pure bool use_xsaveopt(void)
 {
-	return static_cpu_has_safe(X86_FEATURE_XSAVEOPT);
+	return static_cpu_has(X86_FEATURE_XSAVEOPT);
 }
 
 static __always_inline __pure bool use_xsave(void)
 {
-	return static_cpu_has_safe(X86_FEATURE_XSAVE);
+	return static_cpu_has(X86_FEATURE_XSAVE);
 }
 
 static __always_inline __pure bool use_fxsr(void)
 {
-	return static_cpu_has_safe(X86_FEATURE_FXSR);
+	return static_cpu_has(X86_FEATURE_FXSR);
 }
 
 /*
@@ -300,7 +301,7 @@
 
 	WARN_ON(system_state != SYSTEM_BOOTING);
 
-	if (static_cpu_has_safe(X86_FEATURE_XSAVES))
+	if (static_cpu_has(X86_FEATURE_XSAVES))
 		XSTATE_OP(XSAVES, xstate, lmask, hmask, err);
 	else
 		XSTATE_OP(XSAVE, xstate, lmask, hmask, err);
@@ -322,7 +323,7 @@
 
 	WARN_ON(system_state != SYSTEM_BOOTING);
 
-	if (static_cpu_has_safe(X86_FEATURE_XSAVES))
+	if (static_cpu_has(X86_FEATURE_XSAVES))
 		XSTATE_OP(XRSTORS, xstate, lmask, hmask, err);
 	else
 		XSTATE_OP(XRSTOR, xstate, lmask, hmask, err);
@@ -460,7 +461,7 @@
 	 * pending. Clear the x87 state here by setting it to fixed values.
 	 * "m" is a random variable that should be in L1.
 	 */
-	if (unlikely(static_cpu_has_bug_safe(X86_BUG_FXSAVE_LEAK))) {
+	if (unlikely(static_cpu_has_bug(X86_BUG_FXSAVE_LEAK))) {
 		asm volatile(
 			"fnclex\n\t"
 			"emms\n\t"
@@ -589,7 +590,8 @@
 	 * If the task has used the math, pre-load the FPU on xsave processors
 	 * or if the past 5 consecutive context-switches used math.
 	 */
-	fpu.preload = new_fpu->fpstate_active &&
+	fpu.preload = static_cpu_has(X86_FEATURE_FPU) &&
+		      new_fpu->fpstate_active &&
 		      (use_eager_fpu() || new_fpu->counter > 5);
 
 	if (old_fpu->fpregs_active) {
diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index af30fde..f23cd8c 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -20,16 +20,15 @@
 
 /* Supported features which support lazy state saving */
 #define XFEATURE_MASK_LAZY	(XFEATURE_MASK_FP | \
-				 XFEATURE_MASK_SSE)
-
-/* Supported features which require eager state saving */
-#define XFEATURE_MASK_EAGER	(XFEATURE_MASK_BNDREGS | \
-				 XFEATURE_MASK_BNDCSR | \
+				 XFEATURE_MASK_SSE | \
 				 XFEATURE_MASK_YMM | \
 				 XFEATURE_MASK_OPMASK | \
 				 XFEATURE_MASK_ZMM_Hi256 | \
 				 XFEATURE_MASK_Hi16_ZMM)
 
+/* Supported features which require eager state saving */
+#define XFEATURE_MASK_EAGER	(XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR)
+
 /* All currently supported features */
 #define XCNTXT_MASK	(XFEATURE_MASK_LAZY | XFEATURE_MASK_EAGER)
 
diff --git a/arch/x86/include/asm/frame.h b/arch/x86/include/asm/frame.h
index 793179c..6e4d170 100644
--- a/arch/x86/include/asm/frame.h
+++ b/arch/x86/include/asm/frame.h
@@ -1,23 +1,44 @@
-#ifdef __ASSEMBLY__
+#ifndef _ASM_X86_FRAME_H
+#define _ASM_X86_FRAME_H
 
 #include <asm/asm.h>
 
-/* The annotation hides the frame from the unwinder and makes it look
-   like a ordinary ebp save/restore. This avoids some special cases for
-   frame pointer later */
-#ifdef CONFIG_FRAME_POINTER
-	.macro FRAME
-	__ASM_SIZE(push,)	%__ASM_REG(bp)
-	__ASM_SIZE(mov)		%__ASM_REG(sp), %__ASM_REG(bp)
-	.endm
-	.macro ENDFRAME
-	__ASM_SIZE(pop,)	%__ASM_REG(bp)
-	.endm
-#else
-	.macro FRAME
-	.endm
-	.macro ENDFRAME
-	.endm
-#endif
+/*
+ * These are stack frame creation macros.  They should be used by every
+ * callable non-leaf asm function to make kernel stack traces more reliable.
+ */
 
-#endif  /*  __ASSEMBLY__  */
+#ifdef CONFIG_FRAME_POINTER
+
+#ifdef __ASSEMBLY__
+
+.macro FRAME_BEGIN
+	push %_ASM_BP
+	_ASM_MOV %_ASM_SP, %_ASM_BP
+.endm
+
+.macro FRAME_END
+	pop %_ASM_BP
+.endm
+
+#else /* !__ASSEMBLY__ */
+
+#define FRAME_BEGIN				\
+	"push %" _ASM_BP "\n"			\
+	_ASM_MOV "%" _ASM_SP ", %" _ASM_BP "\n"
+
+#define FRAME_END "pop %" _ASM_BP "\n"
+
+#endif /* __ASSEMBLY__ */
+
+#define FRAME_OFFSET __ASM_SEL(4, 8)
+
+#else /* !CONFIG_FRAME_POINTER */
+
+#define FRAME_BEGIN
+#define FRAME_END
+#define FRAME_OFFSET 0
+
+#endif /* CONFIG_FRAME_POINTER */
+
+#endif /* _ASM_X86_FRAME_H */
diff --git a/arch/x86/include/asm/imr.h b/arch/x86/include/asm/imr.h
index cd2ce40..ebea2c9 100644
--- a/arch/x86/include/asm/imr.h
+++ b/arch/x86/include/asm/imr.h
@@ -53,7 +53,7 @@
 #define IMR_MASK		(IMR_ALIGN - 1)
 
 int imr_add_range(phys_addr_t base, size_t size,
-		  unsigned int rmask, unsigned int wmask, bool lock);
+		  unsigned int rmask, unsigned int wmask);
 
 int imr_remove_range(phys_addr_t base, size_t size);
 
diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h
index 881b476..e7de5c9 100644
--- a/arch/x86/include/asm/irq.h
+++ b/arch/x86/include/asm/irq.h
@@ -23,11 +23,13 @@
 
 #define __ARCH_HAS_DO_SOFTIRQ
 
+struct irq_desc;
+
 #ifdef CONFIG_HOTPLUG_CPU
 #include <linux/cpumask.h>
 extern int check_irq_vectors_for_cpu_disable(void);
 extern void fixup_irqs(void);
-extern void irq_force_complete_move(int);
+extern void irq_force_complete_move(struct irq_desc *desc);
 #endif
 
 #ifdef CONFIG_HAVE_KVM
@@ -37,7 +39,6 @@
 extern void (*x86_platform_ipi_callback)(void);
 extern void native_init_IRQ(void);
 
-struct irq_desc;
 extern bool handle_irq(struct irq_desc *desc, struct pt_regs *regs);
 
 extern __visible unsigned int do_IRQ(struct pt_regs *regs);
diff --git a/arch/x86/include/asm/irq_work.h b/arch/x86/include/asm/irq_work.h
index 78162f8..d0afb05 100644
--- a/arch/x86/include/asm/irq_work.h
+++ b/arch/x86/include/asm/irq_work.h
@@ -1,7 +1,7 @@
 #ifndef _ASM_IRQ_WORK_H
 #define _ASM_IRQ_WORK_H
 
-#include <asm/processor.h>
+#include <asm/cpufeature.h>
 
 static inline bool arch_irq_work_has_interrupt(void)
 {
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index c1adf33..bc62e7c 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -17,15 +17,8 @@
 }
 #endif /* CONFIG_KVM_GUEST */
 
-#ifdef CONFIG_DEBUG_RODATA
 #define KVM_HYPERCALL \
         ALTERNATIVE(".byte 0x0f,0x01,0xc1", ".byte 0x0f,0x01,0xd9", X86_FEATURE_VMMCALL)
-#else
-/* On AMD processors, vmcall will generate a trap that we will
- * then rewrite to the appropriate instruction.
- */
-#define KVM_HYPERCALL ".byte 0x0f,0x01,0xc1"
-#endif
 
 /* For KVM hypercalls, a three-byte sequence of either the vmcall or the vmmcall
  * instruction.  The hypervisor may replace it with something else but only the
diff --git a/arch/x86/include/asm/livepatch.h b/arch/x86/include/asm/livepatch.h
index 19c099a..e795f52 100644
--- a/arch/x86/include/asm/livepatch.h
+++ b/arch/x86/include/asm/livepatch.h
@@ -41,7 +41,7 @@
 	regs->ip = ip;
 }
 #else
-#error Live patching support is disabled; check CONFIG_LIVEPATCH
+#error Include linux/livepatch.h, not asm/livepatch.h
 #endif
 
 #endif /* _ASM_X86_LIVEPATCH_H */
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 2ea4527..92b6f65 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -40,8 +40,20 @@
 #define MCI_STATUS_AR	 (1ULL<<55)  /* Action required */
 
 /* AMD-specific bits */
-#define MCI_STATUS_DEFERRED	(1ULL<<44)  /* declare an uncorrected error */
+#define MCI_STATUS_DEFERRED	(1ULL<<44)  /* uncorrected error, deferred exception */
 #define MCI_STATUS_POISON	(1ULL<<43)  /* access poisonous data */
+#define MCI_STATUS_TCC		(1ULL<<55)  /* Task context corrupt */
+
+/*
+ * McaX field if set indicates a given bank supports MCA extensions:
+ *  - Deferred error interrupt type is specifiable by bank.
+ *  - MCx_MISC0[BlkPtr] field indicates presence of extended MISC registers,
+ *    But should not be used to determine MSR numbers.
+ *  - TCC bit is present in MCx_STATUS.
+ */
+#define MCI_CONFIG_MCAX		0x1
+#define MCI_IPID_MCATYPE	0xFFFF0000
+#define MCI_IPID_HWID		0xFFF
 
 /*
  * Note that the full MCACOD field of IA32_MCi_STATUS MSR is
@@ -91,6 +103,16 @@
 #define MCE_LOG_LEN 32
 #define MCE_LOG_SIGNATURE	"MACHINECHECK"
 
+/* AMD Scalable MCA */
+#define MSR_AMD64_SMCA_MC0_MISC0	0xc0002003
+#define MSR_AMD64_SMCA_MC0_CONFIG	0xc0002004
+#define MSR_AMD64_SMCA_MC0_IPID		0xc0002005
+#define MSR_AMD64_SMCA_MC0_MISC1	0xc000200a
+#define MSR_AMD64_SMCA_MCx_MISC(x)	(MSR_AMD64_SMCA_MC0_MISC0 + 0x10*(x))
+#define MSR_AMD64_SMCA_MCx_CONFIG(x)	(MSR_AMD64_SMCA_MC0_CONFIG + 0x10*(x))
+#define MSR_AMD64_SMCA_MCx_IPID(x)	(MSR_AMD64_SMCA_MC0_IPID + 0x10*(x))
+#define MSR_AMD64_SMCA_MCx_MISCy(x, y)	((MSR_AMD64_SMCA_MC0_MISC1 + y) + (0x10*(x)))
+
 /*
  * This structure contains all data related to the MCE log.  Also
  * carries a signature to make it easier to find from external
@@ -113,6 +135,7 @@
 	bool ignore_ce;
 	bool disabled;
 	bool ser;
+	bool recovery;
 	bool bios_cmci_threshold;
 	u8 banks;
 	s8 bootlog;
@@ -287,4 +310,49 @@
 extern void apei_mce_report_mem_error(int corrected,
 				      struct cper_sec_mem_err *mem_err);
 
+/*
+ * Enumerate new IP types and HWID values in AMD processors which support
+ * Scalable MCA.
+ */
+#ifdef CONFIG_X86_MCE_AMD
+enum amd_ip_types {
+	SMCA_F17H_CORE = 0,	/* Core errors */
+	SMCA_DF,		/* Data Fabric */
+	SMCA_UMC,		/* Unified Memory Controller */
+	SMCA_PB,		/* Parameter Block */
+	SMCA_PSP,		/* Platform Security Processor */
+	SMCA_SMU,		/* System Management Unit */
+	N_AMD_IP_TYPES
+};
+
+struct amd_hwid {
+	const char *name;
+	unsigned int hwid;
+};
+
+extern struct amd_hwid amd_hwids[N_AMD_IP_TYPES];
+
+enum amd_core_mca_blocks {
+	SMCA_LS = 0,	/* Load Store */
+	SMCA_IF,	/* Instruction Fetch */
+	SMCA_L2_CACHE,	/* L2 cache */
+	SMCA_DE,	/* Decoder unit */
+	RES,		/* Reserved */
+	SMCA_EX,	/* Execution unit */
+	SMCA_FP,	/* Floating Point */
+	SMCA_L3_CACHE,	/* L3 cache */
+	N_CORE_MCA_BLOCKS
+};
+
+extern const char * const amd_core_mcablock_names[N_CORE_MCA_BLOCKS];
+
+enum amd_df_mca_blocks {
+	SMCA_CS = 0,	/* Coherent Slave */
+	SMCA_PIE,	/* Power management, Interrupts, etc */
+	N_DF_BLOCKS
+};
+
+extern const char * const amd_df_mcablock_names[N_DF_BLOCKS];
+#endif
+
 #endif /* _ASM_X86_MCE_H */
diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h
index 1e1b07a..9d3a96c 100644
--- a/arch/x86/include/asm/microcode.h
+++ b/arch/x86/include/asm/microcode.h
@@ -3,6 +3,7 @@
 
 #include <asm/cpu.h>
 #include <linux/earlycpio.h>
+#include <linux/initrd.h>
 
 #define native_rdmsr(msr, val1, val2)			\
 do {							\
@@ -143,4 +144,29 @@
 static inline bool
 get_builtin_firmware(struct cpio_data *cd, const char *name)	{ return false; }
 #endif
+
+static inline unsigned long get_initrd_start(void)
+{
+#ifdef CONFIG_BLK_DEV_INITRD
+	return initrd_start;
+#else
+	return 0;
+#endif
+}
+
+static inline unsigned long get_initrd_start_addr(void)
+{
+#ifdef CONFIG_BLK_DEV_INITRD
+#ifdef CONFIG_X86_32
+	unsigned long *initrd_start_p = (unsigned long *)__pa_nodebug(&initrd_start);
+
+	return (unsigned long)__pa_nodebug(*initrd_start_p);
+#else
+	return get_initrd_start();
+#endif
+#else /* CONFIG_BLK_DEV_INITRD */
+	return 0;
+#endif
+}
+
 #endif /* _ASM_X86_MICROCODE_H */
diff --git a/arch/x86/include/asm/microcode_intel.h b/arch/x86/include/asm/microcode_intel.h
index 8559b01..603417f 100644
--- a/arch/x86/include/asm/microcode_intel.h
+++ b/arch/x86/include/asm/microcode_intel.h
@@ -40,7 +40,6 @@
 #define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE)
 #define EXT_HEADER_SIZE		(sizeof(struct extended_sigtable))
 #define EXT_SIGNATURE_SIZE	(sizeof(struct extended_signature))
-#define DWSIZE			(sizeof(u32))
 
 #define get_totalsize(mc) \
 	(((struct microcode_intel *)mc)->hdr.datasize ? \
diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
index 55234d5..1ea0bae 100644
--- a/arch/x86/include/asm/mmu.h
+++ b/arch/x86/include/asm/mmu.h
@@ -19,7 +19,8 @@
 #endif
 
 	struct mutex lock;
-	void __user *vdso;
+	void __user *vdso;			/* vdso base address */
+	const struct vdso_image *vdso_image;	/* vdso image in use */
 
 	atomic_t perf_rdpmc_allowed;	/* nonzero if rdpmc is allowed */
 } mm_context_t;
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index b05402e..984ab75 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -1,7 +1,12 @@
 #ifndef _ASM_X86_MSR_INDEX_H
 #define _ASM_X86_MSR_INDEX_H
 
-/* CPU model specific register (MSR) numbers */
+/*
+ * CPU model specific register (MSR) numbers.
+ *
+ * Do not add new entries to this file unless the definitions are shared
+ * between multiple compilation units.
+ */
 
 /* x86-64 specific MSRs */
 #define MSR_EFER		0xc0000080 /* extended feature register */
diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
index c70689b..0deeb2d 100644
--- a/arch/x86/include/asm/mwait.h
+++ b/arch/x86/include/asm/mwait.h
@@ -3,6 +3,8 @@
 
 #include <linux/sched.h>
 
+#include <asm/cpufeature.h>
+
 #define MWAIT_SUBSTATE_MASK		0xf
 #define MWAIT_CSTATE_MASK		0xf
 #define MWAIT_SUBSTATE_SIZE		4
diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h
index 46873fb..d08eacd2 100644
--- a/arch/x86/include/asm/pci_x86.h
+++ b/arch/x86/include/asm/pci_x86.h
@@ -93,6 +93,8 @@
 extern int (*pcibios_enable_irq)(struct pci_dev *dev);
 extern void (*pcibios_disable_irq)(struct pci_dev *dev);
 
+extern bool mp_should_keep_irq(struct device *dev);
+
 struct pci_raw_ops {
 	int (*read)(unsigned int domain, unsigned int bus, unsigned int devfn,
 						int reg, int len, u32 *val);
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 7bcb861..5a2ed3e 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -165,6 +165,7 @@
 #define GLOBAL_STATUS_ASIF				BIT_ULL(60)
 #define GLOBAL_STATUS_COUNTERS_FROZEN			BIT_ULL(59)
 #define GLOBAL_STATUS_LBRS_FROZEN			BIT_ULL(58)
+#define GLOBAL_STATUS_TRACE_TOPAPMI			BIT_ULL(55)
 
 /*
  * IBS cpuid feature detection
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 04c27a0..4432ab7 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -366,20 +366,18 @@
 }
 static inline pgprot_t pgprot_4k_2_large(pgprot_t pgprot)
 {
+	pgprotval_t val = pgprot_val(pgprot);
 	pgprot_t new;
-	unsigned long val;
 
-	val = pgprot_val(pgprot);
 	pgprot_val(new) = (val & ~(_PAGE_PAT | _PAGE_PAT_LARGE)) |
 		((val & _PAGE_PAT) << (_PAGE_BIT_PAT_LARGE - _PAGE_BIT_PAT));
 	return new;
 }
 static inline pgprot_t pgprot_large_2_4k(pgprot_t pgprot)
 {
+	pgprotval_t val = pgprot_val(pgprot);
 	pgprot_t new;
-	unsigned long val;
 
-	val = pgprot_val(pgprot);
 	pgprot_val(new) = (val & ~(_PAGE_PAT | _PAGE_PAT_LARGE)) |
 			  ((val & _PAGE_PAT_LARGE) >>
 			   (_PAGE_BIT_PAT_LARGE - _PAGE_BIT_PAT));
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 2d5a50c..983738a 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -13,7 +13,7 @@
 #include <asm/types.h>
 #include <uapi/asm/sigcontext.h>
 #include <asm/current.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
 #include <asm/page.h>
 #include <asm/pgtable_types.h>
 #include <asm/percpu.h>
@@ -24,7 +24,6 @@
 #include <asm/fpu/types.h>
 
 #include <linux/personality.h>
-#include <linux/cpumask.h>
 #include <linux/cache.h>
 #include <linux/threads.h>
 #include <linux/math64.h>
@@ -129,6 +128,8 @@
 	u16			booted_cores;
 	/* Physical processor id: */
 	u16			phys_proc_id;
+	/* Logical processor id: */
+	u16			logical_proc_id;
 	/* Core id: */
 	u16			cpu_core_id;
 	/* Compute unit id */
@@ -298,10 +299,13 @@
 	 */
 	unsigned long		io_bitmap[IO_BITMAP_LONGS + 1];
 
+#ifdef CONFIG_X86_32
 	/*
-	 * Space for the temporary SYSENTER stack:
+	 * Space for the temporary SYSENTER stack.
 	 */
+	unsigned long		SYSENTER_stack_canary;
 	unsigned long		SYSENTER_stack[64];
+#endif
 
 } ____cacheline_aligned;
 
@@ -766,7 +770,7 @@
  * Return saved PC of a blocked thread.
  * What is this good for? it will be always the scheduler or ret_from_fork.
  */
-#define thread_saved_pc(t)	(*(unsigned long *)((t)->thread.sp - 8))
+#define thread_saved_pc(t)	READ_ONCE_NOCHECK(*(unsigned long *)((t)->thread.sp - 8))
 
 #define task_pt_regs(tsk)	((struct pt_regs *)(tsk)->thread.sp0 - 1)
 extern unsigned long KSTK_ESP(struct task_struct *task);
diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h
index a4a7728..9b9b30b1 100644
--- a/arch/x86/include/asm/proto.h
+++ b/arch/x86/include/asm/proto.h
@@ -7,12 +7,23 @@
 
 void syscall_init(void);
 
+#ifdef CONFIG_X86_64
 void entry_SYSCALL_64(void);
-void entry_SYSCALL_compat(void);
+#endif
+
+#ifdef CONFIG_X86_32
 void entry_INT80_32(void);
-void entry_INT80_compat(void);
 void entry_SYSENTER_32(void);
+void __begin_SYSENTER_singlestep_region(void);
+void __end_SYSENTER_singlestep_region(void);
+#endif
+
+#ifdef CONFIG_IA32_EMULATION
 void entry_SYSENTER_compat(void);
+void __end_entry_SYSENTER_compat(void);
+void entry_SYSCALL_compat(void);
+void entry_INT80_compat(void);
+#endif
 
 void x86_configure_nx(void);
 void x86_report_nx(void);
diff --git a/arch/x86/include/asm/sections.h b/arch/x86/include/asm/sections.h
index 0a52424..13b6cdd 100644
--- a/arch/x86/include/asm/sections.h
+++ b/arch/x86/include/asm/sections.h
@@ -7,7 +7,7 @@
 extern char __brk_base[], __brk_limit[];
 extern struct exception_table_entry __stop___ex_table[];
 
-#if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA)
+#if defined(CONFIG_X86_64)
 extern char __end_rodata_hpage_align[];
 #endif
 
diff --git a/arch/x86/include/asm/sighandling.h b/arch/x86/include/asm/sighandling.h
index 89db467..452c88b 100644
--- a/arch/x86/include/asm/sighandling.h
+++ b/arch/x86/include/asm/sighandling.h
@@ -13,7 +13,6 @@
 			 X86_EFLAGS_CF | X86_EFLAGS_RF)
 
 void signal_fault(struct pt_regs *regs, void __user *frame, char *where);
-int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc);
 int setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
 		     struct pt_regs *regs, unsigned long mask);
 
diff --git a/arch/x86/include/asm/smap.h b/arch/x86/include/asm/smap.h
index ba665eb..db33330 100644
--- a/arch/x86/include/asm/smap.h
+++ b/arch/x86/include/asm/smap.h
@@ -15,7 +15,7 @@
 
 #include <linux/stringify.h>
 #include <asm/nops.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
 
 /* "Raw" instruction opcodes */
 #define __ASM_CLAC	.byte 0x0f,0x01,0xca
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index dfcf072..20a3de5 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -16,7 +16,6 @@
 #endif
 #include <asm/thread_info.h>
 #include <asm/cpumask.h>
-#include <asm/cpufeature.h>
 
 extern int smp_num_siblings;
 extern unsigned int num_processors;
diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h
index ff8b9a1..ca6ba36 100644
--- a/arch/x86/include/asm/string_64.h
+++ b/arch/x86/include/asm/string_64.h
@@ -78,6 +78,19 @@
 #define memset(s, c, n) __memset(s, c, n)
 #endif
 
+/**
+ * memcpy_mcsafe - copy memory with indication if a machine check happened
+ *
+ * @dst:	destination address
+ * @src:	source address
+ * @cnt:	number of bytes to copy
+ *
+ * Low level memory copy function that catches machine checks
+ *
+ * Return true for success, false for fail
+ */
+bool memcpy_mcsafe(void *dst, const void *src, size_t cnt);
+
 #endif /* __KERNEL__ */
 
 #endif /* _ASM_X86_STRING_64_H */
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index c7b5510..8286669 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -49,7 +49,7 @@
  */
 #ifndef __ASSEMBLY__
 struct task_struct;
-#include <asm/processor.h>
+#include <asm/cpufeature.h>
 #include <linux/atomic.h>
 
 struct thread_info {
@@ -134,10 +134,13 @@
 #define _TIF_ADDR32		(1 << TIF_ADDR32)
 #define _TIF_X32		(1 << TIF_X32)
 
-/* work to do in syscall_trace_enter() */
+/*
+ * work to do in syscall_trace_enter().  Also includes TIF_NOHZ for
+ * enter_from_user_mode()
+ */
 #define _TIF_WORK_SYSCALL_ENTRY	\
 	(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU | _TIF_SYSCALL_AUDIT |	\
-	 _TIF_SECCOMP | _TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT |	\
+	 _TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT |	\
 	 _TIF_NOHZ)
 
 /* work to do on any return to user space */
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 6df2029..c24b422 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -5,8 +5,57 @@
 #include <linux/sched.h>
 
 #include <asm/processor.h>
+#include <asm/cpufeature.h>
 #include <asm/special_insns.h>
 
+static inline void __invpcid(unsigned long pcid, unsigned long addr,
+			     unsigned long type)
+{
+	struct { u64 d[2]; } desc = { { pcid, addr } };
+
+	/*
+	 * The memory clobber is because the whole point is to invalidate
+	 * stale TLB entries and, especially if we're flushing global
+	 * mappings, we don't want the compiler to reorder any subsequent
+	 * memory accesses before the TLB flush.
+	 *
+	 * The hex opcode is invpcid (%ecx), %eax in 32-bit mode and
+	 * invpcid (%rcx), %rax in long mode.
+	 */
+	asm volatile (".byte 0x66, 0x0f, 0x38, 0x82, 0x01"
+		      : : "m" (desc), "a" (type), "c" (&desc) : "memory");
+}
+
+#define INVPCID_TYPE_INDIV_ADDR		0
+#define INVPCID_TYPE_SINGLE_CTXT	1
+#define INVPCID_TYPE_ALL_INCL_GLOBAL	2
+#define INVPCID_TYPE_ALL_NON_GLOBAL	3
+
+/* Flush all mappings for a given pcid and addr, not including globals. */
+static inline void invpcid_flush_one(unsigned long pcid,
+				     unsigned long addr)
+{
+	__invpcid(pcid, addr, INVPCID_TYPE_INDIV_ADDR);
+}
+
+/* Flush all mappings for a given PCID, not including globals. */
+static inline void invpcid_flush_single_context(unsigned long pcid)
+{
+	__invpcid(pcid, 0, INVPCID_TYPE_SINGLE_CTXT);
+}
+
+/* Flush all mappings, including globals, for all PCIDs. */
+static inline void invpcid_flush_all(void)
+{
+	__invpcid(0, 0, INVPCID_TYPE_ALL_INCL_GLOBAL);
+}
+
+/* Flush all mappings for all PCIDs except globals. */
+static inline void invpcid_flush_all_nonglobals(void)
+{
+	__invpcid(0, 0, INVPCID_TYPE_ALL_NON_GLOBAL);
+}
+
 #ifdef CONFIG_PARAVIRT
 #include <asm/paravirt.h>
 #else
@@ -104,6 +153,15 @@
 {
 	unsigned long flags;
 
+	if (static_cpu_has(X86_FEATURE_INVPCID)) {
+		/*
+		 * Using INVPCID is considerably faster than a pair of writes
+		 * to CR4 sandwiched inside an IRQ flag save/restore.
+		 */
+		invpcid_flush_all();
+		return;
+	}
+
 	/*
 	 * Read-modify-write to CR4 - protect it from preemption and
 	 * from interrupts. (Use the raw variant because this code can
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 0fb4648..7f991bd5 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -119,12 +119,23 @@
 
 extern const struct cpumask *cpu_coregroup_mask(int cpu);
 
+#define topology_logical_package_id(cpu)	(cpu_data(cpu).logical_proc_id)
 #define topology_physical_package_id(cpu)	(cpu_data(cpu).phys_proc_id)
 #define topology_core_id(cpu)			(cpu_data(cpu).cpu_core_id)
 
 #ifdef ENABLE_TOPO_DEFINES
 #define topology_core_cpumask(cpu)		(per_cpu(cpu_core_map, cpu))
 #define topology_sibling_cpumask(cpu)		(per_cpu(cpu_sibling_map, cpu))
+
+extern unsigned int __max_logical_packages;
+#define topology_max_packages()			(__max_logical_packages)
+int topology_update_package_map(unsigned int apicid, unsigned int cpu);
+extern int topology_phys_to_logical_pkg(unsigned int pkg);
+#else
+#define topology_max_packages()			(1)
+static inline int
+topology_update_package_map(unsigned int apicid, unsigned int cpu) { return 0; }
+static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; }
 #endif
 
 static inline void arch_fix_phys_package_id(int num, u32 slot)
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index a4a30e4..c0f27d7 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -90,12 +90,11 @@
 	likely(!__range_not_ok(addr, size, user_addr_max()))
 
 /*
- * The exception table consists of pairs of addresses relative to the
- * exception table enty itself: the first is the address of an
- * instruction that is allowed to fault, and the second is the address
- * at which the program should continue.  No registers are modified,
- * so it is entirely up to the continuation code to figure out what to
- * do.
+ * The exception table consists of triples of addresses relative to the
+ * exception table entry itself. The first address is of an instruction
+ * that is allowed to fault, the second is the target at which the program
+ * should continue. The third is a handler function to deal with the fault
+ * caused by the instruction in the first field.
  *
  * All the routines below use bits of fixup code that are out of line
  * with the main instruction path.  This means when everything is well,
@@ -104,13 +103,14 @@
  */
 
 struct exception_table_entry {
-	int insn, fixup;
+	int insn, fixup, handler;
 };
 /* This is not the generic standard exception_table_entry format */
 #define ARCH_HAS_SORT_EXTABLE
 #define ARCH_HAS_SEARCH_EXTABLE
 
-extern int fixup_exception(struct pt_regs *regs);
+extern int fixup_exception(struct pt_regs *regs, int trapnr);
+extern bool ex_has_fault_handler(unsigned long ip);
 extern int early_fixup_exception(unsigned long *ip);
 
 /*
diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h
index f5dcb52..3fe0eac 100644
--- a/arch/x86/include/asm/uaccess_32.h
+++ b/arch/x86/include/asm/uaccess_32.h
@@ -48,20 +48,28 @@
 
 		switch (n) {
 		case 1:
+			__uaccess_begin();
 			__put_user_size(*(u8 *)from, (u8 __user *)to,
 					1, ret, 1);
+			__uaccess_end();
 			return ret;
 		case 2:
+			__uaccess_begin();
 			__put_user_size(*(u16 *)from, (u16 __user *)to,
 					2, ret, 2);
+			__uaccess_end();
 			return ret;
 		case 4:
+			__uaccess_begin();
 			__put_user_size(*(u32 *)from, (u32 __user *)to,
 					4, ret, 4);
+			__uaccess_end();
 			return ret;
 		case 8:
+			__uaccess_begin();
 			__put_user_size(*(u64 *)from, (u64 __user *)to,
 					8, ret, 8);
+			__uaccess_end();
 			return ret;
 		}
 	}
@@ -103,13 +111,19 @@
 
 		switch (n) {
 		case 1:
+			__uaccess_begin();
 			__get_user_size(*(u8 *)to, from, 1, ret, 1);
+			__uaccess_end();
 			return ret;
 		case 2:
+			__uaccess_begin();
 			__get_user_size(*(u16 *)to, from, 2, ret, 2);
+			__uaccess_end();
 			return ret;
 		case 4:
+			__uaccess_begin();
 			__get_user_size(*(u32 *)to, from, 4, ret, 4);
+			__uaccess_end();
 			return ret;
 		}
 	}
@@ -148,13 +162,19 @@
 
 		switch (n) {
 		case 1:
+			__uaccess_begin();
 			__get_user_size(*(u8 *)to, from, 1, ret, 1);
+			__uaccess_end();
 			return ret;
 		case 2:
+			__uaccess_begin();
 			__get_user_size(*(u16 *)to, from, 2, ret, 2);
+			__uaccess_end();
 			return ret;
 		case 4:
+			__uaccess_begin();
 			__get_user_size(*(u32 *)to, from, 4, ret, 4);
+			__uaccess_end();
 			return ret;
 		}
 	}
@@ -170,13 +190,19 @@
 
 		switch (n) {
 		case 1:
+			__uaccess_begin();
 			__get_user_size(*(u8 *)to, from, 1, ret, 1);
+			__uaccess_end();
 			return ret;
 		case 2:
+			__uaccess_begin();
 			__get_user_size(*(u16 *)to, from, 2, ret, 2);
+			__uaccess_end();
 			return ret;
 		case 4:
+			__uaccess_begin();
 			__get_user_size(*(u32 *)to, from, 4, ret, 4);
+			__uaccess_end();
 			return ret;
 		}
 	}
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index b89c34c..3076986 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -8,7 +8,7 @@
 #include <linux/errno.h>
 #include <linux/lockdep.h>
 #include <asm/alternative.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
 #include <asm/page.h>
 
 /*
diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h
index deabaf9..43dc55b 100644
--- a/arch/x86/include/asm/vdso.h
+++ b/arch/x86/include/asm/vdso.h
@@ -13,9 +13,6 @@
 	void *data;
 	unsigned long size;   /* Always a multiple of PAGE_SIZE */
 
-	/* text_mapping.pages is big enough for data/size page pointers */
-	struct vm_special_mapping text_mapping;
-
 	unsigned long alt, alt_len;
 
 	long sym_vvar_start;  /* Negative offset to the vvar area */
diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h
index f556c48..e728699 100644
--- a/arch/x86/include/asm/vgtod.h
+++ b/arch/x86/include/asm/vgtod.h
@@ -37,6 +37,12 @@
 };
 extern struct vsyscall_gtod_data vsyscall_gtod_data;
 
+extern int vclocks_used;
+static inline bool vclock_was_used(int vclock)
+{
+	return READ_ONCE(vclocks_used) & (1 << vclock);
+}
+
 static inline unsigned gtod_read_begin(const struct vsyscall_gtod_data *s)
 {
 	unsigned ret;
diff --git a/arch/x86/include/asm/xen/pci.h b/arch/x86/include/asm/xen/pci.h
index 968d57d..f320ee3 100644
--- a/arch/x86/include/asm/xen/pci.h
+++ b/arch/x86/include/asm/xen/pci.h
@@ -57,7 +57,7 @@
 {
 	if (xen_pci_frontend && xen_pci_frontend->enable_msi)
 		return xen_pci_frontend->enable_msi(dev, vectors);
-	return -ENODEV;
+	return -ENOSYS;
 }
 static inline void xen_pci_frontend_disable_msi(struct pci_dev *dev)
 {
@@ -69,7 +69,7 @@
 {
 	if (xen_pci_frontend && xen_pci_frontend->enable_msix)
 		return xen_pci_frontend->enable_msix(dev, vectors, nvec);
-	return -ENODEV;
+	return -ENOSYS;
 }
 static inline void xen_pci_frontend_disable_msix(struct pci_dev *dev)
 {
diff --git a/arch/x86/include/uapi/asm/sigcontext.h b/arch/x86/include/uapi/asm/sigcontext.h
index d485232..62d4111 100644
--- a/arch/x86/include/uapi/asm/sigcontext.h
+++ b/arch/x86/include/uapi/asm/sigcontext.h
@@ -256,7 +256,7 @@
 	__u16				cs;
 	__u16				gs;
 	__u16				fs;
-	__u16				__pad0;
+	__u16				ss;
 	__u64				err;
 	__u64				trapno;
 	__u64				oldmask;
@@ -341,9 +341,37 @@
 	__u64				rip;
 	__u64				eflags;		/* RFLAGS */
 	__u16				cs;
+
+	/*
+	 * Prior to 2.5.64 ("[PATCH] x86-64 updates for 2.5.64-bk3"),
+	 * Linux saved and restored fs and gs in these slots.  This
+	 * was counterproductive, as fsbase and gsbase were never
+	 * saved, so arch_prctl was presumably unreliable.
+	 *
+	 * These slots should never be reused without extreme caution:
+	 *
+	 *  - Some DOSEMU versions stash fs and gs in these slots manually,
+	 *    thus overwriting anything the kernel expects to be preserved
+	 *    in these slots.
+	 *
+	 *  - If these slots are ever needed for any other purpose,
+	 *    there is some risk that very old 64-bit binaries could get
+	 *    confused.  I doubt that many such binaries still work,
+	 *    though, since the same patch in 2.5.64 also removed the
+	 *    64-bit set_thread_area syscall, so it appears that there
+	 *    is no TLS API beyond modify_ldt that works in both pre-
+	 *    and post-2.5.64 kernels.
+	 *
+	 * If the kernel ever adds explicit fs, gs, fsbase, and gsbase
+	 * save/restore, it will most likely need to be opt-in and use
+	 * different context slots.
+	 */
 	__u16				gs;
 	__u16				fs;
-	__u16				__pad0;
+	union {
+		__u16			ss;	/* If UC_SIGCONTEXT_SS */
+		__u16			__pad0;	/* Alias name for old (!UC_SIGCONTEXT_SS) user-space */
+	};
 	__u64				err;
 	__u64				trapno;
 	__u64				oldmask;
diff --git a/arch/x86/include/uapi/asm/ucontext.h b/arch/x86/include/uapi/asm/ucontext.h
index b7c29c8..e3d1ec9 100644
--- a/arch/x86/include/uapi/asm/ucontext.h
+++ b/arch/x86/include/uapi/asm/ucontext.h
@@ -1,11 +1,54 @@
 #ifndef _ASM_X86_UCONTEXT_H
 #define _ASM_X86_UCONTEXT_H
 
-#define UC_FP_XSTATE	0x1	/* indicates the presence of extended state
-				 * information in the memory layout pointed
-				 * by the fpstate pointer in the ucontext's
-				 * sigcontext struct (uc_mcontext).
-				 */
+/*
+ * Indicates the presence of extended state information in the memory
+ * layout pointed by the fpstate pointer in the ucontext's sigcontext
+ * struct (uc_mcontext).
+ */
+#define UC_FP_XSTATE	0x1
+
+#ifdef __x86_64__
+/*
+ * UC_SIGCONTEXT_SS will be set when delivering 64-bit or x32 signals on
+ * kernels that save SS in the sigcontext.  All kernels that set
+ * UC_SIGCONTEXT_SS will correctly restore at least the low 32 bits of esp
+ * regardless of SS (i.e. they implement espfix).
+ *
+ * Kernels that set UC_SIGCONTEXT_SS will also set UC_STRICT_RESTORE_SS
+ * when delivering a signal that came from 64-bit code.
+ *
+ * Sigreturn restores SS as follows:
+ *
+ * if (saved SS is valid || UC_STRICT_RESTORE_SS is set ||
+ *     saved CS is not 64-bit)
+ *         new SS = saved SS  (will fail IRET and signal if invalid)
+ * else
+ *         new SS = a flat 32-bit data segment
+ *
+ * This behavior serves three purposes:
+ *
+ * - Legacy programs that construct a 64-bit sigcontext from scratch
+ *   with zero or garbage in the SS slot (e.g. old CRIU) and call
+ *   sigreturn will still work.
+ *
+ * - Old DOSEMU versions sometimes catch a signal from a segmented
+ *   context, delete the old SS segment (with modify_ldt), and change
+ *   the saved CS to a 64-bit segment.  These DOSEMU versions expect
+ *   sigreturn to send them back to 64-bit mode without killing them,
+ *   despite the fact that the SS selector when the signal was raised is
+ *   no longer valid.  UC_STRICT_RESTORE_SS will be clear, so the kernel
+ *   will fix up SS for these DOSEMU versions.
+ *
+ * - Old and new programs that catch a signal and return without
+ *   modifying the saved context will end up in exactly the state they
+ *   started in, even if they were running in a segmented context when
+ *   the signal was raised..  Old kernels would lose track of the
+ *   previous SS value.
+ */
+#define UC_SIGCONTEXT_SS	0x2
+#define UC_STRICT_RESTORE_SS	0x4
+#endif
 
 #include <asm-generic/ucontext.h>
 
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index d1daead..adb3eaf 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -16,6 +16,7 @@
 #include <asm/cacheflush.h>
 #include <asm/realmode.h>
 
+#include <linux/ftrace.h>
 #include "../../realmode/rm/wakeup.h"
 #include "sleep.h"
 
@@ -107,7 +108,13 @@
        saved_magic = 0x123456789abcdef0L;
 #endif /* CONFIG_64BIT */
 
+	/*
+	 * Pause/unpause graph tracing around do_suspend_lowlevel as it has
+	 * inconsistent call/return info after it jumps to the wakeup vector.
+	 */
+	pause_graph_tracing();
 	do_suspend_lowlevel();
+	unpause_graph_tracing();
 	return 0;
 }
 
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 8a5cdda..531b961 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -2078,6 +2078,20 @@
 		cpu = cpumask_next_zero(-1, cpu_present_mask);
 
 	/*
+	 * This can happen on physical hotplug. The sanity check at boot time
+	 * is done from native_smp_prepare_cpus() after num_possible_cpus() is
+	 * established.
+	 */
+	if (topology_update_package_map(apicid, cpu) < 0) {
+		int thiscpu = max + disabled_cpus;
+
+		pr_warning("ACPI: Package limit reached. Processor %d/0x%x ignored.\n",
+			   thiscpu, apicid);
+		disabled_cpus++;
+		return -ENOSPC;
+	}
+
+	/*
 	 * Validate version
 	 */
 	if (version == 0x0) {
diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c
index c80c02c..ab5c2c6 100644
--- a/arch/x86/kernel/apic/apic_numachip.c
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -30,7 +30,7 @@
 	unsigned long value;
 	unsigned int id = (x >> 24) & 0xff;
 
-	if (static_cpu_has_safe(X86_FEATURE_NODEID_MSR)) {
+	if (static_cpu_has(X86_FEATURE_NODEID_MSR)) {
 		rdmsrl(MSR_FAM10H_NODE_ID, value);
 		id |= (value << 2) & 0xff00;
 	}
@@ -178,7 +178,7 @@
 	this_cpu_write(cpu_llc_id, node);
 
 	/* Account for nodes per socket in multi-core-module processors */
-	if (static_cpu_has_safe(X86_FEATURE_NODEID_MSR)) {
+	if (static_cpu_has(X86_FEATURE_NODEID_MSR)) {
 		rdmsrl(MSR_FAM10H_NODE_ID, val);
 		nodes = ((val >> 3) & 7) + 1;
 	}
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index f253218..fdb0fbf 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -2521,6 +2521,7 @@
 {
 	int pin, ioapic, irq, irq_entry;
 	const struct cpumask *mask;
+	struct irq_desc *desc;
 	struct irq_data *idata;
 	struct irq_chip *chip;
 
@@ -2536,7 +2537,9 @@
 		if (irq < 0 || !mp_init_irq_at_boot(ioapic, irq))
 			continue;
 
-		idata = irq_get_irq_data(irq);
+		desc = irq_to_desc(irq);
+		raw_spin_lock_irq(&desc->lock);
+		idata = irq_desc_get_irq_data(desc);
 
 		/*
 		 * Honour affinities which have been set in early boot
@@ -2550,6 +2553,7 @@
 		/* Might be lapic_chip for irq 0 */
 		if (chip->irq_set_affinity)
 			chip->irq_set_affinity(idata, mask, false);
+		raw_spin_unlock_irq(&desc->lock);
 	}
 }
 #endif
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 908cb37..3b670df 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -31,7 +31,7 @@
 struct irq_domain *x86_vector_domain;
 EXPORT_SYMBOL_GPL(x86_vector_domain);
 static DEFINE_RAW_SPINLOCK(vector_lock);
-static cpumask_var_t vector_cpumask;
+static cpumask_var_t vector_cpumask, vector_searchmask, searched_cpumask;
 static struct irq_chip lapic_controller;
 #ifdef	CONFIG_X86_IO_APIC
 static struct apic_chip_data *legacy_irq_data[NR_IRQS_LEGACY];
@@ -118,35 +118,47 @@
 	 */
 	static int current_vector = FIRST_EXTERNAL_VECTOR + VECTOR_OFFSET_START;
 	static int current_offset = VECTOR_OFFSET_START % 16;
-	int cpu, err;
+	int cpu, vector;
 
-	if (d->move_in_progress)
+	/*
+	 * If there is still a move in progress or the previous move has not
+	 * been cleaned up completely, tell the caller to come back later.
+	 */
+	if (d->move_in_progress ||
+	    cpumask_intersects(d->old_domain, cpu_online_mask))
 		return -EBUSY;
 
 	/* Only try and allocate irqs on cpus that are present */
-	err = -ENOSPC;
 	cpumask_clear(d->old_domain);
+	cpumask_clear(searched_cpumask);
 	cpu = cpumask_first_and(mask, cpu_online_mask);
 	while (cpu < nr_cpu_ids) {
-		int new_cpu, vector, offset;
+		int new_cpu, offset;
 
+		/* Get the possible target cpus for @mask/@cpu from the apic */
 		apic->vector_allocation_domain(cpu, vector_cpumask, mask);
 
+		/*
+		 * Clear the offline cpus from @vector_cpumask for searching
+		 * and verify whether the result overlaps with @mask. If true,
+		 * then the call to apic->cpu_mask_to_apicid_and() will
+		 * succeed as well. If not, no point in trying to find a
+		 * vector in this mask.
+		 */
+		cpumask_and(vector_searchmask, vector_cpumask, cpu_online_mask);
+		if (!cpumask_intersects(vector_searchmask, mask))
+			goto next_cpu;
+
 		if (cpumask_subset(vector_cpumask, d->domain)) {
-			err = 0;
 			if (cpumask_equal(vector_cpumask, d->domain))
-				break;
+				goto success;
 			/*
-			 * New cpumask using the vector is a proper subset of
-			 * the current in use mask. So cleanup the vector
-			 * allocation for the members that are not used anymore.
+			 * Mark the cpus which are not longer in the mask for
+			 * cleanup.
 			 */
-			cpumask_andnot(d->old_domain, d->domain,
-				       vector_cpumask);
-			d->move_in_progress =
-			   cpumask_intersects(d->old_domain, cpu_online_mask);
-			cpumask_and(d->domain, d->domain, vector_cpumask);
-			break;
+			cpumask_andnot(d->old_domain, d->domain, vector_cpumask);
+			vector = d->cfg.vector;
+			goto update;
 		}
 
 		vector = current_vector;
@@ -158,45 +170,60 @@
 			vector = FIRST_EXTERNAL_VECTOR + offset;
 		}
 
-		if (unlikely(current_vector == vector)) {
-			cpumask_or(d->old_domain, d->old_domain,
-				   vector_cpumask);
-			cpumask_andnot(vector_cpumask, mask, d->old_domain);
-			cpu = cpumask_first_and(vector_cpumask,
-						cpu_online_mask);
-			continue;
-		}
+		/* If the search wrapped around, try the next cpu */
+		if (unlikely(current_vector == vector))
+			goto next_cpu;
 
 		if (test_bit(vector, used_vectors))
 			goto next;
 
-		for_each_cpu_and(new_cpu, vector_cpumask, cpu_online_mask) {
+		for_each_cpu(new_cpu, vector_searchmask) {
 			if (!IS_ERR_OR_NULL(per_cpu(vector_irq, new_cpu)[vector]))
 				goto next;
 		}
 		/* Found one! */
 		current_vector = vector;
 		current_offset = offset;
-		if (d->cfg.vector) {
+		/* Schedule the old vector for cleanup on all cpus */
+		if (d->cfg.vector)
 			cpumask_copy(d->old_domain, d->domain);
-			d->move_in_progress =
-			   cpumask_intersects(d->old_domain, cpu_online_mask);
-		}
-		for_each_cpu_and(new_cpu, vector_cpumask, cpu_online_mask)
+		for_each_cpu(new_cpu, vector_searchmask)
 			per_cpu(vector_irq, new_cpu)[vector] = irq_to_desc(irq);
-		d->cfg.vector = vector;
-		cpumask_copy(d->domain, vector_cpumask);
-		err = 0;
-		break;
-	}
+		goto update;
 
-	if (!err) {
-		/* cache destination APIC IDs into cfg->dest_apicid */
-		err = apic->cpu_mask_to_apicid_and(mask, d->domain,
-						   &d->cfg.dest_apicid);
+next_cpu:
+		/*
+		 * We exclude the current @vector_cpumask from the requested
+		 * @mask and try again with the next online cpu in the
+		 * result. We cannot modify @mask, so we use @vector_cpumask
+		 * as a temporary buffer here as it will be reassigned when
+		 * calling apic->vector_allocation_domain() above.
+		 */
+		cpumask_or(searched_cpumask, searched_cpumask, vector_cpumask);
+		cpumask_andnot(vector_cpumask, mask, searched_cpumask);
+		cpu = cpumask_first_and(vector_cpumask, cpu_online_mask);
+		continue;
 	}
+	return -ENOSPC;
 
-	return err;
+update:
+	/*
+	 * Exclude offline cpus from the cleanup mask and set the
+	 * move_in_progress flag when the result is not empty.
+	 */
+	cpumask_and(d->old_domain, d->old_domain, cpu_online_mask);
+	d->move_in_progress = !cpumask_empty(d->old_domain);
+	d->cfg.vector = vector;
+	cpumask_copy(d->domain, vector_cpumask);
+success:
+	/*
+	 * Cache destination APIC IDs into cfg->dest_apicid. This cannot fail
+	 * as we already established, that mask & d->domain & cpu_online_mask
+	 * is not empty.
+	 */
+	BUG_ON(apic->cpu_mask_to_apicid_and(mask, d->domain,
+					    &d->cfg.dest_apicid));
+	return 0;
 }
 
 static int assign_irq_vector(int irq, struct apic_chip_data *data,
@@ -226,10 +253,8 @@
 static void clear_irq_vector(int irq, struct apic_chip_data *data)
 {
 	struct irq_desc *desc;
-	unsigned long flags;
 	int cpu, vector;
 
-	raw_spin_lock_irqsave(&vector_lock, flags);
 	BUG_ON(!data->cfg.vector);
 
 	vector = data->cfg.vector;
@@ -239,10 +264,13 @@
 	data->cfg.vector = 0;
 	cpumask_clear(data->domain);
 
-	if (likely(!data->move_in_progress)) {
-		raw_spin_unlock_irqrestore(&vector_lock, flags);
+	/*
+	 * If move is in progress or the old_domain mask is not empty,
+	 * i.e. the cleanup IPI has not been processed yet, we need to remove
+	 * the old references to desc from all cpus vector tables.
+	 */
+	if (!data->move_in_progress && cpumask_empty(data->old_domain))
 		return;
-	}
 
 	desc = irq_to_desc(irq);
 	for_each_cpu_and(cpu, data->old_domain, cpu_online_mask) {
@@ -255,7 +283,6 @@
 		}
 	}
 	data->move_in_progress = 0;
-	raw_spin_unlock_irqrestore(&vector_lock, flags);
 }
 
 void init_irq_alloc_info(struct irq_alloc_info *info,
@@ -276,19 +303,24 @@
 static void x86_vector_free_irqs(struct irq_domain *domain,
 				 unsigned int virq, unsigned int nr_irqs)
 {
+	struct apic_chip_data *apic_data;
 	struct irq_data *irq_data;
+	unsigned long flags;
 	int i;
 
 	for (i = 0; i < nr_irqs; i++) {
 		irq_data = irq_domain_get_irq_data(x86_vector_domain, virq + i);
 		if (irq_data && irq_data->chip_data) {
+			raw_spin_lock_irqsave(&vector_lock, flags);
 			clear_irq_vector(virq + i, irq_data->chip_data);
-			free_apic_chip_data(irq_data->chip_data);
+			apic_data = irq_data->chip_data;
+			irq_domain_reset_irq_data(irq_data);
+			raw_spin_unlock_irqrestore(&vector_lock, flags);
+			free_apic_chip_data(apic_data);
 #ifdef	CONFIG_X86_IO_APIC
 			if (virq + i < nr_legacy_irqs())
 				legacy_irq_data[virq + i] = NULL;
 #endif
-			irq_domain_reset_irq_data(irq_data);
 		}
 	}
 }
@@ -406,6 +438,8 @@
 	arch_init_htirq_domain(x86_vector_domain);
 
 	BUG_ON(!alloc_cpumask_var(&vector_cpumask, GFP_KERNEL));
+	BUG_ON(!alloc_cpumask_var(&vector_searchmask, GFP_KERNEL));
+	BUG_ON(!alloc_cpumask_var(&searched_cpumask, GFP_KERNEL));
 
 	return arch_early_ioapic_init();
 }
@@ -494,14 +528,7 @@
 		return -EINVAL;
 
 	err = assign_irq_vector(irq, data, dest);
-	if (err) {
-		if (assign_irq_vector(irq, data,
-				      irq_data_get_affinity_mask(irq_data)))
-			pr_err("Failed to recover vector for irq %d\n", irq);
-		return err;
-	}
-
-	return IRQ_SET_MASK_OK;
+	return err ? err : IRQ_SET_MASK_OK;
 }
 
 static struct irq_chip lapic_controller = {
@@ -513,20 +540,12 @@
 #ifdef CONFIG_SMP
 static void __send_cleanup_vector(struct apic_chip_data *data)
 {
-	cpumask_var_t cleanup_mask;
-
-	if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) {
-		unsigned int i;
-
-		for_each_cpu_and(i, data->old_domain, cpu_online_mask)
-			apic->send_IPI_mask(cpumask_of(i),
-					    IRQ_MOVE_CLEANUP_VECTOR);
-	} else {
-		cpumask_and(cleanup_mask, data->old_domain, cpu_online_mask);
-		apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
-		free_cpumask_var(cleanup_mask);
-	}
+	raw_spin_lock(&vector_lock);
+	cpumask_and(data->old_domain, data->old_domain, cpu_online_mask);
 	data->move_in_progress = 0;
+	if (!cpumask_empty(data->old_domain))
+		apic->send_IPI_mask(data->old_domain, IRQ_MOVE_CLEANUP_VECTOR);
+	raw_spin_unlock(&vector_lock);
 }
 
 void send_cleanup_vector(struct irq_cfg *cfg)
@@ -570,12 +589,25 @@
 			goto unlock;
 
 		/*
-		 * Check if the irq migration is in progress. If so, we
-		 * haven't received the cleanup request yet for this irq.
+		 * Nothing to cleanup if irq migration is in progress
+		 * or this cpu is not set in the cleanup mask.
 		 */
-		if (data->move_in_progress)
+		if (data->move_in_progress ||
+		    !cpumask_test_cpu(me, data->old_domain))
 			goto unlock;
 
+		/*
+		 * We have two cases to handle here:
+		 * 1) vector is unchanged but the target mask got reduced
+		 * 2) vector and the target mask has changed
+		 *
+		 * #1 is obvious, but in #2 we have two vectors with the same
+		 * irq descriptor: the old and the new vector. So we need to
+		 * make sure that we only cleanup the old vector. The new
+		 * vector has the current @vector number in the config and
+		 * this cpu is part of the target mask. We better leave that
+		 * one alone.
+		 */
 		if (vector == data->cfg.vector &&
 		    cpumask_test_cpu(me, data->domain))
 			goto unlock;
@@ -593,6 +625,7 @@
 			goto unlock;
 		}
 		__this_cpu_write(vector_irq[vector], VECTOR_UNUSED);
+		cpumask_clear_cpu(me, data->old_domain);
 unlock:
 		raw_spin_unlock(&desc->lock);
 	}
@@ -621,12 +654,48 @@
 	__irq_complete_move(cfg, ~get_irq_regs()->orig_ax);
 }
 
-void irq_force_complete_move(int irq)
+/*
+ * Called with @desc->lock held and interrupts disabled.
+ */
+void irq_force_complete_move(struct irq_desc *desc)
 {
-	struct irq_cfg *cfg = irq_cfg(irq);
+	struct irq_data *irqdata = irq_desc_get_irq_data(desc);
+	struct apic_chip_data *data = apic_chip_data(irqdata);
+	struct irq_cfg *cfg = data ? &data->cfg : NULL;
 
-	if (cfg)
-		__irq_complete_move(cfg, cfg->vector);
+	if (!cfg)
+		return;
+
+	__irq_complete_move(cfg, cfg->vector);
+
+	/*
+	 * This is tricky. If the cleanup of @data->old_domain has not been
+	 * done yet, then the following setaffinity call will fail with
+	 * -EBUSY. This can leave the interrupt in a stale state.
+	 *
+	 * The cleanup cannot make progress because we hold @desc->lock. So in
+	 * case @data->old_domain is not yet cleaned up, we need to drop the
+	 * lock and acquire it again. @desc cannot go away, because the
+	 * hotplug code holds the sparse irq lock.
+	 */
+	raw_spin_lock(&vector_lock);
+	/* Clean out all offline cpus (including ourself) first. */
+	cpumask_and(data->old_domain, data->old_domain, cpu_online_mask);
+	while (!cpumask_empty(data->old_domain)) {
+		raw_spin_unlock(&vector_lock);
+		raw_spin_unlock(&desc->lock);
+		cpu_relax();
+		raw_spin_lock(&desc->lock);
+		/*
+		 * Reevaluate apic_chip_data. It might have been cleared after
+		 * we dropped @desc->lock.
+		 */
+		data = apic_chip_data(irqdata);
+		if (!data)
+			return;
+		raw_spin_lock(&vector_lock);
+	}
+	raw_spin_unlock(&vector_lock);
 }
 #endif
 
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index d760c6b..624db005 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -889,7 +889,10 @@
 		return;
 	}
 	pr_info("UV: Found %s hub\n", hub);
-	map_low_mmrs();
+
+	/* We now only need to map the MMRs on UV1 */
+	if (is_uv1_hub())
+		map_low_mmrs();
 
 	m_n_config.v = uv_read_local_mmr(UVH_RH_GAM_CONFIG_MMR );
 	m_val = m_n_config.s.m_skt;
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 84a7524..5c04246 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -59,7 +59,6 @@
 
 #ifdef CONFIG_PARAVIRT
 	BLANK();
-	OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled);
 	OFFSET(PARAVIRT_PATCH_pv_cpu_ops, paravirt_patch_template, pv_cpu_ops);
 	OFFSET(PARAVIRT_PATCH_pv_irq_ops, paravirt_patch_template, pv_irq_ops);
 	OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable);
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index 6ce3902..ecdc1d2 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -7,7 +7,7 @@
 #include <linux/lguest.h>
 #include "../../../drivers/lguest/lg.h"
 
-#define __SYSCALL_I386(nr, sym, compat) [nr] = 1,
+#define __SYSCALL_I386(nr, sym, qual) [nr] = 1,
 static char syscalls[] = {
 #include <asm/syscalls_32.h>
 };
@@ -52,6 +52,11 @@
 	DEFINE(TSS_sysenter_sp0, offsetof(struct tss_struct, x86_tss.sp0) -
 	       offsetofend(struct tss_struct, SYSENTER_stack));
 
+	/* Offset from cpu_tss to SYSENTER_stack */
+	OFFSET(CPU_TSS_SYSENTER_stack, tss_struct, SYSENTER_stack);
+	/* Size of SYSENTER_stack */
+	DEFINE(SIZEOF_SYSENTER_stack, sizeof(((struct tss_struct *)0)->SYSENTER_stack));
+
 #if defined(CONFIG_LGUEST) || defined(CONFIG_LGUEST_GUEST) || defined(CONFIG_LGUEST_MODULE)
 	BLANK();
 	OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled);
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index f2edafb..d875f97 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -4,17 +4,11 @@
 
 #include <asm/ia32.h>
 
-#define __SYSCALL_64(nr, sym, compat) [nr] = 1,
-#define __SYSCALL_COMMON(nr, sym, compat) [nr] = 1,
-#ifdef CONFIG_X86_X32_ABI
-# define __SYSCALL_X32(nr, sym, compat) [nr] = 1,
-#else
-# define __SYSCALL_X32(nr, sym, compat) /* nothing */
-#endif
+#define __SYSCALL_64(nr, sym, qual) [nr] = 1,
 static char syscalls_64[] = {
 #include <asm/syscalls_64.h>
 };
-#define __SYSCALL_I386(nr, sym, compat) [nr] = 1,
+#define __SYSCALL_I386(nr, sym, qual) [nr] = 1,
 static char syscalls_ia32[] = {
 #include <asm/syscalls_32.h>
 };
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 5803130..0d373d7 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -30,33 +30,11 @@
 obj-$(CONFIG_CPU_SUP_TRANSMETA_32)	+= transmeta.o
 obj-$(CONFIG_CPU_SUP_UMC_32)		+= umc.o
 
-obj-$(CONFIG_PERF_EVENTS)		+= perf_event.o
-
-ifdef CONFIG_PERF_EVENTS
-obj-$(CONFIG_CPU_SUP_AMD)		+= perf_event_amd.o perf_event_amd_uncore.o
-ifdef CONFIG_AMD_IOMMU
-obj-$(CONFIG_CPU_SUP_AMD)		+= perf_event_amd_iommu.o
-endif
-obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_p6.o perf_event_knc.o perf_event_p4.o
-obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o
-obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_intel_rapl.o perf_event_intel_cqm.o
-obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_intel_pt.o perf_event_intel_bts.o
-obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_intel_cstate.o
-
-obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE)	+= perf_event_intel_uncore.o \
-					   perf_event_intel_uncore_snb.o \
-					   perf_event_intel_uncore_snbep.o \
-					   perf_event_intel_uncore_nhmex.o
-obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_msr.o
-obj-$(CONFIG_CPU_SUP_AMD)		+= perf_event_msr.o
-endif
-
-
 obj-$(CONFIG_X86_MCE)			+= mcheck/
 obj-$(CONFIG_MTRR)			+= mtrr/
 obj-$(CONFIG_MICROCODE)			+= microcode/
 
-obj-$(CONFIG_X86_LOCAL_APIC)		+= perfctr-watchdog.o perf_event_amd_ibs.o
+obj-$(CONFIG_X86_LOCAL_APIC)		+= perfctr-watchdog.o
 
 obj-$(CONFIG_HYPERVISOR_GUEST)		+= vmware.o hypervisor.o mshyperv.o
 
@@ -64,7 +42,7 @@
 quiet_cmd_mkcapflags = MKCAP   $@
       cmd_mkcapflags = $(CONFIG_SHELL) $(srctree)/$(src)/mkcapflags.sh $< $@
 
-cpufeature = $(src)/../../include/asm/cpufeature.h
+cpufeature = $(src)/../../include/asm/cpufeatures.h
 
 targets += capflags.c
 $(obj)/capflags.c: $(cpufeature) $(src)/mkcapflags.sh FORCE
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index a07956a..97c59fd 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -117,7 +117,7 @@
 		void (*f_vide)(void);
 		u64 d, d2;
 
-		printk(KERN_INFO "AMD K6 stepping B detected - ");
+		pr_info("AMD K6 stepping B detected - ");
 
 		/*
 		 * It looks like AMD fixed the 2.6.2 bug and improved indirect
@@ -133,10 +133,9 @@
 		d = d2-d;
 
 		if (d > 20*K6_BUG_LOOP)
-			printk(KERN_CONT
-				"system stability may be impaired when more than 32 MB are used.\n");
+			pr_cont("system stability may be impaired when more than 32 MB are used.\n");
 		else
-			printk(KERN_CONT "probably OK (after B9730xxxx).\n");
+			pr_cont("probably OK (after B9730xxxx).\n");
 	}
 
 	/* K6 with old style WHCR */
@@ -154,7 +153,7 @@
 			wbinvd();
 			wrmsr(MSR_K6_WHCR, l, h);
 			local_irq_restore(flags);
-			printk(KERN_INFO "Enabling old style K6 write allocation for %d Mb\n",
+			pr_info("Enabling old style K6 write allocation for %d Mb\n",
 				mbytes);
 		}
 		return;
@@ -175,7 +174,7 @@
 			wbinvd();
 			wrmsr(MSR_K6_WHCR, l, h);
 			local_irq_restore(flags);
-			printk(KERN_INFO "Enabling new style K6 write allocation for %d Mb\n",
+			pr_info("Enabling new style K6 write allocation for %d Mb\n",
 				mbytes);
 		}
 
@@ -202,7 +201,7 @@
 	 */
 	if (c->x86_model >= 6 && c->x86_model <= 10) {
 		if (!cpu_has(c, X86_FEATURE_XMM)) {
-			printk(KERN_INFO "Enabling disabled K7/SSE Support.\n");
+			pr_info("Enabling disabled K7/SSE Support.\n");
 			msr_clear_bit(MSR_K7_HWCR, 15);
 			set_cpu_cap(c, X86_FEATURE_XMM);
 		}
@@ -216,9 +215,8 @@
 	if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) {
 		rdmsr(MSR_K7_CLK_CTL, l, h);
 		if ((l & 0xfff00000) != 0x20000000) {
-			printk(KERN_INFO
-			    "CPU: CLK_CTL MSR was %x. Reprogramming to %x\n",
-					l, ((l & 0x000fffff)|0x20000000));
+			pr_info("CPU: CLK_CTL MSR was %x. Reprogramming to %x\n",
+				l, ((l & 0x000fffff)|0x20000000));
 			wrmsr(MSR_K7_CLK_CTL, (l & 0x000fffff)|0x20000000, h);
 		}
 	}
@@ -485,7 +483,7 @@
 		if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) {
 			unsigned long pfn = tseg >> PAGE_SHIFT;
 
-			printk(KERN_DEBUG "tseg: %010llx\n", tseg);
+			pr_debug("tseg: %010llx\n", tseg);
 			if (pfn_range_is_mapped(pfn, pfn + 1))
 				set_memory_4k((unsigned long)__va(tseg), 1);
 		}
@@ -500,8 +498,7 @@
 
 			rdmsrl(MSR_K7_HWCR, val);
 			if (!(val & BIT(24)))
-				printk(KERN_WARNING FW_BUG "TSC doesn't count "
-					"with P0 frequency!\n");
+				pr_warn(FW_BUG "TSC doesn't count with P0 frequency!\n");
 		}
 	}
 
diff --git a/arch/x86/kernel/cpu/bugs_64.c b/arch/x86/kernel/cpu/bugs_64.c
index 04f0fe5..a972ac4 100644
--- a/arch/x86/kernel/cpu/bugs_64.c
+++ b/arch/x86/kernel/cpu/bugs_64.c
@@ -15,7 +15,7 @@
 {
 	identify_boot_cpu();
 #if !defined(CONFIG_SMP)
-	printk(KERN_INFO "CPU: ");
+	pr_info("CPU: ");
 	print_cpu_info(&boot_cpu_data);
 #endif
 	alternative_instructions();
diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c
index ae20be6..1661d8e 100644
--- a/arch/x86/kernel/cpu/centaur.c
+++ b/arch/x86/kernel/cpu/centaur.c
@@ -1,7 +1,7 @@
 #include <linux/bitops.h>
 #include <linux/kernel.h>
 
-#include <asm/processor.h>
+#include <asm/cpufeature.h>
 #include <asm/e820.h>
 #include <asm/mtrr.h>
 #include <asm/msr.h>
@@ -29,7 +29,7 @@
 			rdmsr(MSR_VIA_FCR, lo, hi);
 			lo |= ACE_FCR;		/* enable ACE unit */
 			wrmsr(MSR_VIA_FCR, lo, hi);
-			printk(KERN_INFO "CPU: Enabled ACE h/w crypto\n");
+			pr_info("CPU: Enabled ACE h/w crypto\n");
 		}
 
 		/* enable RNG unit, if present and disabled */
@@ -37,7 +37,7 @@
 			rdmsr(MSR_VIA_RNG, lo, hi);
 			lo |= RNG_ENABLE;	/* enable RNG unit */
 			wrmsr(MSR_VIA_RNG, lo, hi);
-			printk(KERN_INFO "CPU: Enabled h/w RNG\n");
+			pr_info("CPU: Enabled h/w RNG\n");
 		}
 
 		/* store Centaur Extended Feature Flags as
@@ -130,7 +130,7 @@
 			name = "C6";
 			fcr_set = ECX8|DSMC|EDCTLB|EMMX|ERETSTK;
 			fcr_clr = DPDC;
-			printk(KERN_NOTICE "Disabling bugged TSC.\n");
+			pr_notice("Disabling bugged TSC.\n");
 			clear_cpu_cap(c, X86_FEATURE_TSC);
 			break;
 		case 8:
@@ -163,11 +163,11 @@
 		newlo = (lo|fcr_set) & (~fcr_clr);
 
 		if (newlo != lo) {
-			printk(KERN_INFO "Centaur FCR was 0x%X now 0x%X\n",
+			pr_info("Centaur FCR was 0x%X now 0x%X\n",
 				lo, newlo);
 			wrmsr(MSR_IDT_FCR1, newlo, hi);
 		} else {
-			printk(KERN_INFO "Centaur FCR is 0x%X\n", lo);
+			pr_info("Centaur FCR is 0x%X\n", lo);
 		}
 		/* Emulate MTRRs using Centaur's MCR. */
 		set_cpu_cap(c, X86_FEATURE_CENTAUR_MCR);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 37830de..249461f 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -162,6 +162,22 @@
 }
 __setup("nompx", x86_mpx_setup);
 
+static int __init x86_noinvpcid_setup(char *s)
+{
+	/* noinvpcid doesn't accept parameters */
+	if (s)
+		return -EINVAL;
+
+	/* do not emit a message if the feature is not present */
+	if (!boot_cpu_has(X86_FEATURE_INVPCID))
+		return 0;
+
+	setup_clear_cpu_cap(X86_FEATURE_INVPCID);
+	pr_info("noinvpcid: INVPCID feature disabled\n");
+	return 0;
+}
+early_param("noinvpcid", x86_noinvpcid_setup);
+
 #ifdef CONFIG_X86_32
 static int cachesize_override = -1;
 static int disable_x86_serial_nr = 1;
@@ -228,7 +244,7 @@
 	lo |= 0x200000;
 	wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi);
 
-	printk(KERN_NOTICE "CPU serial number disabled.\n");
+	pr_notice("CPU serial number disabled.\n");
 	clear_cpu_cap(c, X86_FEATURE_PN);
 
 	/* Disabling the serial number may affect the cpuid level */
@@ -329,9 +345,8 @@
 		if (!warn)
 			continue;
 
-		printk(KERN_WARNING
-		       "CPU: CPU feature " X86_CAP_FMT " disabled, no CPUID level 0x%x\n",
-				x86_cap_flag(df->feature), df->level);
+		pr_warn("CPU: CPU feature " X86_CAP_FMT " disabled, no CPUID level 0x%x\n",
+			x86_cap_flag(df->feature), df->level);
 	}
 }
 
@@ -510,7 +525,7 @@
 	smp_num_siblings = (ebx & 0xff0000) >> 16;
 
 	if (smp_num_siblings == 1) {
-		printk_once(KERN_INFO "CPU0: Hyper-Threading is disabled\n");
+		pr_info_once("CPU0: Hyper-Threading is disabled\n");
 		goto out;
 	}
 
@@ -531,10 +546,10 @@
 
 out:
 	if (!printed && (c->x86_max_cores * smp_num_siblings) > 1) {
-		printk(KERN_INFO  "CPU: Physical Processor ID: %d\n",
-		       c->phys_proc_id);
-		printk(KERN_INFO  "CPU: Processor Core ID: %d\n",
-		       c->cpu_core_id);
+		pr_info("CPU: Physical Processor ID: %d\n",
+			c->phys_proc_id);
+		pr_info("CPU: Processor Core ID: %d\n",
+			c->cpu_core_id);
 		printed = 1;
 	}
 #endif
@@ -559,9 +574,8 @@
 		}
 	}
 
-	printk_once(KERN_ERR
-			"CPU: vendor_id '%s' unknown, using generic init.\n" \
-			"CPU: Your system may be unstable.\n", v);
+	pr_err_once("CPU: vendor_id '%s' unknown, using generic init.\n" \
+		    "CPU: Your system may be unstable.\n", v);
 
 	c->x86_vendor = X86_VENDOR_UNKNOWN;
 	this_cpu = &default_cpu;
@@ -760,7 +774,7 @@
 	int count = 0;
 
 #ifdef CONFIG_PROCESSOR_SELECT
-	printk(KERN_INFO "KERNEL supported cpus:\n");
+	pr_info("KERNEL supported cpus:\n");
 #endif
 
 	for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) {
@@ -778,7 +792,7 @@
 			for (j = 0; j < 2; j++) {
 				if (!cpudev->c_ident[j])
 					continue;
-				printk(KERN_INFO "  %s %s\n", cpudev->c_vendor,
+				pr_info("  %s %s\n", cpudev->c_vendor,
 					cpudev->c_ident[j]);
 			}
 		}
@@ -803,6 +817,31 @@
 #else
 	set_cpu_cap(c, X86_FEATURE_NOPL);
 #endif
+
+	/*
+	 * ESPFIX is a strange bug.  All real CPUs have it.  Paravirt
+	 * systems that run Linux at CPL > 0 may or may not have the
+	 * issue, but, even if they have the issue, there's absolutely
+	 * nothing we can do about it because we can't use the real IRET
+	 * instruction.
+	 *
+	 * NB: For the time being, only 32-bit kernels support
+	 * X86_BUG_ESPFIX as such.  64-bit kernels directly choose
+	 * whether to apply espfix using paravirt hooks.  If any
+	 * non-paravirt system ever shows up that does *not* have the
+	 * ESPFIX issue, we can change this.
+	 */
+#ifdef CONFIG_X86_32
+#ifdef CONFIG_PARAVIRT
+	do {
+		extern void native_iret(void);
+		if (pv_cpu_ops.iret == native_iret)
+			set_cpu_bug(c, X86_BUG_ESPFIX);
+	} while (0);
+#else
+	set_cpu_bug(c, X86_BUG_ESPFIX);
+#endif
+#endif
 }
 
 static void generic_identify(struct cpuinfo_x86 *c)
@@ -977,6 +1016,8 @@
 #ifdef CONFIG_NUMA
 	numa_add_cpu(smp_processor_id());
 #endif
+	/* The boot/hotplug time assigment got cleared, restore it */
+	c->logical_proc_id = topology_phys_to_logical_pkg(c->phys_proc_id);
 }
 
 /*
@@ -1061,7 +1102,7 @@
 		for (index = index_min; index < index_max; index++) {
 			if (rdmsrl_safe(index, &val))
 				continue;
-			printk(KERN_INFO " MSR%08x: %016llx\n", index, val);
+			pr_info(" MSR%08x: %016llx\n", index, val);
 		}
 	}
 }
@@ -1100,19 +1141,19 @@
 	}
 
 	if (vendor && !strstr(c->x86_model_id, vendor))
-		printk(KERN_CONT "%s ", vendor);
+		pr_cont("%s ", vendor);
 
 	if (c->x86_model_id[0])
-		printk(KERN_CONT "%s", c->x86_model_id);
+		pr_cont("%s", c->x86_model_id);
 	else
-		printk(KERN_CONT "%d86", c->x86);
+		pr_cont("%d86", c->x86);
 
-	printk(KERN_CONT " (family: 0x%x, model: 0x%x", c->x86, c->x86_model);
+	pr_cont(" (family: 0x%x, model: 0x%x", c->x86, c->x86_model);
 
 	if (c->x86_mask || c->cpuid_level >= 0)
-		printk(KERN_CONT ", stepping: 0x%x)\n", c->x86_mask);
+		pr_cont(", stepping: 0x%x)\n", c->x86_mask);
 	else
-		printk(KERN_CONT ")\n");
+		pr_cont(")\n");
 
 	print_cpu_msr(c);
 }
@@ -1438,7 +1479,7 @@
 
 	show_ucode_info_early();
 
-	printk(KERN_INFO "Initializing CPU#%d\n", cpu);
+	pr_info("Initializing CPU#%d\n", cpu);
 
 	if (cpu_feature_enabled(X86_FEATURE_VME) ||
 	    cpu_has_tsc ||
@@ -1475,20 +1516,6 @@
 }
 #endif
 
-#ifdef CONFIG_X86_DEBUG_STATIC_CPU_HAS
-void warn_pre_alternatives(void)
-{
-	WARN(1, "You're using static_cpu_has before alternatives have run!\n");
-}
-EXPORT_SYMBOL_GPL(warn_pre_alternatives);
-#endif
-
-inline bool __static_cpu_has_safe(u16 bit)
-{
-	return boot_cpu_has(bit);
-}
-EXPORT_SYMBOL_GPL(__static_cpu_has_safe);
-
 static void bsp_resume(void)
 {
 	if (this_cpu->c_bsp_resume)
diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c
index aaf152e..6adef9c 100644
--- a/arch/x86/kernel/cpu/cyrix.c
+++ b/arch/x86/kernel/cpu/cyrix.c
@@ -8,6 +8,7 @@
 #include <linux/timer.h>
 #include <asm/pci-direct.h>
 #include <asm/tsc.h>
+#include <asm/cpufeature.h>
 
 #include "cpu.h"
 
@@ -103,7 +104,7 @@
 		local_irq_restore(flags);
 
 		if (ccr5 & 2) { /* possible wrong calibration done */
-			printk(KERN_INFO "Recalibrating delay loop with SLOP bit reset\n");
+			pr_info("Recalibrating delay loop with SLOP bit reset\n");
 			calibrate_delay();
 			c->loops_per_jiffy = loops_per_jiffy;
 		}
@@ -115,7 +116,7 @@
 {
 	u8 ccr3;
 
-	printk(KERN_INFO "Enable Memory access reorder on Cyrix/NSC processor.\n");
+	pr_info("Enable Memory access reorder on Cyrix/NSC processor.\n");
 	ccr3 = getCx86(CX86_CCR3);
 	setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */
 
@@ -128,7 +129,7 @@
 
 static void set_cx86_memwb(void)
 {
-	printk(KERN_INFO "Enable Memory-Write-back mode on Cyrix/NSC processor.\n");
+	pr_info("Enable Memory-Write-back mode on Cyrix/NSC processor.\n");
 
 	/* CCR2 bit 2: unlock NW bit */
 	setCx86_old(CX86_CCR2, getCx86_old(CX86_CCR2) & ~0x04);
@@ -268,7 +269,7 @@
 		 *  VSA1 we work around however.
 		 */
 
-		printk(KERN_INFO "Working around Cyrix MediaGX virtual DMA bugs.\n");
+		pr_info("Working around Cyrix MediaGX virtual DMA bugs.\n");
 		isa_dma_bridge_buggy = 2;
 
 		/* We do this before the PCI layer is running. However we
@@ -426,7 +427,7 @@
 		if (dir0 == 5 || dir0 == 3) {
 			unsigned char ccr3;
 			unsigned long flags;
-			printk(KERN_INFO "Enabling CPUID on Cyrix processor.\n");
+			pr_info("Enabling CPUID on Cyrix processor.\n");
 			local_irq_save(flags);
 			ccr3 = getCx86(CX86_CCR3);
 			/* enable MAPEN  */
diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c
index d820d8e..73d391a 100644
--- a/arch/x86/kernel/cpu/hypervisor.c
+++ b/arch/x86/kernel/cpu/hypervisor.c
@@ -56,7 +56,7 @@
 	}
 
 	if (max_pri)
-		printk(KERN_INFO "Hypervisor detected: %s\n", x86_hyper->name);
+		pr_info("Hypervisor detected: %s\n", x86_hyper->name);
 }
 
 void init_hypervisor(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 565648bc..1f7fdb9 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -8,7 +8,7 @@
 #include <linux/module.h>
 #include <linux/uaccess.h>
 
-#include <asm/processor.h>
+#include <asm/cpufeature.h>
 #include <asm/pgtable.h>
 #include <asm/msr.h>
 #include <asm/bugs.h>
@@ -61,7 +61,7 @@
 	 */
 	if (c->x86 == 6 && c->x86_model == 0x1c && c->x86_mask <= 2 &&
 	    c->microcode < 0x20e) {
-		printk(KERN_WARNING "Atom PSE erratum detected, BIOS microcode update recommended\n");
+		pr_warn("Atom PSE erratum detected, BIOS microcode update recommended\n");
 		clear_cpu_cap(c, X86_FEATURE_PSE);
 	}
 
@@ -140,7 +140,7 @@
 	if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) {
 		rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
 		if (!(misc_enable & MSR_IA32_MISC_ENABLE_FAST_STRING)) {
-			printk(KERN_INFO "Disabled fast string operations\n");
+			pr_info("Disabled fast string operations\n");
 			setup_clear_cpu_cap(X86_FEATURE_REP_GOOD);
 			setup_clear_cpu_cap(X86_FEATURE_ERMS);
 		}
@@ -160,6 +160,19 @@
 		pr_info("Disabling PGE capability bit\n");
 		setup_clear_cpu_cap(X86_FEATURE_PGE);
 	}
+
+	if (c->cpuid_level >= 0x00000001) {
+		u32 eax, ebx, ecx, edx;
+
+		cpuid(0x00000001, &eax, &ebx, &ecx, &edx);
+		/*
+		 * If HTT (EDX[28]) is set EBX[16:23] contain the number of
+		 * apicids which are reserved per package. Store the resulting
+		 * shift value for the package management code.
+		 */
+		if (edx & (1U << 28))
+			c->x86_coreid_bits = get_count_order((ebx >> 16) & 0xff);
+	}
 }
 
 #ifdef CONFIG_X86_32
@@ -176,7 +189,7 @@
 	    boot_cpu_data.x86 == 6 &&
 	    boot_cpu_data.x86_model == 1 &&
 	    boot_cpu_data.x86_mask < 8) {
-		printk(KERN_INFO "Pentium Pro with Errata#50 detected. Taking evasive action.\n");
+		pr_info("Pentium Pro with Errata#50 detected. Taking evasive action.\n");
 		return 1;
 	}
 	return 0;
@@ -225,7 +238,7 @@
 
 		set_cpu_bug(c, X86_BUG_F00F);
 		if (!f00f_workaround_enabled) {
-			printk(KERN_NOTICE "Intel Pentium with F0 0F bug - workaround enabled.\n");
+			pr_notice("Intel Pentium with F0 0F bug - workaround enabled.\n");
 			f00f_workaround_enabled = 1;
 		}
 	}
@@ -244,7 +257,7 @@
 	 * Forcefully enable PAE if kernel parameter "forcepae" is present.
 	 */
 	if (forcepae) {
-		printk(KERN_WARNING "PAE forced!\n");
+		pr_warn("PAE forced!\n");
 		set_cpu_cap(c, X86_FEATURE_PAE);
 		add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_NOW_UNRELIABLE);
 	}
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 0b6c523..de6626c 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -14,7 +14,7 @@
 #include <linux/sysfs.h>
 #include <linux/pci.h>
 
-#include <asm/processor.h>
+#include <asm/cpufeature.h>
 #include <asm/amd_nb.h>
 #include <asm/smp.h>
 
@@ -444,7 +444,7 @@
 	err = amd_set_l3_disable_slot(nb, cpu, slot, val);
 	if (err) {
 		if (err == -EEXIST)
-			pr_warning("L3 slot %d in use/index already disabled!\n",
+			pr_warn("L3 slot %d in use/index already disabled!\n",
 				   slot);
 		return err;
 	}
diff --git a/arch/x86/kernel/cpu/match.c b/arch/x86/kernel/cpu/match.c
index afa9f0d..fbb5e90 100644
--- a/arch/x86/kernel/cpu/match.c
+++ b/arch/x86/kernel/cpu/match.c
@@ -1,5 +1,5 @@
 #include <asm/cpu_device_id.h>
-#include <asm/processor.h>
+#include <asm/cpufeature.h>
 #include <linux/cpu.h>
 #include <linux/module.h>
 #include <linux/slab.h>
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index 4cfba43..517619e 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -115,7 +115,7 @@
 	int cpu = m->extcpu;
 
 	if (m->inject_flags & MCJ_EXCEPTION) {
-		printk(KERN_INFO "Triggering MCE exception on CPU %d\n", cpu);
+		pr_info("Triggering MCE exception on CPU %d\n", cpu);
 		switch (context) {
 		case MCJ_CTX_IRQ:
 			/*
@@ -128,15 +128,15 @@
 			raise_exception(m, NULL);
 			break;
 		default:
-			printk(KERN_INFO "Invalid MCE context\n");
+			pr_info("Invalid MCE context\n");
 			ret = -EINVAL;
 		}
-		printk(KERN_INFO "MCE exception done on CPU %d\n", cpu);
+		pr_info("MCE exception done on CPU %d\n", cpu);
 	} else if (m->status) {
-		printk(KERN_INFO "Starting machine check poll CPU %d\n", cpu);
+		pr_info("Starting machine check poll CPU %d\n", cpu);
 		raise_poll(m);
 		mce_notify_irq();
-		printk(KERN_INFO "Machine check poll done on CPU %d\n", cpu);
+		pr_info("Machine check poll done on CPU %d\n", cpu);
 	} else
 		m->finished = 0;
 
@@ -183,8 +183,7 @@
 		start = jiffies;
 		while (!cpumask_empty(mce_inject_cpumask)) {
 			if (!time_before(jiffies, start + 2*HZ)) {
-				printk(KERN_ERR
-				"Timeout waiting for mce inject %lx\n",
+				pr_err("Timeout waiting for mce inject %lx\n",
 					*cpumask_bits(mce_inject_cpumask));
 				break;
 			}
@@ -241,7 +240,7 @@
 {
 	if (!alloc_cpumask_var(&mce_inject_cpumask, GFP_KERNEL))
 		return -ENOMEM;
-	printk(KERN_INFO "Machine check injector initialized\n");
+	pr_info("Machine check injector initialized\n");
 	register_mce_write_callback(mce_write);
 	register_nmi_handler(NMI_LOCAL, mce_raise_notify, 0,
 				"mce_notify");
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index 9c682c2..5119766 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -14,6 +14,7 @@
 #include <linux/init.h>
 #include <linux/debugfs.h>
 #include <asm/mce.h>
+#include <asm/uaccess.h>
 
 #include "mce-internal.h"
 
@@ -29,7 +30,7 @@
  * panic situations)
  */
 
-enum context { IN_KERNEL = 1, IN_USER = 2 };
+enum context { IN_KERNEL = 1, IN_USER = 2, IN_KERNEL_RECOV = 3 };
 enum ser { SER_REQUIRED = 1, NO_SER = 2 };
 enum exception { EXCP_CONTEXT = 1, NO_EXCP = 2 };
 
@@ -48,6 +49,7 @@
 #define MCESEV(s, m, c...) { .sev = MCE_ ## s ## _SEVERITY, .msg = m, ## c }
 #define  KERNEL		.context = IN_KERNEL
 #define  USER		.context = IN_USER
+#define  KERNEL_RECOV	.context = IN_KERNEL_RECOV
 #define  SER		.ser = SER_REQUIRED
 #define  NOSER		.ser = NO_SER
 #define  EXCP		.excp = EXCP_CONTEXT
@@ -87,6 +89,10 @@
 		EXCP, KERNEL, MCGMASK(MCG_STATUS_RIPV, 0)
 		),
 	MCESEV(
+		PANIC, "In kernel and no restart IP",
+		EXCP, KERNEL_RECOV, MCGMASK(MCG_STATUS_RIPV, 0)
+		),
+	MCESEV(
 		DEFERRED, "Deferred error",
 		NOSER, MASK(MCI_STATUS_UC|MCI_STATUS_DEFERRED|MCI_STATUS_POISON, MCI_STATUS_DEFERRED)
 		),
@@ -123,6 +129,11 @@
 		MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, MCG_STATUS_RIPV)
 		),
 	MCESEV(
+		AR, "Action required: data load in error recoverable area of kernel",
+		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
+		KERNEL_RECOV
+		),
+	MCESEV(
 		AR, "Action required: data load error in a user process",
 		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
 		USER
@@ -170,6 +181,9 @@
 		)	/* always matches. keep at end */
 };
 
+#define mc_recoverable(mcg) (((mcg) & (MCG_STATUS_RIPV|MCG_STATUS_EIPV)) == \
+				(MCG_STATUS_RIPV|MCG_STATUS_EIPV))
+
 /*
  * If mcgstatus indicated that ip/cs on the stack were
  * no good, then "m->cs" will be zero and we will have
@@ -183,7 +197,11 @@
  */
 static int error_context(struct mce *m)
 {
-	return ((m->cs & 3) == 3) ? IN_USER : IN_KERNEL;
+	if ((m->cs & 3) == 3)
+		return IN_USER;
+	if (mc_recoverable(m->mcgstatus) && ex_has_fault_handler(m->ip))
+		return IN_KERNEL_RECOV;
+	return IN_KERNEL;
 }
 
 /*
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index a006f4c..f0c921b 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -961,6 +961,20 @@
 	}
 }
 
+static int do_memory_failure(struct mce *m)
+{
+	int flags = MF_ACTION_REQUIRED;
+	int ret;
+
+	pr_err("Uncorrected hardware memory error in user-access at %llx", m->addr);
+	if (!(m->mcgstatus & MCG_STATUS_RIPV))
+		flags |= MF_MUST_KILL;
+	ret = memory_failure(m->addr >> PAGE_SHIFT, MCE_VECTOR, flags);
+	if (ret)
+		pr_err("Memory error not recovered");
+	return ret;
+}
+
 /*
  * The actual machine check handler. This only handles real
  * exceptions when something got corrupted coming in through int 18.
@@ -998,8 +1012,6 @@
 	DECLARE_BITMAP(toclear, MAX_NR_BANKS);
 	DECLARE_BITMAP(valid_banks, MAX_NR_BANKS);
 	char *msg = "Unknown";
-	u64 recover_paddr = ~0ull;
-	int flags = MF_ACTION_REQUIRED;
 	int lmce = 0;
 
 	/* If this CPU is offline, just bail out. */
@@ -1136,22 +1148,13 @@
 	}
 
 	/*
-	 * At insane "tolerant" levels we take no action. Otherwise
-	 * we only die if we have no other choice. For less serious
-	 * issues we try to recover, or limit damage to the current
-	 * process.
+	 * If tolerant is at an insane level we drop requests to kill
+	 * processes and continue even when there is no way out.
 	 */
-	if (cfg->tolerant < 3) {
-		if (no_way_out)
-			mce_panic("Fatal machine check on current CPU", &m, msg);
-		if (worst == MCE_AR_SEVERITY) {
-			recover_paddr = m.addr;
-			if (!(m.mcgstatus & MCG_STATUS_RIPV))
-				flags |= MF_MUST_KILL;
-		} else if (kill_it) {
-			force_sig(SIGBUS, current);
-		}
-	}
+	if (cfg->tolerant == 3)
+		kill_it = 0;
+	else if (no_way_out)
+		mce_panic("Fatal machine check on current CPU", &m, msg);
 
 	if (worst > 0)
 		mce_report_event(regs);
@@ -1159,25 +1162,24 @@
 out:
 	sync_core();
 
-	if (recover_paddr == ~0ull)
-		goto done;
+	if (worst != MCE_AR_SEVERITY && !kill_it)
+		goto out_ist;
 
-	pr_err("Uncorrected hardware memory error in user-access at %llx",
-		 recover_paddr);
-	/*
-	 * We must call memory_failure() here even if the current process is
-	 * doomed. We still need to mark the page as poisoned and alert any
-	 * other users of the page.
-	 */
-	ist_begin_non_atomic(regs);
-	local_irq_enable();
-	if (memory_failure(recover_paddr >> PAGE_SHIFT, MCE_VECTOR, flags) < 0) {
-		pr_err("Memory error not recovered");
-		force_sig(SIGBUS, current);
+	/* Fault was in user mode and we need to take some action */
+	if ((m.cs & 3) == 3) {
+		ist_begin_non_atomic(regs);
+		local_irq_enable();
+
+		if (kill_it || do_memory_failure(&m))
+			force_sig(SIGBUS, current);
+		local_irq_disable();
+		ist_end_non_atomic();
+	} else {
+		if (!fixup_exception(regs, X86_TRAP_MC))
+			mce_panic("Failed kernel mode recovery", &m, NULL);
 	}
-	local_irq_disable();
-	ist_end_non_atomic();
-done:
+
+out_ist:
 	ist_exit(regs);
 }
 EXPORT_SYMBOL_GPL(do_machine_check);
@@ -1576,6 +1578,17 @@
 
 		if (c->x86 == 6 && c->x86_model == 45)
 			quirk_no_way_out = quirk_sandybridge_ifu;
+		/*
+		 * MCG_CAP.MCG_SER_P is necessary but not sufficient to know
+		 * whether this processor will actually generate recoverable
+		 * machine checks. Check to see if this is an E7 model Xeon.
+		 * We can't do a model number check because E5 and E7 use the
+		 * same model number. E5 doesn't support recovery, E7 does.
+		 */
+		if (mca_cfg.recovery || (mca_cfg.ser &&
+			!strncmp(c->x86_model_id,
+				 "Intel(R) Xeon(R) CPU E7-", 24)))
+			set_cpu_cap(c, X86_FEATURE_MCE_RECOVERY);
 	}
 	if (cfg->monarch_timeout < 0)
 		cfg->monarch_timeout = 0;
@@ -1617,10 +1630,10 @@
 	case X86_VENDOR_AMD: {
 		u32 ebx = cpuid_ebx(0x80000007);
 
-		mce_amd_feature_init(c);
 		mce_flags.overflow_recov = !!(ebx & BIT(0));
 		mce_flags.succor	 = !!(ebx & BIT(1));
 		mce_flags.smca		 = !!(ebx & BIT(3));
+		mce_amd_feature_init(c);
 
 		break;
 		}
@@ -2028,6 +2041,8 @@
 		cfg->bootlog = (str[0] == 'b');
 	else if (!strcmp(str, "bios_cmci_threshold"))
 		cfg->bios_cmci_threshold = true;
+	else if (!strcmp(str, "recovery"))
+		cfg->recovery = true;
 	else if (isdigit(str[0])) {
 		if (get_option(&str, &cfg->tolerant) == 2)
 			get_option(&str, &(cfg->monarch_timeout));
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index e99b150..9d656fd 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -1,5 +1,5 @@
 /*
- *  (c) 2005-2015 Advanced Micro Devices, Inc.
+ *  (c) 2005-2016 Advanced Micro Devices, Inc.
  *  Your use of this code is subject to the terms and conditions of the
  *  GNU general public license version 2. See "COPYING" or
  *  http://www.gnu.org/licenses/gpl.html
@@ -28,7 +28,7 @@
 #include <asm/msr.h>
 #include <asm/trace/irq_vectors.h>
 
-#define NR_BLOCKS         9
+#define NR_BLOCKS         5
 #define THRESHOLD_MAX     0xFFF
 #define INT_TYPE_APIC     0x00020000
 #define MASK_VALID_HI     0x80000000
@@ -49,6 +49,19 @@
 #define DEF_LVT_OFF		0x2
 #define DEF_INT_TYPE_APIC	0x2
 
+/* Scalable MCA: */
+
+/* Threshold LVT offset is at MSR0xC0000410[15:12] */
+#define SMCA_THR_LVT_OFF	0xF000
+
+/*
+ * OS is required to set the MCAX bit to acknowledge that it is now using the
+ * new MSR ranges and new registers under each bank. It also means that the OS
+ * will configure deferred errors in the new MCx_CONFIG register. If the bit is
+ * not set, uncorrectable errors will cause a system panic.
+ */
+#define SMCA_MCAX_EN_OFF	0x1
+
 static const char * const th_names[] = {
 	"load_store",
 	"insn_fetch",
@@ -58,6 +71,35 @@
 	"execution_unit",
 };
 
+/* Define HWID to IP type mappings for Scalable MCA */
+struct amd_hwid amd_hwids[] = {
+	[SMCA_F17H_CORE]	= { "f17h_core",	0xB0 },
+	[SMCA_DF]		= { "data_fabric",	0x2E },
+	[SMCA_UMC]		= { "umc",		0x96 },
+	[SMCA_PB]		= { "param_block",	0x5 },
+	[SMCA_PSP]		= { "psp",		0xFF },
+	[SMCA_SMU]		= { "smu",		0x1 },
+};
+EXPORT_SYMBOL_GPL(amd_hwids);
+
+const char * const amd_core_mcablock_names[] = {
+	[SMCA_LS]		= "load_store",
+	[SMCA_IF]		= "insn_fetch",
+	[SMCA_L2_CACHE]		= "l2_cache",
+	[SMCA_DE]		= "decode_unit",
+	[RES]			= "",
+	[SMCA_EX]		= "execution_unit",
+	[SMCA_FP]		= "floating_point",
+	[SMCA_L3_CACHE]		= "l3_cache",
+};
+EXPORT_SYMBOL_GPL(amd_core_mcablock_names);
+
+const char * const amd_df_mcablock_names[] = {
+	[SMCA_CS]		= "coherent_slave",
+	[SMCA_PIE]		= "pie",
+};
+EXPORT_SYMBOL_GPL(amd_df_mcablock_names);
+
 static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks);
 static DEFINE_PER_CPU(unsigned char, bank_map);	/* see which banks are on */
 
@@ -84,6 +126,13 @@
 
 static inline bool is_shared_bank(int bank)
 {
+	/*
+	 * Scalable MCA provides for only one core to have access to the MSRs of
+	 * a shared bank.
+	 */
+	if (mce_flags.smca)
+		return false;
+
 	/* Bank 4 is for northbridge reporting and is thus shared */
 	return (bank == 4);
 }
@@ -135,6 +184,14 @@
 	}
 
 	if (apic != msr) {
+		/*
+		 * On SMCA CPUs, LVT offset is programmed at a different MSR, and
+		 * the BIOS provides the value. The original field where LVT offset
+		 * was set is reserved. Return early here:
+		 */
+		if (mce_flags.smca)
+			return 0;
+
 		pr_err(FW_BUG "cpu %d, invalid threshold interrupt offset %d "
 		       "for bank %d, block %d (MSR%08X=0x%x%08x)\n",
 		       b->cpu, apic, b->bank, b->block, b->address, hi, lo);
@@ -144,10 +201,7 @@
 	return 1;
 };
 
-/*
- * Called via smp_call_function_single(), must be called with correct
- * cpu affinity.
- */
+/* Reprogram MCx_MISC MSR behind this threshold bank. */
 static void threshold_restart_bank(void *_tr)
 {
 	struct thresh_restart *tr = _tr;
@@ -247,27 +301,116 @@
 	wrmsr(MSR_CU_DEF_ERR, low, high);
 }
 
+static u32 get_block_address(u32 current_addr, u32 low, u32 high,
+			     unsigned int bank, unsigned int block)
+{
+	u32 addr = 0, offset = 0;
+
+	if (mce_flags.smca) {
+		if (!block) {
+			addr = MSR_AMD64_SMCA_MCx_MISC(bank);
+		} else {
+			/*
+			 * For SMCA enabled processors, BLKPTR field of the
+			 * first MISC register (MCx_MISC0) indicates presence of
+			 * additional MISC register set (MISC1-4).
+			 */
+			u32 low, high;
+
+			if (rdmsr_safe(MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high))
+				return addr;
+
+			if (!(low & MCI_CONFIG_MCAX))
+				return addr;
+
+			if (!rdmsr_safe(MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high) &&
+			    (low & MASK_BLKPTR_LO))
+				addr = MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1);
+		}
+		return addr;
+	}
+
+	/* Fall back to method we used for older processors: */
+	switch (block) {
+	case 0:
+		addr = MSR_IA32_MCx_MISC(bank);
+		break;
+	case 1:
+		offset = ((low & MASK_BLKPTR_LO) >> 21);
+		if (offset)
+			addr = MCG_XBLK_ADDR + offset;
+		break;
+	default:
+		addr = ++current_addr;
+	}
+	return addr;
+}
+
+static int
+prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr,
+			int offset, u32 misc_high)
+{
+	unsigned int cpu = smp_processor_id();
+	struct threshold_block b;
+	int new;
+
+	if (!block)
+		per_cpu(bank_map, cpu) |= (1 << bank);
+
+	memset(&b, 0, sizeof(b));
+	b.cpu			= cpu;
+	b.bank			= bank;
+	b.block			= block;
+	b.address		= addr;
+	b.interrupt_capable	= lvt_interrupt_supported(bank, misc_high);
+
+	if (!b.interrupt_capable)
+		goto done;
+
+	b.interrupt_enable = 1;
+
+	if (mce_flags.smca) {
+		u32 smca_low, smca_high;
+		u32 smca_addr = MSR_AMD64_SMCA_MCx_CONFIG(bank);
+
+		if (!rdmsr_safe(smca_addr, &smca_low, &smca_high)) {
+			smca_high |= SMCA_MCAX_EN_OFF;
+			wrmsr(smca_addr, smca_low, smca_high);
+		}
+
+		/* Gather LVT offset for thresholding: */
+		if (rdmsr_safe(MSR_CU_DEF_ERR, &smca_low, &smca_high))
+			goto out;
+
+		new = (smca_low & SMCA_THR_LVT_OFF) >> 12;
+	} else {
+		new = (misc_high & MASK_LVTOFF_HI) >> 20;
+	}
+
+	offset = setup_APIC_mce_threshold(offset, new);
+
+	if ((offset == new) && (mce_threshold_vector != amd_threshold_interrupt))
+		mce_threshold_vector = amd_threshold_interrupt;
+
+done:
+	mce_threshold_block_init(&b, offset);
+
+out:
+	return offset;
+}
+
 /* cpu init entry point, called from mce.c with preempt off */
 void mce_amd_feature_init(struct cpuinfo_x86 *c)
 {
-	struct threshold_block b;
-	unsigned int cpu = smp_processor_id();
 	u32 low = 0, high = 0, address = 0;
 	unsigned int bank, block;
-	int offset = -1, new;
+	int offset = -1;
 
 	for (bank = 0; bank < mca_cfg.banks; ++bank) {
 		for (block = 0; block < NR_BLOCKS; ++block) {
-			if (block == 0)
-				address = MSR_IA32_MCx_MISC(bank);
-			else if (block == 1) {
-				address = (low & MASK_BLKPTR_LO) >> 21;
-				if (!address)
-					break;
-
-				address += MCG_XBLK_ADDR;
-			} else
-				++address;
+			address = get_block_address(address, low, high, bank, block);
+			if (!address)
+				break;
 
 			if (rdmsr_safe(address, &low, &high))
 				break;
@@ -279,29 +422,7 @@
 			     (high & MASK_LOCKED_HI))
 				continue;
 
-			if (!block)
-				per_cpu(bank_map, cpu) |= (1 << bank);
-
-			memset(&b, 0, sizeof(b));
-			b.cpu			= cpu;
-			b.bank			= bank;
-			b.block			= block;
-			b.address		= address;
-			b.interrupt_capable	= lvt_interrupt_supported(bank, high);
-
-			if (!b.interrupt_capable)
-				goto init;
-
-			b.interrupt_enable = 1;
-			new	= (high & MASK_LVTOFF_HI) >> 20;
-			offset  = setup_APIC_mce_threshold(offset, new);
-
-			if ((offset == new) &&
-			    (mce_threshold_vector != amd_threshold_interrupt))
-				mce_threshold_vector = amd_threshold_interrupt;
-
-init:
-			mce_threshold_block_init(&b, offset);
+			offset = prepare_threshold_block(bank, block, address, offset, high);
 		}
 	}
 
@@ -394,16 +515,9 @@
 		if (!(per_cpu(bank_map, cpu) & (1 << bank)))
 			continue;
 		for (block = 0; block < NR_BLOCKS; ++block) {
-			if (block == 0) {
-				address = MSR_IA32_MCx_MISC(bank);
-			} else if (block == 1) {
-				address = (low & MASK_BLKPTR_LO) >> 21;
-				if (!address)
-					break;
-				address += MCG_XBLK_ADDR;
-			} else {
-				++address;
-			}
+			address = get_block_address(address, low, high, bank, block);
+			if (!address)
+				break;
 
 			if (rdmsr_safe(address, &low, &high))
 				break;
@@ -623,16 +737,11 @@
 	if (err)
 		goto out_free;
 recurse:
-	if (!block) {
-		address = (low & MASK_BLKPTR_LO) >> 21;
-		if (!address)
-			return 0;
-		address += MCG_XBLK_ADDR;
-	} else {
-		++address;
-	}
+	address = get_block_address(address, low, high, bank, ++block);
+	if (!address)
+		return 0;
 
-	err = allocate_threshold_blocks(cpu, bank, ++block, address);
+	err = allocate_threshold_blocks(cpu, bank, block, address);
 	if (err)
 		goto out_free;
 
diff --git a/arch/x86/kernel/cpu/mcheck/p5.c b/arch/x86/kernel/cpu/mcheck/p5.c
index 12402e1..2a0717b 100644
--- a/arch/x86/kernel/cpu/mcheck/p5.c
+++ b/arch/x86/kernel/cpu/mcheck/p5.c
@@ -26,14 +26,12 @@
 	rdmsr(MSR_IA32_P5_MC_ADDR, loaddr, hi);
 	rdmsr(MSR_IA32_P5_MC_TYPE, lotype, hi);
 
-	printk(KERN_EMERG
-		"CPU#%d: Machine Check Exception:  0x%8X (type 0x%8X).\n",
-		smp_processor_id(), loaddr, lotype);
+	pr_emerg("CPU#%d: Machine Check Exception:  0x%8X (type 0x%8X).\n",
+		 smp_processor_id(), loaddr, lotype);
 
 	if (lotype & (1<<5)) {
-		printk(KERN_EMERG
-			"CPU#%d: Possible thermal failure (CPU on fire ?).\n",
-			smp_processor_id());
+		pr_emerg("CPU#%d: Possible thermal failure (CPU on fire ?).\n",
+			 smp_processor_id());
 	}
 
 	add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
@@ -61,12 +59,10 @@
 	/* Read registers before enabling: */
 	rdmsr(MSR_IA32_P5_MC_ADDR, l, h);
 	rdmsr(MSR_IA32_P5_MC_TYPE, l, h);
-	printk(KERN_INFO
-	       "Intel old style machine check architecture supported.\n");
+	pr_info("Intel old style machine check architecture supported.\n");
 
 	/* Enable MCE: */
 	cr4_set_bits(X86_CR4_MCE);
-	printk(KERN_INFO
-	       "Intel old style machine check reporting enabled on CPU#%d.\n",
-	       smp_processor_id());
+	pr_info("Intel old style machine check reporting enabled on CPU#%d.\n",
+		smp_processor_id());
 }
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 2c5aaf8..0b445c2 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -190,7 +190,7 @@
 	/* if we just entered the thermal event */
 	if (new_event) {
 		if (event == THERMAL_THROTTLING_EVENT)
-			printk(KERN_CRIT "CPU%d: %s temperature above threshold, cpu clock throttled (total events = %lu)\n",
+			pr_crit("CPU%d: %s temperature above threshold, cpu clock throttled (total events = %lu)\n",
 				this_cpu,
 				level == CORE_LEVEL ? "Core" : "Package",
 				state->count);
@@ -198,8 +198,7 @@
 	}
 	if (old_event) {
 		if (event == THERMAL_THROTTLING_EVENT)
-			printk(KERN_INFO "CPU%d: %s temperature/speed normal\n",
-				this_cpu,
+			pr_info("CPU%d: %s temperature/speed normal\n", this_cpu,
 				level == CORE_LEVEL ? "Core" : "Package");
 		return 1;
 	}
@@ -417,8 +416,8 @@
 
 static void unexpected_thermal_interrupt(void)
 {
-	printk(KERN_ERR "CPU%d: Unexpected LVT thermal interrupt!\n",
-			smp_processor_id());
+	pr_err("CPU%d: Unexpected LVT thermal interrupt!\n",
+		smp_processor_id());
 }
 
 static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt;
@@ -499,7 +498,7 @@
 
 	if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
 		if (system_state == SYSTEM_BOOTING)
-			printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n", cpu);
+			pr_debug("CPU%d: Thermal monitoring handled by SMI\n", cpu);
 		return;
 	}
 
@@ -557,8 +556,8 @@
 	l = apic_read(APIC_LVTTHMR);
 	apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
 
-	printk_once(KERN_INFO "CPU0: Thermal monitoring enabled (%s)\n",
-		       tm2 ? "TM2" : "TM1");
+	pr_info_once("CPU0: Thermal monitoring enabled (%s)\n",
+		      tm2 ? "TM2" : "TM1");
 
 	/* enable thermal throttle processing */
 	atomic_set(&therm_throt_en, 1);
diff --git a/arch/x86/kernel/cpu/mcheck/threshold.c b/arch/x86/kernel/cpu/mcheck/threshold.c
index 7245980..fcf9ae9 100644
--- a/arch/x86/kernel/cpu/mcheck/threshold.c
+++ b/arch/x86/kernel/cpu/mcheck/threshold.c
@@ -12,8 +12,8 @@
 
 static void default_threshold_interrupt(void)
 {
-	printk(KERN_ERR "Unexpected threshold interrupt at vector %x\n",
-			 THRESHOLD_APIC_VECTOR);
+	pr_err("Unexpected threshold interrupt at vector %x\n",
+		THRESHOLD_APIC_VECTOR);
 }
 
 void (*mce_threshold_vector)(void) = default_threshold_interrupt;
diff --git a/arch/x86/kernel/cpu/mcheck/winchip.c b/arch/x86/kernel/cpu/mcheck/winchip.c
index 01dd870..c6a722e 100644
--- a/arch/x86/kernel/cpu/mcheck/winchip.c
+++ b/arch/x86/kernel/cpu/mcheck/winchip.c
@@ -17,7 +17,7 @@
 {
 	ist_enter(regs);
 
-	printk(KERN_EMERG "CPU0: Machine Check Exception.\n");
+	pr_emerg("CPU0: Machine Check Exception.\n");
 	add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
 
 	ist_exit(regs);
@@ -39,6 +39,5 @@
 
 	cr4_set_bits(X86_CR4_MCE);
 
-	printk(KERN_INFO
-	       "Winchip machine check reporting enabled on CPU#0.\n");
+	pr_info("Winchip machine check reporting enabled on CPU#0.\n");
 }
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index 2233f8a..8581963 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -431,10 +431,6 @@
 	else
 		container = cont_va;
 
-	if (ucode_new_rev)
-		pr_info("microcode: updated early to new patch_level=0x%08x\n",
-			ucode_new_rev);
-
 	eax   = cpuid_eax(0x00000001);
 	eax   = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff);
 
@@ -469,8 +465,7 @@
 	if (mc && rev < mc->hdr.patch_id) {
 		if (!__apply_microcode_amd(mc)) {
 			ucode_new_rev = mc->hdr.patch_id;
-			pr_info("microcode: reload patch_level=0x%08x\n",
-				ucode_new_rev);
+			pr_info("reload patch_level=0x%08x\n", ucode_new_rev);
 		}
 	}
 }
@@ -793,15 +788,13 @@
 		return -EINVAL;
 	}
 
-	patch->data = kzalloc(patch_size, GFP_KERNEL);
+	patch->data = kmemdup(fw + SECTION_HDR_SIZE, patch_size, GFP_KERNEL);
 	if (!patch->data) {
 		pr_err("Patch data allocation failure.\n");
 		kfree(patch);
 		return -EINVAL;
 	}
 
-	/* All looks ok, copy patch... */
-	memcpy(patch->data, fw + SECTION_HDR_SIZE, patch_size);
 	INIT_LIST_HEAD(&patch->plist);
 	patch->patch_id  = mc_hdr->patch_id;
 	patch->equiv_cpu = proc_id;
@@ -953,10 +946,14 @@
 	struct cpuinfo_x86 *c = &boot_cpu_data;
 
 	if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) {
-		pr_warning("AMD CPU family 0x%x not supported\n", c->x86);
+		pr_warn("AMD CPU family 0x%x not supported\n", c->x86);
 		return NULL;
 	}
 
+	if (ucode_new_rev)
+		pr_info_once("microcode updated early to new patch_level=0x%08x\n",
+			     ucode_new_rev);
+
 	return &microcode_amd_ops;
 }
 
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index faec712..ac360bf 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -43,16 +43,8 @@
 #define MICROCODE_VERSION	"2.01"
 
 static struct microcode_ops	*microcode_ops;
-
 static bool dis_ucode_ldr;
 
-static int __init disable_loader(char *str)
-{
-	dis_ucode_ldr = true;
-	return 1;
-}
-__setup("dis_ucode_ldr", disable_loader);
-
 /*
  * Synchronization.
  *
@@ -81,15 +73,16 @@
 
 static bool __init check_loader_disabled_bsp(void)
 {
+	static const char *__dis_opt_str = "dis_ucode_ldr";
+
 #ifdef CONFIG_X86_32
 	const char *cmdline = (const char *)__pa_nodebug(boot_command_line);
-	const char *opt	    = "dis_ucode_ldr";
-	const char *option  = (const char *)__pa_nodebug(opt);
+	const char *option  = (const char *)__pa_nodebug(__dis_opt_str);
 	bool *res = (bool *)__pa_nodebug(&dis_ucode_ldr);
 
 #else /* CONFIG_X86_64 */
 	const char *cmdline = boot_command_line;
-	const char *option  = "dis_ucode_ldr";
+	const char *option  = __dis_opt_str;
 	bool *res = &dis_ucode_ldr;
 #endif
 
@@ -479,7 +472,7 @@
 	enum ucode_state ustate;
 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
 
-	if (uci && uci->valid)
+	if (uci->valid)
 		return UCODE_OK;
 
 	if (collect_cpu_info(cpu))
@@ -630,7 +623,7 @@
 	struct cpuinfo_x86 *c = &boot_cpu_data;
 	int error;
 
-	if (paravirt_enabled() || dis_ucode_ldr)
+	if (dis_ucode_ldr)
 		return -EINVAL;
 
 	if (c->x86_vendor == X86_VENDOR_INTEL)
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index ee81c54..cbb3cf0 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -39,9 +39,15 @@
 #include <asm/setup.h>
 #include <asm/msr.h>
 
-static unsigned long mc_saved_in_initrd[MAX_UCODE_COUNT];
+/*
+ * Temporary microcode blobs pointers storage. We note here the pointers to
+ * microcode blobs we've got from whatever storage (detached initrd, builtin).
+ * Later on, we put those into final storage mc_saved_data.mc_saved.
+ */
+static unsigned long mc_tmp_ptrs[MAX_UCODE_COUNT];
+
 static struct mc_saved_data {
-	unsigned int mc_saved_count;
+	unsigned int num_saved;
 	struct microcode_intel **mc_saved;
 } mc_saved_data;
 
@@ -78,53 +84,50 @@
 }
 
 static inline void
-copy_initrd_ptrs(struct microcode_intel **mc_saved, unsigned long *initrd,
-		  unsigned long off, int num_saved)
+copy_ptrs(struct microcode_intel **mc_saved, unsigned long *mc_ptrs,
+	  unsigned long off, int num_saved)
 {
 	int i;
 
 	for (i = 0; i < num_saved; i++)
-		mc_saved[i] = (struct microcode_intel *)(initrd[i] + off);
+		mc_saved[i] = (struct microcode_intel *)(mc_ptrs[i] + off);
 }
 
 #ifdef CONFIG_X86_32
 static void
-microcode_phys(struct microcode_intel **mc_saved_tmp,
-	       struct mc_saved_data *mc_saved_data)
+microcode_phys(struct microcode_intel **mc_saved_tmp, struct mc_saved_data *mcs)
 {
 	int i;
 	struct microcode_intel ***mc_saved;
 
-	mc_saved = (struct microcode_intel ***)
-		   __pa_nodebug(&mc_saved_data->mc_saved);
-	for (i = 0; i < mc_saved_data->mc_saved_count; i++) {
+	mc_saved = (struct microcode_intel ***)__pa_nodebug(&mcs->mc_saved);
+
+	for (i = 0; i < mcs->num_saved; i++) {
 		struct microcode_intel *p;
 
-		p = *(struct microcode_intel **)
-			__pa_nodebug(mc_saved_data->mc_saved + i);
+		p = *(struct microcode_intel **)__pa_nodebug(mcs->mc_saved + i);
 		mc_saved_tmp[i] = (struct microcode_intel *)__pa_nodebug(p);
 	}
 }
 #endif
 
 static enum ucode_state
-load_microcode(struct mc_saved_data *mc_saved_data, unsigned long *initrd,
-	       unsigned long initrd_start, struct ucode_cpu_info *uci)
+load_microcode(struct mc_saved_data *mcs, unsigned long *mc_ptrs,
+	       unsigned long offset, struct ucode_cpu_info *uci)
 {
 	struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT];
-	unsigned int count = mc_saved_data->mc_saved_count;
+	unsigned int count = mcs->num_saved;
 
-	if (!mc_saved_data->mc_saved) {
-		copy_initrd_ptrs(mc_saved_tmp, initrd, initrd_start, count);
+	if (!mcs->mc_saved) {
+		copy_ptrs(mc_saved_tmp, mc_ptrs, offset, count);
 
 		return load_microcode_early(mc_saved_tmp, count, uci);
 	} else {
 #ifdef CONFIG_X86_32
-		microcode_phys(mc_saved_tmp, mc_saved_data);
+		microcode_phys(mc_saved_tmp, mcs);
 		return load_microcode_early(mc_saved_tmp, count, uci);
 #else
-		return load_microcode_early(mc_saved_data->mc_saved,
-						    count, uci);
+		return load_microcode_early(mcs->mc_saved, count, uci);
 #endif
 	}
 }
@@ -175,25 +178,25 @@
 }
 
 static int
-save_microcode(struct mc_saved_data *mc_saved_data,
+save_microcode(struct mc_saved_data *mcs,
 	       struct microcode_intel **mc_saved_src,
-	       unsigned int mc_saved_count)
+	       unsigned int num_saved)
 {
 	int i, j;
 	struct microcode_intel **saved_ptr;
 	int ret;
 
-	if (!mc_saved_count)
+	if (!num_saved)
 		return -EINVAL;
 
 	/*
 	 * Copy new microcode data.
 	 */
-	saved_ptr = kcalloc(mc_saved_count, sizeof(struct microcode_intel *), GFP_KERNEL);
+	saved_ptr = kcalloc(num_saved, sizeof(struct microcode_intel *), GFP_KERNEL);
 	if (!saved_ptr)
 		return -ENOMEM;
 
-	for (i = 0; i < mc_saved_count; i++) {
+	for (i = 0; i < num_saved; i++) {
 		struct microcode_header_intel *mc_hdr;
 		struct microcode_intel *mc;
 		unsigned long size;
@@ -207,20 +210,18 @@
 		mc_hdr = &mc->hdr;
 		size   = get_totalsize(mc_hdr);
 
-		saved_ptr[i] = kmalloc(size, GFP_KERNEL);
+		saved_ptr[i] = kmemdup(mc, size, GFP_KERNEL);
 		if (!saved_ptr[i]) {
 			ret = -ENOMEM;
 			goto err;
 		}
-
-		memcpy(saved_ptr[i], mc, size);
 	}
 
 	/*
 	 * Point to newly saved microcode.
 	 */
-	mc_saved_data->mc_saved = saved_ptr;
-	mc_saved_data->mc_saved_count = mc_saved_count;
+	mcs->mc_saved  = saved_ptr;
+	mcs->num_saved = num_saved;
 
 	return 0;
 
@@ -284,22 +285,20 @@
  * BSP can stay in the platform.
  */
 static enum ucode_state __init
-get_matching_model_microcode(int cpu, unsigned long start,
-			     void *data, size_t size,
-			     struct mc_saved_data *mc_saved_data,
-			     unsigned long *mc_saved_in_initrd,
+get_matching_model_microcode(unsigned long start, void *data, size_t size,
+			     struct mc_saved_data *mcs, unsigned long *mc_ptrs,
 			     struct ucode_cpu_info *uci)
 {
-	u8 *ucode_ptr = data;
-	unsigned int leftover = size;
-	enum ucode_state state = UCODE_OK;
-	unsigned int mc_size;
-	struct microcode_header_intel *mc_header;
 	struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT];
-	unsigned int mc_saved_count = mc_saved_data->mc_saved_count;
+	struct microcode_header_intel *mc_header;
+	unsigned int num_saved = mcs->num_saved;
+	enum ucode_state state = UCODE_OK;
+	unsigned int leftover = size;
+	u8 *ucode_ptr = data;
+	unsigned int mc_size;
 	int i;
 
-	while (leftover && mc_saved_count < ARRAY_SIZE(mc_saved_tmp)) {
+	while (leftover && num_saved < ARRAY_SIZE(mc_saved_tmp)) {
 
 		if (leftover < sizeof(mc_header))
 			break;
@@ -318,32 +317,31 @@
 		 * the platform, we need to find and save microcode patches
 		 * with the same family and model as the BSP.
 		 */
-		if (matching_model_microcode(mc_header, uci->cpu_sig.sig) !=
-			 UCODE_OK) {
+		if (matching_model_microcode(mc_header, uci->cpu_sig.sig) != UCODE_OK) {
 			ucode_ptr += mc_size;
 			continue;
 		}
 
-		mc_saved_count = _save_mc(mc_saved_tmp, ucode_ptr, mc_saved_count);
+		num_saved = _save_mc(mc_saved_tmp, ucode_ptr, num_saved);
 
 		ucode_ptr += mc_size;
 	}
 
 	if (leftover) {
 		state = UCODE_ERROR;
-		goto out;
+		return state;
 	}
 
-	if (mc_saved_count == 0) {
+	if (!num_saved) {
 		state = UCODE_NFOUND;
-		goto out;
+		return state;
 	}
 
-	for (i = 0; i < mc_saved_count; i++)
-		mc_saved_in_initrd[i] = (unsigned long)mc_saved_tmp[i] - start;
+	for (i = 0; i < num_saved; i++)
+		mc_ptrs[i] = (unsigned long)mc_saved_tmp[i] - start;
 
-	mc_saved_data->mc_saved_count = mc_saved_count;
-out:
+	mcs->num_saved = num_saved;
+
 	return state;
 }
 
@@ -373,7 +371,7 @@
 		native_rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]);
 		csig.pf = 1 << ((val[1] >> 18) & 7);
 	}
-	native_wrmsr(MSR_IA32_UCODE_REV, 0, 0);
+	native_wrmsrl(MSR_IA32_UCODE_REV, 0);
 
 	/* As documented in the SDM: Do a CPUID 1 here */
 	sync_core();
@@ -396,11 +394,11 @@
 	unsigned int sig, pf, rev, total_size, data_size, date;
 	struct ucode_cpu_info uci;
 
-	if (mc_saved_data.mc_saved_count == 0) {
+	if (!mc_saved_data.num_saved) {
 		pr_debug("no microcode data saved.\n");
 		return;
 	}
-	pr_debug("Total microcode saved: %d\n", mc_saved_data.mc_saved_count);
+	pr_debug("Total microcode saved: %d\n", mc_saved_data.num_saved);
 
 	collect_cpu_info_early(&uci);
 
@@ -409,7 +407,7 @@
 	rev = uci.cpu_sig.rev;
 	pr_debug("CPU: sig=0x%x, pf=0x%x, rev=0x%x\n", sig, pf, rev);
 
-	for (i = 0; i < mc_saved_data.mc_saved_count; i++) {
+	for (i = 0; i < mc_saved_data.num_saved; i++) {
 		struct microcode_header_intel *mc_saved_header;
 		struct extended_sigtable *ext_header;
 		int ext_sigcount;
@@ -465,7 +463,7 @@
 {
 	struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT];
 	unsigned int mc_saved_count_init;
-	unsigned int mc_saved_count;
+	unsigned int num_saved;
 	struct microcode_intel **mc_saved;
 	int ret = 0;
 	int i;
@@ -476,23 +474,23 @@
 	 */
 	mutex_lock(&x86_cpu_microcode_mutex);
 
-	mc_saved_count_init = mc_saved_data.mc_saved_count;
-	mc_saved_count = mc_saved_data.mc_saved_count;
+	mc_saved_count_init = mc_saved_data.num_saved;
+	num_saved = mc_saved_data.num_saved;
 	mc_saved = mc_saved_data.mc_saved;
 
-	if (mc_saved && mc_saved_count)
+	if (mc_saved && num_saved)
 		memcpy(mc_saved_tmp, mc_saved,
-		       mc_saved_count * sizeof(struct microcode_intel *));
+		       num_saved * sizeof(struct microcode_intel *));
 	/*
 	 * Save the microcode patch mc in mc_save_tmp structure if it's a newer
 	 * version.
 	 */
-	mc_saved_count = _save_mc(mc_saved_tmp, mc, mc_saved_count);
+	num_saved = _save_mc(mc_saved_tmp, mc, num_saved);
 
 	/*
 	 * Save the mc_save_tmp in global mc_saved_data.
 	 */
-	ret = save_microcode(&mc_saved_data, mc_saved_tmp, mc_saved_count);
+	ret = save_microcode(&mc_saved_data, mc_saved_tmp, num_saved);
 	if (ret) {
 		pr_err("Cannot save microcode patch.\n");
 		goto out;
@@ -536,7 +534,7 @@
 
 static __initdata char ucode_name[] = "kernel/x86/microcode/GenuineIntel.bin";
 static __init enum ucode_state
-scan_microcode(struct mc_saved_data *mc_saved_data, unsigned long *initrd,
+scan_microcode(struct mc_saved_data *mcs, unsigned long *mc_ptrs,
 	       unsigned long start, unsigned long size,
 	       struct ucode_cpu_info *uci)
 {
@@ -551,14 +549,18 @@
 	cd.data = NULL;
 	cd.size = 0;
 
-	cd = find_cpio_data(p, (void *)start, size, &offset);
-	if (!cd.data) {
+	/* try built-in microcode if no initrd */
+	if (!size) {
 		if (!load_builtin_intel_microcode(&cd))
 			return UCODE_ERROR;
+	} else {
+		cd = find_cpio_data(p, (void *)start, size, &offset);
+		if (!cd.data)
+			return UCODE_ERROR;
 	}
 
-	return get_matching_model_microcode(0, start, cd.data, cd.size,
-					    mc_saved_data, initrd, uci);
+	return get_matching_model_microcode(start, cd.data, cd.size,
+					    mcs, mc_ptrs, uci);
 }
 
 /*
@@ -567,14 +569,11 @@
 static void
 print_ucode_info(struct ucode_cpu_info *uci, unsigned int date)
 {
-	int cpu = smp_processor_id();
-
-	pr_info("CPU%d microcode updated early to revision 0x%x, date = %04x-%02x-%02x\n",
-		cpu,
-		uci->cpu_sig.rev,
-		date & 0xffff,
-		date >> 24,
-		(date >> 16) & 0xff);
+	pr_info_once("microcode updated early to revision 0x%x, date = %04x-%02x-%02x\n",
+		     uci->cpu_sig.rev,
+		     date & 0xffff,
+		     date >> 24,
+		     (date >> 16) & 0xff);
 }
 
 #ifdef CONFIG_X86_32
@@ -603,19 +602,19 @@
  */
 static void print_ucode(struct ucode_cpu_info *uci)
 {
-	struct microcode_intel *mc_intel;
+	struct microcode_intel *mc;
 	int *delay_ucode_info_p;
 	int *current_mc_date_p;
 
-	mc_intel = uci->mc;
-	if (mc_intel == NULL)
+	mc = uci->mc;
+	if (!mc)
 		return;
 
 	delay_ucode_info_p = (int *)__pa_nodebug(&delay_ucode_info);
 	current_mc_date_p = (int *)__pa_nodebug(&current_mc_date);
 
 	*delay_ucode_info_p = 1;
-	*current_mc_date_p = mc_intel->hdr.date;
+	*current_mc_date_p = mc->hdr.date;
 }
 #else
 
@@ -630,37 +629,35 @@
 
 static inline void print_ucode(struct ucode_cpu_info *uci)
 {
-	struct microcode_intel *mc_intel;
+	struct microcode_intel *mc;
 
-	mc_intel = uci->mc;
-	if (mc_intel == NULL)
+	mc = uci->mc;
+	if (!mc)
 		return;
 
-	print_ucode_info(uci, mc_intel->hdr.date);
+	print_ucode_info(uci, mc->hdr.date);
 }
 #endif
 
 static int apply_microcode_early(struct ucode_cpu_info *uci, bool early)
 {
-	struct microcode_intel *mc_intel;
+	struct microcode_intel *mc;
 	unsigned int val[2];
 
-	mc_intel = uci->mc;
-	if (mc_intel == NULL)
+	mc = uci->mc;
+	if (!mc)
 		return 0;
 
 	/* write microcode via MSR 0x79 */
-	native_wrmsr(MSR_IA32_UCODE_WRITE,
-	      (unsigned long) mc_intel->bits,
-	      (unsigned long) mc_intel->bits >> 16 >> 16);
-	native_wrmsr(MSR_IA32_UCODE_REV, 0, 0);
+	native_wrmsrl(MSR_IA32_UCODE_WRITE, (unsigned long)mc->bits);
+	native_wrmsrl(MSR_IA32_UCODE_REV, 0);
 
 	/* As documented in the SDM: Do a CPUID 1 here */
 	sync_core();
 
 	/* get the current revision from MSR 0x8B */
 	native_rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
-	if (val[1] != mc_intel->hdr.rev)
+	if (val[1] != mc->hdr.rev)
 		return -1;
 
 #ifdef CONFIG_X86_64
@@ -672,25 +669,26 @@
 	if (early)
 		print_ucode(uci);
 	else
-		print_ucode_info(uci, mc_intel->hdr.date);
+		print_ucode_info(uci, mc->hdr.date);
 
 	return 0;
 }
 
 /*
  * This function converts microcode patch offsets previously stored in
- * mc_saved_in_initrd to pointers and stores the pointers in mc_saved_data.
+ * mc_tmp_ptrs to pointers and stores the pointers in mc_saved_data.
  */
 int __init save_microcode_in_initrd_intel(void)
 {
-	unsigned int count = mc_saved_data.mc_saved_count;
+	unsigned int count = mc_saved_data.num_saved;
 	struct microcode_intel *mc_saved[MAX_UCODE_COUNT];
 	int ret = 0;
 
-	if (count == 0)
+	if (!count)
 		return ret;
 
-	copy_initrd_ptrs(mc_saved, mc_saved_in_initrd, initrd_start, count);
+	copy_ptrs(mc_saved, mc_tmp_ptrs, get_initrd_start(), count);
+
 	ret = save_microcode(&mc_saved_data, mc_saved, count);
 	if (ret)
 		pr_err("Cannot save microcode patches from initrd.\n");
@@ -701,8 +699,7 @@
 }
 
 static void __init
-_load_ucode_intel_bsp(struct mc_saved_data *mc_saved_data,
-		      unsigned long *initrd,
+_load_ucode_intel_bsp(struct mc_saved_data *mcs, unsigned long *mc_ptrs,
 		      unsigned long start, unsigned long size)
 {
 	struct ucode_cpu_info uci;
@@ -710,11 +707,11 @@
 
 	collect_cpu_info_early(&uci);
 
-	ret = scan_microcode(mc_saved_data, initrd, start, size, &uci);
+	ret = scan_microcode(mcs, mc_ptrs, start, size, &uci);
 	if (ret != UCODE_OK)
 		return;
 
-	ret = load_microcode(mc_saved_data, initrd, start, &uci);
+	ret = load_microcode(mcs, mc_ptrs, start, &uci);
 	if (ret != UCODE_OK)
 		return;
 
@@ -728,53 +725,49 @@
 	struct boot_params *p;
 
 	p	= (struct boot_params *)__pa_nodebug(&boot_params);
-	start	= p->hdr.ramdisk_image;
 	size	= p->hdr.ramdisk_size;
 
-	_load_ucode_intel_bsp(
-			(struct mc_saved_data *)__pa_nodebug(&mc_saved_data),
-			(unsigned long *)__pa_nodebug(&mc_saved_in_initrd),
-			start, size);
-#else
-	start	= boot_params.hdr.ramdisk_image + PAGE_OFFSET;
-	size	= boot_params.hdr.ramdisk_size;
+	/*
+	 * Set start only if we have an initrd image. We cannot use initrd_start
+	 * because it is not set that early yet.
+	 */
+	start	= (size ? p->hdr.ramdisk_image : 0);
 
-	_load_ucode_intel_bsp(&mc_saved_data, mc_saved_in_initrd, start, size);
+	_load_ucode_intel_bsp((struct mc_saved_data *)__pa_nodebug(&mc_saved_data),
+			      (unsigned long *)__pa_nodebug(&mc_tmp_ptrs),
+			      start, size);
+#else
+	size	= boot_params.hdr.ramdisk_size;
+	start	= (size ? boot_params.hdr.ramdisk_image + PAGE_OFFSET : 0);
+
+	_load_ucode_intel_bsp(&mc_saved_data, mc_tmp_ptrs, start, size);
 #endif
 }
 
 void load_ucode_intel_ap(void)
 {
-	struct mc_saved_data *mc_saved_data_p;
+	unsigned long *mcs_tmp_p;
+	struct mc_saved_data *mcs_p;
 	struct ucode_cpu_info uci;
-	unsigned long *mc_saved_in_initrd_p;
-	unsigned long initrd_start_addr;
 	enum ucode_state ret;
 #ifdef CONFIG_X86_32
-	unsigned long *initrd_start_p;
 
-	mc_saved_in_initrd_p =
-		(unsigned long *)__pa_nodebug(mc_saved_in_initrd);
-	mc_saved_data_p = (struct mc_saved_data *)__pa_nodebug(&mc_saved_data);
-	initrd_start_p = (unsigned long *)__pa_nodebug(&initrd_start);
-	initrd_start_addr = (unsigned long)__pa_nodebug(*initrd_start_p);
+	mcs_tmp_p = (unsigned long *)__pa_nodebug(mc_tmp_ptrs);
+	mcs_p = (struct mc_saved_data *)__pa_nodebug(&mc_saved_data);
 #else
-	mc_saved_data_p = &mc_saved_data;
-	mc_saved_in_initrd_p = mc_saved_in_initrd;
-	initrd_start_addr = initrd_start;
+	mcs_tmp_p = mc_tmp_ptrs;
+	mcs_p = &mc_saved_data;
 #endif
 
 	/*
 	 * If there is no valid ucode previously saved in memory, no need to
 	 * update ucode on this AP.
 	 */
-	if (mc_saved_data_p->mc_saved_count == 0)
+	if (!mcs_p->num_saved)
 		return;
 
 	collect_cpu_info_early(&uci);
-	ret = load_microcode(mc_saved_data_p, mc_saved_in_initrd_p,
-			     initrd_start_addr, &uci);
-
+	ret = load_microcode(mcs_p, mcs_tmp_p, get_initrd_start_addr(), &uci);
 	if (ret != UCODE_OK)
 		return;
 
@@ -786,13 +779,13 @@
 	struct ucode_cpu_info uci;
 	enum ucode_state ret;
 
-	if (!mc_saved_data.mc_saved_count)
+	if (!mc_saved_data.num_saved)
 		return;
 
 	collect_cpu_info_early(&uci);
 
 	ret = load_microcode_early(mc_saved_data.mc_saved,
-				   mc_saved_data.mc_saved_count, &uci);
+				   mc_saved_data.num_saved, &uci);
 	if (ret != UCODE_OK)
 		return;
 
@@ -825,7 +818,7 @@
  * return 0 - no update found
  * return 1 - found update
  */
-static int get_matching_mc(struct microcode_intel *mc_intel, int cpu)
+static int get_matching_mc(struct microcode_intel *mc, int cpu)
 {
 	struct cpu_signature cpu_sig;
 	unsigned int csig, cpf, crev;
@@ -836,39 +829,36 @@
 	cpf = cpu_sig.pf;
 	crev = cpu_sig.rev;
 
-	return has_newer_microcode(mc_intel, csig, cpf, crev);
+	return has_newer_microcode(mc, csig, cpf, crev);
 }
 
 static int apply_microcode_intel(int cpu)
 {
-	struct microcode_intel *mc_intel;
+	struct microcode_intel *mc;
 	struct ucode_cpu_info *uci;
+	struct cpuinfo_x86 *c;
 	unsigned int val[2];
-	int cpu_num = raw_smp_processor_id();
-	struct cpuinfo_x86 *c = &cpu_data(cpu_num);
-
-	uci = ucode_cpu_info + cpu;
-	mc_intel = uci->mc;
 
 	/* We should bind the task to the CPU */
-	BUG_ON(cpu_num != cpu);
+	if (WARN_ON(raw_smp_processor_id() != cpu))
+		return -1;
 
-	if (mc_intel == NULL)
+	uci = ucode_cpu_info + cpu;
+	mc = uci->mc;
+	if (!mc)
 		return 0;
 
 	/*
 	 * Microcode on this CPU could be updated earlier. Only apply the
-	 * microcode patch in mc_intel when it is newer than the one on this
+	 * microcode patch in mc when it is newer than the one on this
 	 * CPU.
 	 */
-	if (get_matching_mc(mc_intel, cpu) == 0)
+	if (!get_matching_mc(mc, cpu))
 		return 0;
 
 	/* write microcode via MSR 0x79 */
-	wrmsr(MSR_IA32_UCODE_WRITE,
-	      (unsigned long) mc_intel->bits,
-	      (unsigned long) mc_intel->bits >> 16 >> 16);
-	wrmsr(MSR_IA32_UCODE_REV, 0, 0);
+	wrmsrl(MSR_IA32_UCODE_WRITE, (unsigned long)mc->bits);
+	wrmsrl(MSR_IA32_UCODE_REV, 0);
 
 	/* As documented in the SDM: Do a CPUID 1 here */
 	sync_core();
@@ -876,16 +866,19 @@
 	/* get the current revision from MSR 0x8B */
 	rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
 
-	if (val[1] != mc_intel->hdr.rev) {
+	if (val[1] != mc->hdr.rev) {
 		pr_err("CPU%d update to revision 0x%x failed\n",
-		       cpu_num, mc_intel->hdr.rev);
+		       cpu, mc->hdr.rev);
 		return -1;
 	}
+
 	pr_info("CPU%d updated to revision 0x%x, date = %04x-%02x-%02x\n",
-		cpu_num, val[1],
-		mc_intel->hdr.date & 0xffff,
-		mc_intel->hdr.date >> 24,
-		(mc_intel->hdr.date >> 16) & 0xff);
+		cpu, val[1],
+		mc->hdr.date & 0xffff,
+		mc->hdr.date >> 24,
+		(mc->hdr.date >> 16) & 0xff);
+
+	c = &cpu_data(cpu);
 
 	uci->cpu_sig.rev = val[1];
 	c->microcode = val[1];
diff --git a/arch/x86/kernel/cpu/microcode/intel_lib.c b/arch/x86/kernel/cpu/microcode/intel_lib.c
index b96896b..2ce1a7d 100644
--- a/arch/x86/kernel/cpu/microcode/intel_lib.c
+++ b/arch/x86/kernel/cpu/microcode/intel_lib.c
@@ -49,7 +49,7 @@
 	unsigned long total_size, data_size, ext_table_size;
 	struct microcode_header_intel *mc_header = mc;
 	struct extended_sigtable *ext_header = NULL;
-	int sum, orig_sum, ext_sigcount = 0, i;
+	u32 sum, orig_sum, ext_sigcount = 0, i;
 	struct extended_signature *ext_sig;
 
 	total_size = get_totalsize(mc_header);
@@ -57,69 +57,85 @@
 
 	if (data_size + MC_HEADER_SIZE > total_size) {
 		if (print_err)
-			pr_err("error! Bad data size in microcode data file\n");
+			pr_err("Error: bad microcode data file size.\n");
 		return -EINVAL;
 	}
 
 	if (mc_header->ldrver != 1 || mc_header->hdrver != 1) {
 		if (print_err)
-			pr_err("error! Unknown microcode update format\n");
+			pr_err("Error: invalid/unknown microcode update format.\n");
 		return -EINVAL;
 	}
+
 	ext_table_size = total_size - (MC_HEADER_SIZE + data_size);
 	if (ext_table_size) {
+		u32 ext_table_sum = 0;
+		u32 *ext_tablep;
+
 		if ((ext_table_size < EXT_HEADER_SIZE)
 		 || ((ext_table_size - EXT_HEADER_SIZE) % EXT_SIGNATURE_SIZE)) {
 			if (print_err)
-				pr_err("error! Small exttable size in microcode data file\n");
+				pr_err("Error: truncated extended signature table.\n");
 			return -EINVAL;
 		}
+
 		ext_header = mc + MC_HEADER_SIZE + data_size;
 		if (ext_table_size != exttable_size(ext_header)) {
 			if (print_err)
-				pr_err("error! Bad exttable size in microcode data file\n");
+				pr_err("Error: extended signature table size mismatch.\n");
 			return -EFAULT;
 		}
+
 		ext_sigcount = ext_header->count;
-	}
 
-	/* check extended table checksum */
-	if (ext_table_size) {
-		int ext_table_sum = 0;
-		int *ext_tablep = (int *)ext_header;
+		/*
+		 * Check extended table checksum: the sum of all dwords that
+		 * comprise a valid table must be 0.
+		 */
+		ext_tablep = (u32 *)ext_header;
 
-		i = ext_table_size / DWSIZE;
+		i = ext_table_size / sizeof(u32);
 		while (i--)
 			ext_table_sum += ext_tablep[i];
+
 		if (ext_table_sum) {
 			if (print_err)
-				pr_warn("aborting, bad extended signature table checksum\n");
+				pr_warn("Bad extended signature table checksum, aborting.\n");
 			return -EINVAL;
 		}
 	}
 
-	/* calculate the checksum */
+	/*
+	 * Calculate the checksum of update data and header. The checksum of
+	 * valid update data and header including the extended signature table
+	 * must be 0.
+	 */
 	orig_sum = 0;
-	i = (MC_HEADER_SIZE + data_size) / DWSIZE;
+	i = (MC_HEADER_SIZE + data_size) / sizeof(u32);
 	while (i--)
-		orig_sum += ((int *)mc)[i];
+		orig_sum += ((u32 *)mc)[i];
+
 	if (orig_sum) {
 		if (print_err)
-			pr_err("aborting, bad checksum\n");
+			pr_err("Bad microcode data checksum, aborting.\n");
 		return -EINVAL;
 	}
+
 	if (!ext_table_size)
 		return 0;
-	/* check extended signature checksum */
+
+	/*
+	 * Check extended signature checksum: 0 => valid.
+	 */
 	for (i = 0; i < ext_sigcount; i++) {
 		ext_sig = (void *)ext_header + EXT_HEADER_SIZE +
 			  EXT_SIGNATURE_SIZE * i;
-		sum = orig_sum
-			- (mc_header->sig + mc_header->pf + mc_header->cksum)
-			+ (ext_sig->sig + ext_sig->pf + ext_sig->cksum);
+
+		sum = (mc_header->sig + mc_header->pf + mc_header->cksum) -
+		      (ext_sig->sig + ext_sig->pf + ext_sig->cksum);
 		if (sum) {
 			if (print_err)
-				pr_err("aborting, bad checksum\n");
+				pr_err("Bad extended signature checksum, aborting.\n");
 			return -EINVAL;
 		}
 	}
diff --git a/arch/x86/kernel/cpu/mkcapflags.sh b/arch/x86/kernel/cpu/mkcapflags.sh
index 3f20710..6988c74 100644
--- a/arch/x86/kernel/cpu/mkcapflags.sh
+++ b/arch/x86/kernel/cpu/mkcapflags.sh
@@ -1,6 +1,6 @@
 #!/bin/sh
 #
-# Generate the x86_cap/bug_flags[] arrays from include/asm/cpufeature.h
+# Generate the x86_cap/bug_flags[] arrays from include/asm/cpufeatures.h
 #
 
 IN=$1
@@ -49,8 +49,8 @@
 trap 'rm "$OUT"' EXIT
 
 (
-	echo "#ifndef _ASM_X86_CPUFEATURE_H"
-	echo "#include <asm/cpufeature.h>"
+	echo "#ifndef _ASM_X86_CPUFEATURES_H"
+	echo "#include <asm/cpufeatures.h>"
 	echo "#endif"
 	echo ""
 
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 20e242e..4e7c693 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -161,8 +161,8 @@
 	ms_hyperv.misc_features = cpuid_edx(HYPERV_CPUID_FEATURES);
 	ms_hyperv.hints    = cpuid_eax(HYPERV_CPUID_ENLIGHTMENT_INFO);
 
-	printk(KERN_INFO "HyperV: features 0x%x, hints 0x%x\n",
-	       ms_hyperv.features, ms_hyperv.hints);
+	pr_info("HyperV: features 0x%x, hints 0x%x\n",
+		ms_hyperv.features, ms_hyperv.hints);
 
 #ifdef CONFIG_X86_LOCAL_APIC
 	if (ms_hyperv.features & HV_X64_MSR_APIC_FREQUENCY_AVAILABLE) {
@@ -174,8 +174,8 @@
 		rdmsrl(HV_X64_MSR_APIC_FREQUENCY, hv_lapic_frequency);
 		hv_lapic_frequency = div_u64(hv_lapic_frequency, HZ);
 		lapic_timer_frequency = hv_lapic_frequency;
-		printk(KERN_INFO "HyperV: LAPIC Timer Frequency: %#x\n",
-				lapic_timer_frequency);
+		pr_info("HyperV: LAPIC Timer Frequency: %#x\n",
+			lapic_timer_frequency);
 	}
 #endif
 
diff --git a/arch/x86/kernel/cpu/mtrr/centaur.c b/arch/x86/kernel/cpu/mtrr/centaur.c
index 316fe3e..3d68993 100644
--- a/arch/x86/kernel/cpu/mtrr/centaur.c
+++ b/arch/x86/kernel/cpu/mtrr/centaur.c
@@ -103,7 +103,7 @@
 	 */
 	if (type != MTRR_TYPE_WRCOMB &&
 	    (centaur_mcr_type == 0 || type != MTRR_TYPE_UNCACHABLE)) {
-		pr_warning("mtrr: only write-combining%s supported\n",
+		pr_warn("mtrr: only write-combining%s supported\n",
 			   centaur_mcr_type ? " and uncacheable are" : " is");
 		return -EINVAL;
 	}
diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c
index 0d98503..31e951c 100644
--- a/arch/x86/kernel/cpu/mtrr/cleanup.c
+++ b/arch/x86/kernel/cpu/mtrr/cleanup.c
@@ -57,9 +57,9 @@
 static struct var_mtrr_range_state __initdata	range_state[RANGE_NUM];
 
 static int __initdata debug_print;
-#define Dprintk(x...) do { if (debug_print) printk(KERN_DEBUG x); } while (0)
+#define Dprintk(x...) do { if (debug_print) pr_debug(x); } while (0)
 
-#define BIOS_BUG_MSG KERN_WARNING \
+#define BIOS_BUG_MSG \
 	"WARNING: BIOS bug: VAR MTRR %d contains strange UC entry under 1M, check with your system vendor!\n"
 
 static int __init
@@ -81,9 +81,9 @@
 						base, base + size);
 	}
 	if (debug_print) {
-		printk(KERN_DEBUG "After WB checking\n");
+		pr_debug("After WB checking\n");
 		for (i = 0; i < nr_range; i++)
-			printk(KERN_DEBUG "MTRR MAP PFN: %016llx - %016llx\n",
+			pr_debug("MTRR MAP PFN: %016llx - %016llx\n",
 				 range[i].start, range[i].end);
 	}
 
@@ -101,7 +101,7 @@
 		    (mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED) &&
 		    (mtrr_state.enabled & MTRR_STATE_MTRR_FIXED_ENABLED)) {
 			/* Var MTRR contains UC entry below 1M? Skip it: */
-			printk(BIOS_BUG_MSG, i);
+			pr_warn(BIOS_BUG_MSG, i);
 			if (base + size <= (1<<(20-PAGE_SHIFT)))
 				continue;
 			size -= (1<<(20-PAGE_SHIFT)) - base;
@@ -114,11 +114,11 @@
 				 extra_remove_base + extra_remove_size);
 
 	if  (debug_print) {
-		printk(KERN_DEBUG "After UC checking\n");
+		pr_debug("After UC checking\n");
 		for (i = 0; i < RANGE_NUM; i++) {
 			if (!range[i].end)
 				continue;
-			printk(KERN_DEBUG "MTRR MAP PFN: %016llx - %016llx\n",
+			pr_debug("MTRR MAP PFN: %016llx - %016llx\n",
 				 range[i].start, range[i].end);
 		}
 	}
@@ -126,9 +126,9 @@
 	/* sort the ranges */
 	nr_range = clean_sort_range(range, RANGE_NUM);
 	if  (debug_print) {
-		printk(KERN_DEBUG "After sorting\n");
+		pr_debug("After sorting\n");
 		for (i = 0; i < nr_range; i++)
-			printk(KERN_DEBUG "MTRR MAP PFN: %016llx - %016llx\n",
+			pr_debug("MTRR MAP PFN: %016llx - %016llx\n",
 				 range[i].start, range[i].end);
 	}
 
@@ -544,7 +544,7 @@
 		start_base = to_size_factor(start_base, &start_factor),
 		type = range_state[i].type;
 
-		printk(KERN_DEBUG "reg %d, base: %ld%cB, range: %ld%cB, type %s\n",
+		pr_debug("reg %d, base: %ld%cB, range: %ld%cB, type %s\n",
 			i, start_base, start_factor,
 			size_base, size_factor,
 			(type == MTRR_TYPE_UNCACHABLE) ? "UC" :
@@ -713,7 +713,7 @@
 		return 0;
 
 	/* Print original var MTRRs at first, for debugging: */
-	printk(KERN_DEBUG "original variable MTRRs\n");
+	pr_debug("original variable MTRRs\n");
 	print_out_mtrr_range_state();
 
 	memset(range, 0, sizeof(range));
@@ -733,7 +733,7 @@
 					  x_remove_base, x_remove_size);
 
 	range_sums = sum_ranges(range, nr_range);
-	printk(KERN_INFO "total RAM covered: %ldM\n",
+	pr_info("total RAM covered: %ldM\n",
 	       range_sums >> (20 - PAGE_SHIFT));
 
 	if (mtrr_chunk_size && mtrr_gran_size) {
@@ -745,12 +745,11 @@
 
 		if (!result[i].bad) {
 			set_var_mtrr_all(address_bits);
-			printk(KERN_DEBUG "New variable MTRRs\n");
+			pr_debug("New variable MTRRs\n");
 			print_out_mtrr_range_state();
 			return 1;
 		}
-		printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, "
-		       "will find optimal one\n");
+		pr_info("invalid mtrr_gran_size or mtrr_chunk_size, will find optimal one\n");
 	}
 
 	i = 0;
@@ -768,7 +767,7 @@
 				      x_remove_base, x_remove_size, i);
 			if (debug_print) {
 				mtrr_print_out_one_result(i);
-				printk(KERN_INFO "\n");
+				pr_info("\n");
 			}
 
 			i++;
@@ -779,7 +778,7 @@
 	index_good = mtrr_search_optimal_index();
 
 	if (index_good != -1) {
-		printk(KERN_INFO "Found optimal setting for mtrr clean up\n");
+		pr_info("Found optimal setting for mtrr clean up\n");
 		i = index_good;
 		mtrr_print_out_one_result(i);
 
@@ -790,7 +789,7 @@
 		gran_size <<= 10;
 		x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size);
 		set_var_mtrr_all(address_bits);
-		printk(KERN_DEBUG "New variable MTRRs\n");
+		pr_debug("New variable MTRRs\n");
 		print_out_mtrr_range_state();
 		return 1;
 	} else {
@@ -799,8 +798,8 @@
 			mtrr_print_out_one_result(i);
 	}
 
-	printk(KERN_INFO "mtrr_cleanup: can not find optimal value\n");
-	printk(KERN_INFO "please specify mtrr_gran_size/mtrr_chunk_size\n");
+	pr_info("mtrr_cleanup: can not find optimal value\n");
+	pr_info("please specify mtrr_gran_size/mtrr_chunk_size\n");
 
 	return 0;
 }
@@ -918,7 +917,7 @@
 
 	/* kvm/qemu doesn't have mtrr set right, don't trim them all: */
 	if (!highest_pfn) {
-		printk(KERN_INFO "CPU MTRRs all blank - virtualized system.\n");
+		pr_info("CPU MTRRs all blank - virtualized system.\n");
 		return 0;
 	}
 
@@ -973,7 +972,8 @@
 							 end_pfn);
 
 	if (total_trim_size) {
-		pr_warning("WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing %lluMB of RAM.\n", total_trim_size >> 20);
+		pr_warn("WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing %lluMB of RAM.\n",
+			total_trim_size >> 20);
 
 		if (!changed_by_mtrr_cleanup)
 			WARN_ON(1);
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index c870af1..fcbcb2f 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -55,7 +55,7 @@
 
 	rdmsr(MSR_K8_SYSCFG, lo, hi);
 	if (lo & K8_MTRRFIXRANGE_DRAM_MODIFY) {
-		printk(KERN_ERR FW_WARN "MTRR: CPU %u: SYSCFG[MtrrFixDramModEn]"
+		pr_err(FW_WARN "MTRR: CPU %u: SYSCFG[MtrrFixDramModEn]"
 		       " not cleared by BIOS, clearing this bit\n",
 		       smp_processor_id());
 		lo &= ~K8_MTRRFIXRANGE_DRAM_MODIFY;
@@ -501,14 +501,14 @@
 	if (!mask)
 		return;
 	if (mask & MTRR_CHANGE_MASK_FIXED)
-		pr_warning("mtrr: your CPUs had inconsistent fixed MTRR settings\n");
+		pr_warn("mtrr: your CPUs had inconsistent fixed MTRR settings\n");
 	if (mask & MTRR_CHANGE_MASK_VARIABLE)
-		pr_warning("mtrr: your CPUs had inconsistent variable MTRR settings\n");
+		pr_warn("mtrr: your CPUs had inconsistent variable MTRR settings\n");
 	if (mask & MTRR_CHANGE_MASK_DEFTYPE)
-		pr_warning("mtrr: your CPUs had inconsistent MTRRdefType settings\n");
+		pr_warn("mtrr: your CPUs had inconsistent MTRRdefType settings\n");
 
-	printk(KERN_INFO "mtrr: probably your BIOS does not setup all CPUs.\n");
-	printk(KERN_INFO "mtrr: corrected configuration.\n");
+	pr_info("mtrr: probably your BIOS does not setup all CPUs.\n");
+	pr_info("mtrr: corrected configuration.\n");
 }
 
 /*
@@ -519,8 +519,7 @@
 void mtrr_wrmsr(unsigned msr, unsigned a, unsigned b)
 {
 	if (wrmsr_safe(msr, a, b) < 0) {
-		printk(KERN_ERR
-			"MTRR: CPU %u: Writing MSR %x to %x:%x failed\n",
+		pr_err("MTRR: CPU %u: Writing MSR %x to %x:%x failed\n",
 			smp_processor_id(), msr, a, b);
 	}
 }
@@ -607,7 +606,7 @@
 		tmp |= ~((1ULL<<(hi - 1)) - 1);
 
 		if (tmp != mask) {
-			printk(KERN_WARNING "mtrr: your BIOS has configured an incorrect mask, fixing it.\n");
+			pr_warn("mtrr: your BIOS has configured an incorrect mask, fixing it.\n");
 			add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
 			mask = tmp;
 		}
@@ -858,13 +857,13 @@
 	    boot_cpu_data.x86_model == 1 &&
 	    boot_cpu_data.x86_mask <= 7) {
 		if (base & ((1 << (22 - PAGE_SHIFT)) - 1)) {
-			pr_warning("mtrr: base(0x%lx000) is not 4 MiB aligned\n", base);
+			pr_warn("mtrr: base(0x%lx000) is not 4 MiB aligned\n", base);
 			return -EINVAL;
 		}
 		if (!(base + size < 0x70000 || base > 0x7003F) &&
 		    (type == MTRR_TYPE_WRCOMB
 		     || type == MTRR_TYPE_WRBACK)) {
-			pr_warning("mtrr: writable mtrr between 0x70000000 and 0x7003FFFF may hang the CPU.\n");
+			pr_warn("mtrr: writable mtrr between 0x70000000 and 0x7003FFFF may hang the CPU.\n");
 			return -EINVAL;
 		}
 	}
@@ -878,7 +877,7 @@
 	     lbase = lbase >> 1, last = last >> 1)
 		;
 	if (lbase != last) {
-		pr_warning("mtrr: base(0x%lx000) is not aligned on a size(0x%lx000) boundary\n", base, size);
+		pr_warn("mtrr: base(0x%lx000) is not aligned on a size(0x%lx000) boundary\n", base, size);
 		return -EINVAL;
 	}
 	return 0;
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 5c3d149..10f8d47 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -47,7 +47,7 @@
 #include <linux/smp.h>
 #include <linux/syscore_ops.h>
 
-#include <asm/processor.h>
+#include <asm/cpufeature.h>
 #include <asm/e820.h>
 #include <asm/mtrr.h>
 #include <asm/msr.h>
@@ -300,24 +300,24 @@
 		return error;
 
 	if (type >= MTRR_NUM_TYPES) {
-		pr_warning("mtrr: type: %u invalid\n", type);
+		pr_warn("mtrr: type: %u invalid\n", type);
 		return -EINVAL;
 	}
 
 	/* If the type is WC, check that this processor supports it */
 	if ((type == MTRR_TYPE_WRCOMB) && !have_wrcomb()) {
-		pr_warning("mtrr: your processor doesn't support write-combining\n");
+		pr_warn("mtrr: your processor doesn't support write-combining\n");
 		return -ENOSYS;
 	}
 
 	if (!size) {
-		pr_warning("mtrr: zero sized request\n");
+		pr_warn("mtrr: zero sized request\n");
 		return -EINVAL;
 	}
 
 	if ((base | (base + size - 1)) >>
 	    (boot_cpu_data.x86_phys_bits - PAGE_SHIFT)) {
-		pr_warning("mtrr: base or size exceeds the MTRR width\n");
+		pr_warn("mtrr: base or size exceeds the MTRR width\n");
 		return -EINVAL;
 	}
 
@@ -348,7 +348,7 @@
 				} else if (types_compatible(type, ltype))
 					continue;
 			}
-			pr_warning("mtrr: 0x%lx000,0x%lx000 overlaps existing"
+			pr_warn("mtrr: 0x%lx000,0x%lx000 overlaps existing"
 				" 0x%lx000,0x%lx000\n", base, size, lbase,
 				lsize);
 			goto out;
@@ -357,7 +357,7 @@
 		if (ltype != type) {
 			if (types_compatible(type, ltype))
 				continue;
-			pr_warning("mtrr: type mismatch for %lx000,%lx000 old: %s new: %s\n",
+			pr_warn("mtrr: type mismatch for %lx000,%lx000 old: %s new: %s\n",
 				base, size, mtrr_attrib_to_str(ltype),
 				mtrr_attrib_to_str(type));
 			goto out;
@@ -395,7 +395,7 @@
 static int mtrr_check(unsigned long base, unsigned long size)
 {
 	if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) {
-		pr_warning("mtrr: size and base must be multiples of 4 kiB\n");
+		pr_warn("mtrr: size and base must be multiples of 4 kiB\n");
 		pr_debug("mtrr: size: 0x%lx  base: 0x%lx\n", size, base);
 		dump_stack();
 		return -1;
@@ -493,16 +493,16 @@
 		}
 	}
 	if (reg >= max) {
-		pr_warning("mtrr: register: %d too big\n", reg);
+		pr_warn("mtrr: register: %d too big\n", reg);
 		goto out;
 	}
 	mtrr_if->get(reg, &lbase, &lsize, &ltype);
 	if (lsize < 1) {
-		pr_warning("mtrr: MTRR %d not used\n", reg);
+		pr_warn("mtrr: MTRR %d not used\n", reg);
 		goto out;
 	}
 	if (mtrr_usage_table[reg] < 1) {
-		pr_warning("mtrr: reg: %d has count=0\n", reg);
+		pr_warn("mtrr: reg: %d has count=0\n", reg);
 		goto out;
 	}
 	if (--mtrr_usage_table[reg] < 1)
diff --git a/arch/x86/kernel/cpu/rdrand.c b/arch/x86/kernel/cpu/rdrand.c
index 819d949..f6f50c4 100644
--- a/arch/x86/kernel/cpu/rdrand.c
+++ b/arch/x86/kernel/cpu/rdrand.c
@@ -51,7 +51,7 @@
 	for (i = 0; i < SANITY_CHECK_LOOPS; i++) {
 		if (!rdrand_long(&tmp)) {
 			clear_cpu_cap(c, X86_FEATURE_RDRAND);
-			printk_once(KERN_WARNING "rdrand: disabled\n");
+			pr_warn_once("rdrand: disabled\n");
 			return;
 		}
 	}
diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c
index 4c60eaf..cd53135 100644
--- a/arch/x86/kernel/cpu/topology.c
+++ b/arch/x86/kernel/cpu/topology.c
@@ -87,10 +87,10 @@
 	c->x86_max_cores = (core_level_siblings / smp_num_siblings);
 
 	if (!printed) {
-		printk(KERN_INFO  "CPU: Physical Processor ID: %d\n",
+		pr_info("CPU: Physical Processor ID: %d\n",
 		       c->phys_proc_id);
 		if (c->x86_max_cores > 1)
-			printk(KERN_INFO  "CPU: Processor Core ID: %d\n",
+			pr_info("CPU: Processor Core ID: %d\n",
 			       c->cpu_core_id);
 		printed = 1;
 	}
diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c
index 252da7a..3417856 100644
--- a/arch/x86/kernel/cpu/transmeta.c
+++ b/arch/x86/kernel/cpu/transmeta.c
@@ -1,6 +1,6 @@
 #include <linux/kernel.h>
 #include <linux/mm.h>
-#include <asm/processor.h>
+#include <asm/cpufeature.h>
 #include <asm/msr.h>
 #include "cpu.h"
 
@@ -33,7 +33,7 @@
 	if (max >= 0x80860001) {
 		cpuid(0x80860001, &dummy, &cpu_rev, &cpu_freq, &cpu_flags);
 		if (cpu_rev != 0x02000000) {
-			printk(KERN_INFO "CPU: Processor revision %u.%u.%u.%u, %u MHz\n",
+			pr_info("CPU: Processor revision %u.%u.%u.%u, %u MHz\n",
 				(cpu_rev >> 24) & 0xff,
 				(cpu_rev >> 16) & 0xff,
 				(cpu_rev >> 8) & 0xff,
@@ -44,10 +44,10 @@
 	if (max >= 0x80860002) {
 		cpuid(0x80860002, &new_cpu_rev, &cms_rev1, &cms_rev2, &dummy);
 		if (cpu_rev == 0x02000000) {
-			printk(KERN_INFO "CPU: Processor revision %08X, %u MHz\n",
+			pr_info("CPU: Processor revision %08X, %u MHz\n",
 				new_cpu_rev, cpu_freq);
 		}
-		printk(KERN_INFO "CPU: Code Morphing Software revision %u.%u.%u-%u-%u\n",
+		pr_info("CPU: Code Morphing Software revision %u.%u.%u-%u-%u\n",
 		       (cms_rev1 >> 24) & 0xff,
 		       (cms_rev1 >> 16) & 0xff,
 		       (cms_rev1 >> 8) & 0xff,
@@ -76,7 +76,7 @@
 		      (void *)&cpu_info[56],
 		      (void *)&cpu_info[60]);
 		cpu_info[64] = '\0';
-		printk(KERN_INFO "CPU: %s\n", cpu_info);
+		pr_info("CPU: %s\n", cpu_info);
 	}
 
 	/* Unhide possibly hidden capability flags */
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index 628a059..364e583 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -62,7 +62,7 @@
 	tsc_hz = eax | (((uint64_t)ebx) << 32);
 	do_div(tsc_hz, 1000);
 	BUG_ON(tsc_hz >> 32);
-	printk(KERN_INFO "TSC freq read from hypervisor : %lu.%03lu MHz\n",
+	pr_info("TSC freq read from hypervisor : %lu.%03lu MHz\n",
 			 (unsigned long) tsc_hz / 1000,
 			 (unsigned long) tsc_hz % 1000);
 
@@ -84,8 +84,7 @@
 	if (ebx != UINT_MAX)
 		x86_platform.calibrate_tsc = vmware_get_tsc_khz;
 	else
-		printk(KERN_WARNING
-		       "Failed to get TSC freq from the hypervisor\n");
+		pr_warn("Failed to get TSC freq from the hypervisor\n");
 }
 
 /*
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 58f3431..9ef978d 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -57,10 +57,9 @@
 	struct kimage *image;
 	/*
 	 * Total number of ram ranges we have after various adjustments for
-	 * GART, crash reserved region etc.
+	 * crash reserved region, etc.
 	 */
 	unsigned int max_nr_ranges;
-	unsigned long gart_start, gart_end;
 
 	/* Pointer to elf header */
 	void *ehdr;
@@ -201,17 +200,6 @@
 	return 0;
 }
 
-static int get_gart_ranges_callback(u64 start, u64 end, void *arg)
-{
-	struct crash_elf_data *ced = arg;
-
-	ced->gart_start = start;
-	ced->gart_end = end;
-
-	/* Not expecting more than 1 gart aperture */
-	return 1;
-}
-
 
 /* Gather all the required information to prepare elf headers for ram regions */
 static void fill_up_crash_elf_data(struct crash_elf_data *ced,
@@ -226,22 +214,6 @@
 
 	ced->max_nr_ranges = nr_ranges;
 
-	/*
-	 * We don't create ELF headers for GART aperture as an attempt
-	 * to dump this memory in second kernel leads to hang/crash.
-	 * If gart aperture is present, one needs to exclude that region
-	 * and that could lead to need of extra phdr.
-	 */
-	walk_iomem_res("GART", IORESOURCE_MEM, 0, -1,
-				ced, get_gart_ranges_callback);
-
-	/*
-	 * If we have gart region, excluding that could potentially split
-	 * a memory range, resulting in extra header. Account for  that.
-	 */
-	if (ced->gart_end)
-		ced->max_nr_ranges++;
-
 	/* Exclusion of crash region could split memory ranges */
 	ced->max_nr_ranges++;
 
@@ -350,13 +322,6 @@
 			return ret;
 	}
 
-	/* Exclude GART region */
-	if (ced->gart_end) {
-		ret = exclude_mem_range(cmem, ced->gart_start, ced->gart_end);
-		if (ret)
-			return ret;
-	}
-
 	return ret;
 }
 
@@ -599,12 +564,12 @@
 	/* Add ACPI tables */
 	cmd.type = E820_ACPI;
 	flags = IORESOURCE_MEM | IORESOURCE_BUSY;
-	walk_iomem_res("ACPI Tables", flags, 0, -1, &cmd,
+	walk_iomem_res_desc(IORES_DESC_ACPI_TABLES, flags, 0, -1, &cmd,
 		       memmap_entry_callback);
 
 	/* Add ACPI Non-volatile Storage */
 	cmd.type = E820_NVS;
-	walk_iomem_res("ACPI Non-volatile Storage", flags, 0, -1, &cmd,
+	walk_iomem_res_desc(IORES_DESC_ACPI_NV_STORAGE, flags, 0, -1, &cmd,
 			memmap_entry_callback);
 
 	/* Add crashk_low_res region */
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 569c1e4..621b501 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -24,6 +24,7 @@
 #include <asm/e820.h>
 #include <asm/proto.h>
 #include <asm/setup.h>
+#include <asm/cpufeature.h>
 
 /*
  * The e820 map is the map that gets modified e.g. with command line parameters
@@ -925,6 +926,41 @@
 	}
 }
 
+static unsigned long e820_type_to_iomem_type(int e820_type)
+{
+	switch (e820_type) {
+	case E820_RESERVED_KERN:
+	case E820_RAM:
+		return IORESOURCE_SYSTEM_RAM;
+	case E820_ACPI:
+	case E820_NVS:
+	case E820_UNUSABLE:
+	case E820_PRAM:
+	case E820_PMEM:
+	default:
+		return IORESOURCE_MEM;
+	}
+}
+
+static unsigned long e820_type_to_iores_desc(int e820_type)
+{
+	switch (e820_type) {
+	case E820_ACPI:
+		return IORES_DESC_ACPI_TABLES;
+	case E820_NVS:
+		return IORES_DESC_ACPI_NV_STORAGE;
+	case E820_PMEM:
+		return IORES_DESC_PERSISTENT_MEMORY;
+	case E820_PRAM:
+		return IORES_DESC_PERSISTENT_MEMORY_LEGACY;
+	case E820_RESERVED_KERN:
+	case E820_RAM:
+	case E820_UNUSABLE:
+	default:
+		return IORES_DESC_NONE;
+	}
+}
+
 static bool do_mark_busy(u32 type, struct resource *res)
 {
 	/* this is the legacy bios/dos rom-shadow + mmio region */
@@ -967,7 +1003,8 @@
 		res->start = e820.map[i].addr;
 		res->end = end;
 
-		res->flags = IORESOURCE_MEM;
+		res->flags = e820_type_to_iomem_type(e820.map[i].type);
+		res->desc = e820_type_to_iores_desc(e820.map[i].type);
 
 		/*
 		 * don't register the region that could be conflicted with
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index d25097c..0b1b9ab 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -114,6 +114,10 @@
 	kernel_fpu_disable();
 
 	if (fpu->fpregs_active) {
+		/*
+		 * Ignore return value -- we don't care if reg state
+		 * is clobbered.
+		 */
 		copy_fpregs_to_fpstate(fpu);
 	} else {
 		this_cpu_write(fpu_fpregs_owner_ctx, NULL);
@@ -189,8 +193,12 @@
 
 	preempt_disable();
 	if (fpu->fpregs_active) {
-		if (!copy_fpregs_to_fpstate(fpu))
-			fpregs_deactivate(fpu);
+		if (!copy_fpregs_to_fpstate(fpu)) {
+			if (use_eager_fpu())
+				copy_kernel_to_fpregs(&fpu->state);
+			else
+				fpregs_deactivate(fpu);
+		}
 	}
 	preempt_enable();
 }
@@ -223,14 +231,15 @@
 }
 EXPORT_SYMBOL_GPL(fpstate_init);
 
-/*
- * Copy the current task's FPU state to a new task's FPU context.
- *
- * In both the 'eager' and the 'lazy' case we save hardware registers
- * directly to the destination buffer.
- */
-static void fpu_copy(struct fpu *dst_fpu, struct fpu *src_fpu)
+int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
 {
+	dst_fpu->counter = 0;
+	dst_fpu->fpregs_active = 0;
+	dst_fpu->last_cpu = -1;
+
+	if (!src_fpu->fpstate_active || !cpu_has_fpu)
+		return 0;
+
 	WARN_ON_FPU(src_fpu != &current->thread.fpu);
 
 	/*
@@ -243,10 +252,9 @@
 	/*
 	 * Save current FPU registers directly into the child
 	 * FPU context, without any memory-to-memory copying.
-	 *
-	 * If the FPU context got destroyed in the process (FNSAVE
-	 * done on old CPUs) then copy it back into the source
-	 * context and mark the current task for lazy restore.
+	 * In lazy mode, if the FPU context isn't loaded into
+	 * fpregs, CR0.TS will be set and do_device_not_available
+	 * will load the FPU context.
 	 *
 	 * We have to do all this with preemption disabled,
 	 * mostly because of the FNSAVE case, because in that
@@ -259,19 +267,13 @@
 	preempt_disable();
 	if (!copy_fpregs_to_fpstate(dst_fpu)) {
 		memcpy(&src_fpu->state, &dst_fpu->state, xstate_size);
-		fpregs_deactivate(src_fpu);
+
+		if (use_eager_fpu())
+			copy_kernel_to_fpregs(&src_fpu->state);
+		else
+			fpregs_deactivate(src_fpu);
 	}
 	preempt_enable();
-}
-
-int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
-{
-	dst_fpu->counter = 0;
-	dst_fpu->fpregs_active = 0;
-	dst_fpu->last_cpu = -1;
-
-	if (src_fpu->fpstate_active && cpu_has_fpu)
-		fpu_copy(dst_fpu, src_fpu);
 
 	return 0;
 }
@@ -409,8 +411,10 @@
 {
 	if (use_xsave())
 		copy_kernel_to_xregs(&init_fpstate.xsave, -1);
-	else
+	else if (static_cpu_has(X86_FEATURE_FXSR))
 		copy_kernel_to_fxregs(&init_fpstate.fxsave);
+	else
+		copy_kernel_to_fregs(&init_fpstate.fsave);
 }
 
 /*
@@ -423,7 +427,7 @@
 {
 	WARN_ON_FPU(fpu != &current->thread.fpu); /* Almost certainly an anomaly */
 
-	if (!use_eager_fpu()) {
+	if (!use_eager_fpu() || !static_cpu_has(X86_FEATURE_FPU)) {
 		/* FPU state will be reallocated lazily at the first use. */
 		fpu__drop(fpu);
 	} else {
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index 6d9f0a7..54c86ff 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -78,13 +78,15 @@
 	cr0 &= ~(X86_CR0_TS | X86_CR0_EM);
 	write_cr0(cr0);
 
-	asm volatile("fninit ; fnstsw %0 ; fnstcw %1"
-		     : "+m" (fsw), "+m" (fcw));
+	if (!test_bit(X86_FEATURE_FPU, (unsigned long *)cpu_caps_cleared)) {
+		asm volatile("fninit ; fnstsw %0 ; fnstcw %1"
+			     : "+m" (fsw), "+m" (fcw));
 
-	if (fsw == 0 && (fcw & 0x103f) == 0x003f)
-		set_cpu_cap(c, X86_FEATURE_FPU);
-	else
-		clear_cpu_cap(c, X86_FEATURE_FPU);
+		if (fsw == 0 && (fcw & 0x103f) == 0x003f)
+			set_cpu_cap(c, X86_FEATURE_FPU);
+		else
+			clear_cpu_cap(c, X86_FEATURE_FPU);
+	}
 
 #ifndef CONFIG_MATH_EMULATION
 	if (!cpu_has_fpu) {
@@ -132,7 +134,7 @@
 	 * Set up the legacy init FPU context. (xstate init might overwrite this
 	 * with a more modern format, if the CPU supports it.)
 	 */
-	fpstate_init_fxstate(&init_fpstate.fxsave);
+	fpstate_init(&init_fpstate);
 
 	fpu__init_system_mxcsr();
 }
@@ -260,7 +262,10 @@
  * not only saved the restores along the way, but we also have the
  * FPU ready to be used for the original task.
  *
- * 'eager' switching is used on modern CPUs, there we switch the FPU
+ * 'lazy' is deprecated because it's almost never a performance win
+ * and it's much more complicated than 'eager'.
+ *
+ * 'eager' switching is by default on all CPUs, there we switch the FPU
  * state during every context switch, regardless of whether the task
  * has used FPU instructions in that time slice or not. This is done
  * because modern FPU context saving instructions are able to optimize
@@ -271,7 +276,7 @@
  *   to use 'eager' restores, if we detect that a task is using the FPU
  *   frequently. See the fpu->counter logic in fpu/internal.h for that. ]
  */
-static enum { AUTO, ENABLE, DISABLE } eagerfpu = AUTO;
+static enum { ENABLE, DISABLE } eagerfpu = ENABLE;
 
 /*
  * Find supported xfeatures based on cpu features and command-line input.
@@ -300,12 +305,6 @@
 static void __init fpu__clear_eager_fpu_features(void)
 {
 	setup_clear_cpu_cap(X86_FEATURE_MPX);
-	setup_clear_cpu_cap(X86_FEATURE_AVX);
-	setup_clear_cpu_cap(X86_FEATURE_AVX2);
-	setup_clear_cpu_cap(X86_FEATURE_AVX512F);
-	setup_clear_cpu_cap(X86_FEATURE_AVX512PF);
-	setup_clear_cpu_cap(X86_FEATURE_AVX512ER);
-	setup_clear_cpu_cap(X86_FEATURE_AVX512CD);
 }
 
 /*
@@ -348,15 +347,9 @@
  */
 static void __init fpu__init_parse_early_param(void)
 {
-	/*
-	 * No need to check "eagerfpu=auto" again, since it is the
-	 * initial default.
-	 */
 	if (cmdline_find_option_bool(boot_command_line, "eagerfpu=off")) {
 		eagerfpu = DISABLE;
 		fpu__clear_eager_fpu_features();
-	} else if (cmdline_find_option_bool(boot_command_line, "eagerfpu=on")) {
-		eagerfpu = ENABLE;
 	}
 
 	if (cmdline_find_option_bool(boot_command_line, "no387"))
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index d425cda5..6e8354f 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -51,6 +51,9 @@
 	setup_clear_cpu_cap(X86_FEATURE_AVX512PF);
 	setup_clear_cpu_cap(X86_FEATURE_AVX512ER);
 	setup_clear_cpu_cap(X86_FEATURE_AVX512CD);
+	setup_clear_cpu_cap(X86_FEATURE_AVX512DQ);
+	setup_clear_cpu_cap(X86_FEATURE_AVX512BW);
+	setup_clear_cpu_cap(X86_FEATURE_AVX512VL);
 	setup_clear_cpu_cap(X86_FEATURE_MPX);
 	setup_clear_cpu_cap(X86_FEATURE_XGETBV1);
 }
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 29408d6..702547c 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -81,9 +81,9 @@
 static unsigned long text_ip_addr(unsigned long ip)
 {
 	/*
-	 * On x86_64, kernel text mappings are mapped read-only with
-	 * CONFIG_DEBUG_RODATA. So we use the kernel identity mapping instead
-	 * of the kernel text mapping to modify the kernel text.
+	 * On x86_64, kernel text mappings are mapped read-only, so we use
+	 * the kernel identity mapping instead of the kernel text mapping
+	 * to modify the kernel text.
 	 *
 	 * For 32bit kernels, these mappings are same and we can use
 	 * kernel identity mapping to modify code.
@@ -697,9 +697,8 @@
 #endif
 
 /* Defined as markers to the end of the ftrace default trampolines */
-extern void ftrace_caller_end(void);
 extern void ftrace_regs_caller_end(void);
-extern void ftrace_return(void);
+extern void ftrace_epilogue(void);
 extern void ftrace_caller_op_ptr(void);
 extern void ftrace_regs_caller_op_ptr(void);
 
@@ -746,7 +745,7 @@
 		op_offset = (unsigned long)ftrace_regs_caller_op_ptr;
 	} else {
 		start_offset = (unsigned long)ftrace_caller;
-		end_offset = (unsigned long)ftrace_caller_end;
+		end_offset = (unsigned long)ftrace_epilogue;
 		op_offset = (unsigned long)ftrace_caller_op_ptr;
 	}
 
@@ -754,7 +753,7 @@
 
 	/*
 	 * Allocate enough size to store the ftrace_caller code,
-	 * the jmp to ftrace_return, as well as the address of
+	 * the jmp to ftrace_epilogue, as well as the address of
 	 * the ftrace_ops this trampoline is used for.
 	 */
 	trampoline = alloc_tramp(size + MCOUNT_INSN_SIZE + sizeof(void *));
@@ -772,8 +771,8 @@
 
 	ip = (unsigned long)trampoline + size;
 
-	/* The trampoline ends with a jmp to ftrace_return */
-	jmp = ftrace_jmp_replace(ip, (unsigned long)ftrace_return);
+	/* The trampoline ends with a jmp to ftrace_epilogue */
+	jmp = ftrace_jmp_replace(ip, (unsigned long)ftrace_epilogue);
 	memcpy(trampoline + size, jmp, MCOUNT_INSN_SIZE);
 
 	/*
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index f129a9a..1f4422d 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -40,13 +40,8 @@
 /* Wipe all early page tables except for the kernel symbol map */
 static void __init reset_early_page_tables(void)
 {
-	unsigned long i;
-
-	for (i = 0; i < PTRS_PER_PGD-1; i++)
-		early_level4_pgt[i].pgd = 0;
-
+	memset(early_level4_pgt, 0, sizeof(pgd_t)*(PTRS_PER_PGD-1));
 	next_early_pgt = 0;
-
 	write_cr3(__pa_nodebug(early_level4_pgt));
 }
 
@@ -54,7 +49,6 @@
 int __init early_make_pgtable(unsigned long address)
 {
 	unsigned long physaddr = address - __PAGE_OFFSET;
-	unsigned long i;
 	pgdval_t pgd, *pgd_p;
 	pudval_t pud, *pud_p;
 	pmdval_t pmd, *pmd_p;
@@ -81,8 +75,7 @@
 		}
 
 		pud_p = (pudval_t *)early_dynamic_pgts[next_early_pgt++];
-		for (i = 0; i < PTRS_PER_PUD; i++)
-			pud_p[i] = 0;
+		memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD);
 		*pgd_p = (pgdval_t)pud_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE;
 	}
 	pud_p += pud_index(address);
@@ -97,8 +90,7 @@
 		}
 
 		pmd_p = (pmdval_t *)early_dynamic_pgts[next_early_pgt++];
-		for (i = 0; i < PTRS_PER_PMD; i++)
-			pmd_p[i] = 0;
+		memset(pmd_p, 0, sizeof(*pmd_p) * PTRS_PER_PMD);
 		*pud_p = (pudval_t)pmd_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE;
 	}
 	pmd = (physaddr & PMD_MASK) + early_pmd_flags;
@@ -192,5 +184,13 @@
 
 	reserve_ebda_region();
 
+	switch (boot_params.hdr.hardware_subarch) {
+	case X86_SUBARCH_INTEL_MID:
+		x86_intel_mid_early_setup();
+		break;
+	default:
+		break;
+	}
+
 	start_kernel();
 }
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 6bc9ae2..54cdbd2 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -19,7 +19,7 @@
 #include <asm/setup.h>
 #include <asm/processor-flags.h>
 #include <asm/msr-index.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
 #include <asm/percpu.h>
 #include <asm/nops.h>
 #include <asm/bootparam.h>
@@ -389,6 +389,12 @@
 	/* Make changes effective */
 	wrmsr
 
+	/*
+	 * And make sure that all the mappings we set up have NX set from
+	 * the beginning.
+	 */
+	orl $(1 << (_PAGE_BIT_NX - 32)), pa(__supported_pte_mask + 4)
+
 enable_paging:
 
 /*
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index ffdc0e8..22fbf9d 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -38,7 +38,6 @@
 #define pud_index(x)	(((x) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
 
 L4_PAGE_OFFSET = pgd_index(__PAGE_OFFSET)
-L3_PAGE_OFFSET = pud_index(__PAGE_OFFSET)
 L4_START_KERNEL = pgd_index(__START_KERNEL_map)
 L3_START_KERNEL = pud_index(__START_KERNEL_map)
 
@@ -76,9 +75,7 @@
 	subq	$_text - __START_KERNEL_map, %rbp
 
 	/* Is the address not 2M aligned? */
-	movq	%rbp, %rax
-	andl	$~PMD_PAGE_MASK, %eax
-	testl	%eax, %eax
+	testl	$~PMD_PAGE_MASK, %ebp
 	jnz	bad_address
 
 	/*
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index b8e6ff5..be0ebbb 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -12,6 +12,7 @@
 #include <linux/pm.h>
 #include <linux/io.h>
 
+#include <asm/cpufeature.h>
 #include <asm/irqdomain.h>
 #include <asm/fixmap.h>
 #include <asm/hpet.h>
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index f8062aa..61521dc 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -462,7 +462,7 @@
 		 * non intr-remapping case, we can't wait till this interrupt
 		 * arrives at this cpu before completing the irq move.
 		 */
-		irq_force_complete_move(irq);
+		irq_force_complete_move(desc);
 
 		if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
 			break_affinity = 1;
@@ -470,6 +470,15 @@
 		}
 
 		chip = irq_data_get_irq_chip(data);
+		/*
+		 * The interrupt descriptor might have been cleaned up
+		 * already, but it is not yet removed from the radix tree
+		 */
+		if (!chip) {
+			raw_spin_unlock(&desc->lock);
+			continue;
+		}
+
 		if (!irqd_can_move_in_process_context(data) && chip->irq_mask)
 			chip->irq_mask(data);
 
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 44256a6..ed15cd48 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -750,9 +750,7 @@
 int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt)
 {
 	int err;
-#ifdef CONFIG_DEBUG_RODATA
 	char opc[BREAK_INSTR_SIZE];
-#endif /* CONFIG_DEBUG_RODATA */
 
 	bpt->type = BP_BREAKPOINT;
 	err = probe_kernel_read(bpt->saved_instr, (char *)bpt->bpt_addr,
@@ -761,7 +759,6 @@
 		return err;
 	err = probe_kernel_write((char *)bpt->bpt_addr,
 				 arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE);
-#ifdef CONFIG_DEBUG_RODATA
 	if (!err)
 		return err;
 	/*
@@ -778,13 +775,12 @@
 	if (memcmp(opc, arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE))
 		return -EINVAL;
 	bpt->type = BP_POKE_BREAKPOINT;
-#endif /* CONFIG_DEBUG_RODATA */
+
 	return err;
 }
 
 int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt)
 {
-#ifdef CONFIG_DEBUG_RODATA
 	int err;
 	char opc[BREAK_INSTR_SIZE];
 
@@ -801,8 +797,8 @@
 	if (err || memcmp(opc, bpt->saved_instr, BREAK_INSTR_SIZE))
 		goto knl_write;
 	return err;
+
 knl_write:
-#endif /* CONFIG_DEBUG_RODATA */
 	return probe_kernel_write((char *)bpt->bpt_addr,
 				  (char *)bpt->saved_instr, BREAK_INSTR_SIZE);
 }
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 1deffe6..0f05dee 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -988,7 +988,7 @@
 		 * In case the user-specified fault handler returned
 		 * zero, try to fix up.
 		 */
-		if (fixup_exception(regs))
+		if (fixup_exception(regs, trapnr))
 			return 1;
 
 		/*
diff --git a/arch/x86/kernel/mcount_64.S b/arch/x86/kernel/mcount_64.S
index 87e1762..ed48a9f 100644
--- a/arch/x86/kernel/mcount_64.S
+++ b/arch/x86/kernel/mcount_64.S
@@ -168,12 +168,14 @@
 	restore_mcount_regs
 
 	/*
-	 * The copied trampoline must call ftrace_return as it
+	 * The copied trampoline must call ftrace_epilogue as it
 	 * still may need to call the function graph tracer.
+	 *
+	 * The code up to this label is copied into trampolines so
+	 * think twice before adding any new code or changing the
+	 * layout here.
 	 */
-GLOBAL(ftrace_caller_end)
-
-GLOBAL(ftrace_return)
+GLOBAL(ftrace_epilogue)
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 GLOBAL(ftrace_graph_call)
@@ -244,14 +246,14 @@
 	popfq
 
 	/*
-	 * As this jmp to ftrace_return can be a short jump
+	 * As this jmp to ftrace_epilogue can be a short jump
 	 * it must not be copied into the trampoline.
 	 * The trampoline will add the code to jump
 	 * to the return.
 	 */
 GLOBAL(ftrace_regs_caller_end)
 
-	jmp ftrace_return
+	jmp ftrace_epilogue
 
 END(ftrace_regs_caller)
 
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 30ca760..97340f2 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -408,7 +408,7 @@
 	processor.cpuflag = CPU_ENABLED;
 	processor.cpufeature = (boot_cpu_data.x86 << 8) |
 	    (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask;
-	processor.featureflag = boot_cpu_data.x86_capability[0];
+	processor.featureflag = boot_cpu_data.x86_capability[CPUID_1_EDX];
 	processor.reserved[0] = 0;
 	processor.reserved[1] = 0;
 	for (i = 0; i < 2; i++) {
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index 64f9616..7f3550a 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -40,7 +40,7 @@
 #include <linux/uaccess.h>
 #include <linux/gfp.h>
 
-#include <asm/processor.h>
+#include <asm/cpufeature.h>
 #include <asm/msr.h>
 
 static struct class *msr_class;
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 8a2cdd7..04b132a 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -30,6 +30,7 @@
 #include <asm/nmi.h>
 #include <asm/x86_init.h>
 #include <asm/reboot.h>
+#include <asm/cache.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/nmi.h>
@@ -69,7 +70,7 @@
 
 static DEFINE_PER_CPU(struct nmi_stats, nmi_stats);
 
-static int ignore_nmis;
+static int ignore_nmis __read_mostly;
 
 int unknown_nmi_panic;
 /*
diff --git a/arch/x86/kernel/pmem.c b/arch/x86/kernel/pmem.c
index 14415af..92f7014 100644
--- a/arch/x86/kernel/pmem.c
+++ b/arch/x86/kernel/pmem.c
@@ -13,11 +13,11 @@
 
 static __init int register_e820_pmem(void)
 {
-	char *pmem = "Persistent Memory (legacy)";
 	struct platform_device *pdev;
 	int rc;
 
-	rc = walk_iomem_res(pmem, IORESOURCE_MEM, 0, -1, NULL, found);
+	rc = walk_iomem_res_desc(IORES_DESC_PERSISTENT_MEMORY_LEGACY,
+				 IORESOURCE_MEM, 0, -1, NULL, found);
 	if (rc <= 0)
 		return 0;
 
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 9f7c21c..2915d54 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -57,6 +57,9 @@
 	  */
 	.io_bitmap		= { [0 ... IO_BITMAP_LONGS] = ~0 },
 #endif
+#ifdef CONFIG_X86_32
+	.SYSENTER_stack_canary	= STACK_END_MAGIC,
+#endif
 };
 EXPORT_PER_CPU_SYMBOL(cpu_tss);
 
@@ -418,9 +421,9 @@
 	if (!current_set_polling_and_test()) {
 		trace_cpu_idle_rcuidle(1, smp_processor_id());
 		if (this_cpu_has(X86_BUG_CLFLUSH_MONITOR)) {
-			smp_mb(); /* quirk */
+			mb(); /* quirk */
 			clflush((void *)&current_thread_info()->flags);
-			smp_mb(); /* quirk */
+			mb(); /* quirk */
 		}
 
 		__monitor((void *)&current_thread_info()->flags, 0, 0);
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index d3d80e6..aa52c10 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -152,21 +152,21 @@
 	.name	= "Kernel data",
 	.start	= 0,
 	.end	= 0,
-	.flags	= IORESOURCE_BUSY | IORESOURCE_MEM
+	.flags	= IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM
 };
 
 static struct resource code_resource = {
 	.name	= "Kernel code",
 	.start	= 0,
 	.end	= 0,
-	.flags	= IORESOURCE_BUSY | IORESOURCE_MEM
+	.flags	= IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM
 };
 
 static struct resource bss_resource = {
 	.name	= "Kernel bss",
 	.start	= 0,
 	.end	= 0,
-	.flags	= IORESOURCE_BUSY | IORESOURCE_MEM
+	.flags	= IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM
 };
 
 
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index cb6282c..548ddf7 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -61,7 +61,38 @@
 	regs->seg = GET_SEG(seg) | 3;			\
 } while (0)
 
-int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc)
+#ifdef CONFIG_X86_64
+/*
+ * If regs->ss will cause an IRET fault, change it.  Otherwise leave it
+ * alone.  Using this generally makes no sense unless
+ * user_64bit_mode(regs) would return true.
+ */
+static void force_valid_ss(struct pt_regs *regs)
+{
+	u32 ar;
+	asm volatile ("lar %[old_ss], %[ar]\n\t"
+		      "jz 1f\n\t"		/* If invalid: */
+		      "xorl %[ar], %[ar]\n\t"	/* set ar = 0 */
+		      "1:"
+		      : [ar] "=r" (ar)
+		      : [old_ss] "rm" ((u16)regs->ss));
+
+	/*
+	 * For a valid 64-bit user context, we need DPL 3, type
+	 * read-write data or read-write exp-down data, and S and P
+	 * set.  We can't use VERW because VERW doesn't check the
+	 * P bit.
+	 */
+	ar &= AR_DPL_MASK | AR_S | AR_P | AR_TYPE_MASK;
+	if (ar != (AR_DPL3 | AR_S | AR_P | AR_TYPE_RWDATA) &&
+	    ar != (AR_DPL3 | AR_S | AR_P | AR_TYPE_RWDATA_EXPDOWN))
+		regs->ss = __USER_DS;
+}
+#endif
+
+static int restore_sigcontext(struct pt_regs *regs,
+			      struct sigcontext __user *sc,
+			      unsigned long uc_flags)
 {
 	unsigned long buf_val;
 	void __user *buf;
@@ -94,15 +125,18 @@
 		COPY(r15);
 #endif /* CONFIG_X86_64 */
 
-#ifdef CONFIG_X86_32
 		COPY_SEG_CPL3(cs);
 		COPY_SEG_CPL3(ss);
-#else /* !CONFIG_X86_32 */
-		/* Kernel saves and restores only the CS segment register on signals,
-		 * which is the bare minimum needed to allow mixed 32/64-bit code.
-		 * App's signal handler can save/restore other segments if needed. */
-		COPY_SEG_CPL3(cs);
-#endif /* CONFIG_X86_32 */
+
+#ifdef CONFIG_X86_64
+		/*
+		 * Fix up SS if needed for the benefit of old DOSEMU and
+		 * CRIU.
+		 */
+		if (unlikely(!(uc_flags & UC_STRICT_RESTORE_SS) &&
+			     user_64bit_mode(regs)))
+			force_valid_ss(regs);
+#endif
 
 		get_user_ex(tmpflags, &sc->flags);
 		regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
@@ -165,6 +199,7 @@
 		put_user_ex(regs->cs, &sc->cs);
 		put_user_ex(0, &sc->gs);
 		put_user_ex(0, &sc->fs);
+		put_user_ex(regs->ss, &sc->ss);
 #endif /* CONFIG_X86_32 */
 
 		put_user_ex(fpstate, &sc->fpstate);
@@ -403,6 +438,21 @@
 	return 0;
 }
 #else /* !CONFIG_X86_32 */
+static unsigned long frame_uc_flags(struct pt_regs *regs)
+{
+	unsigned long flags;
+
+	if (cpu_has_xsave)
+		flags = UC_FP_XSTATE | UC_SIGCONTEXT_SS;
+	else
+		flags = UC_SIGCONTEXT_SS;
+
+	if (likely(user_64bit_mode(regs)))
+		flags |= UC_STRICT_RESTORE_SS;
+
+	return flags;
+}
+
 static int __setup_rt_frame(int sig, struct ksignal *ksig,
 			    sigset_t *set, struct pt_regs *regs)
 {
@@ -422,10 +472,7 @@
 
 	put_user_try {
 		/* Create the ucontext.  */
-		if (cpu_has_xsave)
-			put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags);
-		else
-			put_user_ex(0, &frame->uc.uc_flags);
+		put_user_ex(frame_uc_flags(regs), &frame->uc.uc_flags);
 		put_user_ex(0, &frame->uc.uc_link);
 		save_altstack_ex(&frame->uc.uc_stack, regs->sp);
 
@@ -459,10 +506,28 @@
 
 	regs->sp = (unsigned long)frame;
 
-	/* Set up the CS register to run signal handlers in 64-bit mode,
-	   even if the handler happens to be interrupting 32-bit code. */
+	/*
+	 * Set up the CS and SS registers to run signal handlers in
+	 * 64-bit mode, even if the handler happens to be interrupting
+	 * 32-bit or 16-bit code.
+	 *
+	 * SS is subtle.  In 64-bit mode, we don't need any particular
+	 * SS descriptor, but we do need SS to be valid.  It's possible
+	 * that the old SS is entirely bogus -- this can happen if the
+	 * signal we're trying to deliver is #GP or #SS caused by a bad
+	 * SS value.  We also have a compatbility issue here: DOSEMU
+	 * relies on the contents of the SS register indicating the
+	 * SS value at the time of the signal, even though that code in
+	 * DOSEMU predates sigreturn's ability to restore SS.  (DOSEMU
+	 * avoids relying on sigreturn to restore SS; instead it uses
+	 * a trampoline.)  So we do our best: if the old SS was valid,
+	 * we keep it.  Otherwise we replace it.
+	 */
 	regs->cs = __USER_CS;
 
+	if (unlikely(regs->ss != __USER_DS))
+		force_valid_ss(regs);
+
 	return 0;
 }
 #endif /* CONFIG_X86_32 */
@@ -489,10 +554,7 @@
 
 	put_user_try {
 		/* Create the ucontext.  */
-		if (cpu_has_xsave)
-			put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags);
-		else
-			put_user_ex(0, &frame->uc.uc_flags);
+		put_user_ex(frame_uc_flags(regs), &frame->uc.uc_flags);
 		put_user_ex(0, &frame->uc.uc_link);
 		compat_save_altstack_ex(&frame->uc.uc_stack, regs->sp);
 		put_user_ex(0, &frame->uc.uc__pad0);
@@ -554,7 +616,11 @@
 
 	set_current_blocked(&set);
 
-	if (restore_sigcontext(regs, &frame->sc))
+	/*
+	 * x86_32 has no uc_flags bits relevant to restore_sigcontext.
+	 * Save a few cycles by skipping the __get_user.
+	 */
+	if (restore_sigcontext(regs, &frame->sc, 0))
 		goto badframe;
 	return regs->ax;
 
@@ -570,16 +636,19 @@
 	struct pt_regs *regs = current_pt_regs();
 	struct rt_sigframe __user *frame;
 	sigset_t set;
+	unsigned long uc_flags;
 
 	frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long));
 	if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
 		goto badframe;
 	if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
 		goto badframe;
+	if (__get_user(uc_flags, &frame->uc.uc_flags))
+		goto badframe;
 
 	set_current_blocked(&set);
 
-	if (restore_sigcontext(regs, &frame->uc.uc_mcontext))
+	if (restore_sigcontext(regs, &frame->uc.uc_mcontext, uc_flags))
 		goto badframe;
 
 	if (restore_altstack(&frame->uc.uc_stack))
@@ -692,12 +761,15 @@
 
 static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs)
 {
-#if defined(CONFIG_X86_32) || !defined(CONFIG_X86_64)
+#ifdef CONFIG_X86_64
+	if (is_ia32_task())
+		return __NR_ia32_restart_syscall;
+#endif
+#ifdef CONFIG_X86_X32_ABI
+	return __NR_restart_syscall | (regs->orig_ax & __X32_SYSCALL_BIT);
+#else
 	return __NR_restart_syscall;
-#else /* !CONFIG_X86_32 && CONFIG_X86_64 */
-	return test_thread_flag(TIF_IA32) ? __NR_ia32_restart_syscall :
-		__NR_restart_syscall | (regs->orig_ax & __X32_SYSCALL_BIT);
-#endif /* CONFIG_X86_32 || !CONFIG_X86_64 */
+#endif
 }
 
 /*
@@ -763,6 +835,7 @@
 	struct pt_regs *regs = current_pt_regs();
 	struct rt_sigframe_x32 __user *frame;
 	sigset_t set;
+	unsigned long uc_flags;
 
 	frame = (struct rt_sigframe_x32 __user *)(regs->sp - 8);
 
@@ -770,10 +843,12 @@
 		goto badframe;
 	if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
 		goto badframe;
+	if (__get_user(uc_flags, &frame->uc.uc_flags))
+		goto badframe;
 
 	set_current_blocked(&set);
 
-	if (restore_sigcontext(regs, &frame->uc.uc_mcontext))
+	if (restore_sigcontext(regs, &frame->uc.uc_mcontext, uc_flags))
 		goto badframe;
 
 	if (compat_restore_altstack(&frame->uc.uc_stack))
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 24d57f7..3bf1e0b 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -97,6 +97,14 @@
 DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
 EXPORT_PER_CPU_SYMBOL(cpu_info);
 
+/* Logical package management. We might want to allocate that dynamically */
+static int *physical_to_logical_pkg __read_mostly;
+static unsigned long *physical_package_map __read_mostly;;
+static unsigned long *logical_package_map  __read_mostly;
+static unsigned int max_physical_pkg_id __read_mostly;
+unsigned int __max_logical_packages __read_mostly;
+EXPORT_SYMBOL(__max_logical_packages);
+
 static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip)
 {
 	unsigned long flags;
@@ -251,6 +259,97 @@
 	cpu_startup_entry(CPUHP_ONLINE);
 }
 
+int topology_update_package_map(unsigned int apicid, unsigned int cpu)
+{
+	unsigned int new, pkg = apicid >> boot_cpu_data.x86_coreid_bits;
+
+	/* Called from early boot ? */
+	if (!physical_package_map)
+		return 0;
+
+	if (pkg >= max_physical_pkg_id)
+		return -EINVAL;
+
+	/* Set the logical package id */
+	if (test_and_set_bit(pkg, physical_package_map))
+		goto found;
+
+	if (pkg < __max_logical_packages) {
+		set_bit(pkg, logical_package_map);
+		physical_to_logical_pkg[pkg] = pkg;
+		goto found;
+	}
+	new = find_first_zero_bit(logical_package_map, __max_logical_packages);
+	if (new >= __max_logical_packages) {
+		physical_to_logical_pkg[pkg] = -1;
+		pr_warn("APIC(%x) Package %u exceeds logical package map\n",
+			apicid, pkg);
+		return -ENOSPC;
+	}
+	set_bit(new, logical_package_map);
+	pr_info("APIC(%x) Converting physical %u to logical package %u\n",
+		apicid, pkg, new);
+	physical_to_logical_pkg[pkg] = new;
+
+found:
+	cpu_data(cpu).logical_proc_id = physical_to_logical_pkg[pkg];
+	return 0;
+}
+
+/**
+ * topology_phys_to_logical_pkg - Map a physical package id to a logical
+ *
+ * Returns logical package id or -1 if not found
+ */
+int topology_phys_to_logical_pkg(unsigned int phys_pkg)
+{
+	if (phys_pkg >= max_physical_pkg_id)
+		return -1;
+	return physical_to_logical_pkg[phys_pkg];
+}
+EXPORT_SYMBOL(topology_phys_to_logical_pkg);
+
+static void __init smp_init_package_map(void)
+{
+	unsigned int ncpus, cpu;
+	size_t size;
+
+	/*
+	 * Today neither Intel nor AMD support heterogenous systems. That
+	 * might change in the future....
+	 */
+	ncpus = boot_cpu_data.x86_max_cores * smp_num_siblings;
+	__max_logical_packages = DIV_ROUND_UP(nr_cpu_ids, ncpus);
+
+	/*
+	 * Possibly larger than what we need as the number of apic ids per
+	 * package can be smaller than the actual used apic ids.
+	 */
+	max_physical_pkg_id = DIV_ROUND_UP(MAX_LOCAL_APIC, ncpus);
+	size = max_physical_pkg_id * sizeof(unsigned int);
+	physical_to_logical_pkg = kmalloc(size, GFP_KERNEL);
+	memset(physical_to_logical_pkg, 0xff, size);
+	size = BITS_TO_LONGS(max_physical_pkg_id) * sizeof(unsigned long);
+	physical_package_map = kzalloc(size, GFP_KERNEL);
+	size = BITS_TO_LONGS(__max_logical_packages) * sizeof(unsigned long);
+	logical_package_map = kzalloc(size, GFP_KERNEL);
+
+	pr_info("Max logical packages: %u\n", __max_logical_packages);
+
+	for_each_present_cpu(cpu) {
+		unsigned int apicid = apic->cpu_present_to_apicid(cpu);
+
+		if (apicid == BAD_APICID || !apic->apic_id_valid(apicid))
+			continue;
+		if (!topology_update_package_map(apicid, cpu))
+			continue;
+		pr_warn("CPU %u APICId %x disabled\n", cpu, apicid);
+		per_cpu(x86_bios_cpu_apicid, cpu) = BAD_APICID;
+		set_cpu_possible(cpu, false);
+		set_cpu_present(cpu, false);
+	}
+}
+
 void __init smp_store_boot_cpu_info(void)
 {
 	int id = 0; /* CPU 0 */
@@ -258,6 +357,7 @@
 
 	*c = boot_cpu_data;
 	c->cpu_index = id;
+	smp_init_package_map();
 }
 
 /*
diff --git a/arch/x86/kernel/test_nx.c b/arch/x86/kernel/test_nx.c
index 3f92ce0..27538f1 100644
--- a/arch/x86/kernel/test_nx.c
+++ b/arch/x86/kernel/test_nx.c
@@ -142,7 +142,6 @@
 	 * by the error message
 	 */
 
-#ifdef CONFIG_DEBUG_RODATA
 	/* Test 3: Check if the .rodata section is executable */
 	if (rodata_test_data != 0xC3) {
 		printk(KERN_ERR "test_nx: .rodata marker has invalid value\n");
@@ -151,7 +150,6 @@
 		printk(KERN_ERR "test_nx: .rodata section is executable\n");
 		ret = -ENODEV;
 	}
-#endif
 
 #if 0
 	/* Test 4: Check if the .data section of a module is executable */
diff --git a/arch/x86/kernel/test_rodata.c b/arch/x86/kernel/test_rodata.c
index 5ecbfe5..cb4a01b 100644
--- a/arch/x86/kernel/test_rodata.c
+++ b/arch/x86/kernel/test_rodata.c
@@ -76,5 +76,5 @@
 }
 
 MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("Testcase for the DEBUG_RODATA infrastructure");
+MODULE_DESCRIPTION("Testcase for marking rodata as read-only");
 MODULE_AUTHOR("Arjan van de Ven <arjan@linux.intel.com>");
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index ade185a..06cbe25 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -83,32 +83,18 @@
 DECLARE_BITMAP(used_vectors, NR_VECTORS);
 EXPORT_SYMBOL_GPL(used_vectors);
 
-static inline void conditional_sti(struct pt_regs *regs)
+static inline void cond_local_irq_enable(struct pt_regs *regs)
 {
 	if (regs->flags & X86_EFLAGS_IF)
 		local_irq_enable();
 }
 
-static inline void preempt_conditional_sti(struct pt_regs *regs)
-{
-	preempt_count_inc();
-	if (regs->flags & X86_EFLAGS_IF)
-		local_irq_enable();
-}
-
-static inline void conditional_cli(struct pt_regs *regs)
+static inline void cond_local_irq_disable(struct pt_regs *regs)
 {
 	if (regs->flags & X86_EFLAGS_IF)
 		local_irq_disable();
 }
 
-static inline void preempt_conditional_cli(struct pt_regs *regs)
-{
-	if (regs->flags & X86_EFLAGS_IF)
-		local_irq_disable();
-	preempt_count_dec();
-}
-
 void ist_enter(struct pt_regs *regs)
 {
 	if (user_mode(regs)) {
@@ -199,7 +185,7 @@
 	}
 
 	if (!user_mode(regs)) {
-		if (!fixup_exception(regs)) {
+		if (!fixup_exception(regs, trapnr)) {
 			tsk->thread.error_code = error_code;
 			tsk->thread.trap_nr = trapnr;
 			die(str, regs, error_code);
@@ -262,7 +248,6 @@
 	tsk->thread.error_code = error_code;
 	tsk->thread.trap_nr = trapnr;
 
-#ifdef CONFIG_X86_64
 	if (show_unhandled_signals && unhandled_signal(tsk, signr) &&
 	    printk_ratelimit()) {
 		pr_info("%s[%d] trap %s ip:%lx sp:%lx error:%lx",
@@ -271,7 +256,6 @@
 		print_vma_addr(" in ", regs->ip);
 		pr_cont("\n");
 	}
-#endif
 
 	force_sig_info(signr, info ?: SEND_SIG_PRIV, tsk);
 }
@@ -286,7 +270,7 @@
 
 	if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) !=
 			NOTIFY_STOP) {
-		conditional_sti(regs);
+		cond_local_irq_enable(regs);
 		do_trap(trapnr, signr, str, regs, error_code,
 			fill_trap_info(regs, signr, trapnr, &info));
 	}
@@ -368,7 +352,7 @@
 	if (notify_die(DIE_TRAP, "bounds", regs, error_code,
 			X86_TRAP_BR, SIGSEGV) == NOTIFY_STOP)
 		return;
-	conditional_sti(regs);
+	cond_local_irq_enable(regs);
 
 	if (!user_mode(regs))
 		die("bounds", regs, error_code);
@@ -443,7 +427,7 @@
 	struct task_struct *tsk;
 
 	RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
-	conditional_sti(regs);
+	cond_local_irq_enable(regs);
 
 	if (v8086_mode(regs)) {
 		local_irq_enable();
@@ -453,7 +437,7 @@
 
 	tsk = current;
 	if (!user_mode(regs)) {
-		if (fixup_exception(regs))
+		if (fixup_exception(regs, X86_TRAP_GP))
 			return;
 
 		tsk->thread.error_code = error_code;
@@ -517,9 +501,11 @@
 	 * as we may switch to the interrupt stack.
 	 */
 	debug_stack_usage_inc();
-	preempt_conditional_sti(regs);
+	preempt_disable();
+	cond_local_irq_enable(regs);
 	do_trap(X86_TRAP_BP, SIGTRAP, "int3", regs, error_code, NULL);
-	preempt_conditional_cli(regs);
+	cond_local_irq_disable(regs);
+	preempt_enable_no_resched();
 	debug_stack_usage_dec();
 exit:
 	ist_exit(regs);
@@ -571,6 +557,29 @@
 NOKPROBE_SYMBOL(fixup_bad_iret);
 #endif
 
+static bool is_sysenter_singlestep(struct pt_regs *regs)
+{
+	/*
+	 * We don't try for precision here.  If we're anywhere in the region of
+	 * code that can be single-stepped in the SYSENTER entry path, then
+	 * assume that this is a useless single-step trap due to SYSENTER
+	 * being invoked with TF set.  (We don't know in advance exactly
+	 * which instructions will be hit because BTF could plausibly
+	 * be set.)
+	 */
+#ifdef CONFIG_X86_32
+	return (regs->ip - (unsigned long)__begin_SYSENTER_singlestep_region) <
+		(unsigned long)__end_SYSENTER_singlestep_region -
+		(unsigned long)__begin_SYSENTER_singlestep_region;
+#elif defined(CONFIG_IA32_EMULATION)
+	return (regs->ip - (unsigned long)entry_SYSENTER_compat) <
+		(unsigned long)__end_entry_SYSENTER_compat -
+		(unsigned long)entry_SYSENTER_compat;
+#else
+	return false;
+#endif
+}
+
 /*
  * Our handling of the processor debug registers is non-trivial.
  * We do not clear them on entry and exit from the kernel. Therefore
@@ -605,11 +614,42 @@
 	ist_enter(regs);
 
 	get_debugreg(dr6, 6);
+	/*
+	 * The Intel SDM says:
+	 *
+	 *   Certain debug exceptions may clear bits 0-3. The remaining
+	 *   contents of the DR6 register are never cleared by the
+	 *   processor. To avoid confusion in identifying debug
+	 *   exceptions, debug handlers should clear the register before
+	 *   returning to the interrupted task.
+	 *
+	 * Keep it simple: clear DR6 immediately.
+	 */
+	set_debugreg(0, 6);
 
 	/* Filter out all the reserved bits which are preset to 1 */
 	dr6 &= ~DR6_RESERVED;
 
 	/*
+	 * The SDM says "The processor clears the BTF flag when it
+	 * generates a debug exception."  Clear TIF_BLOCKSTEP to keep
+	 * TIF_BLOCKSTEP in sync with the hardware BTF flag.
+	 */
+	clear_tsk_thread_flag(tsk, TIF_BLOCKSTEP);
+
+	if (unlikely(!user_mode(regs) && (dr6 & DR_STEP) &&
+		     is_sysenter_singlestep(regs))) {
+		dr6 &= ~DR_STEP;
+		if (!dr6)
+			goto exit;
+		/*
+		 * else we might have gotten a single-step trap and hit a
+		 * watchpoint at the same time, in which case we should fall
+		 * through and handle the watchpoint.
+		 */
+	}
+
+	/*
 	 * If dr6 has no reason to give us about the origin of this trap,
 	 * then it's very likely the result of an icebp/int01 trap.
 	 * User wants a sigtrap for that.
@@ -617,18 +657,10 @@
 	if (!dr6 && user_mode(regs))
 		user_icebp = 1;
 
-	/* Catch kmemcheck conditions first of all! */
+	/* Catch kmemcheck conditions! */
 	if ((dr6 & DR_STEP) && kmemcheck_trap(regs))
 		goto exit;
 
-	/* DR6 may or may not be cleared by the CPU */
-	set_debugreg(0, 6);
-
-	/*
-	 * The processor cleared BTF, so don't mark that we need it set.
-	 */
-	clear_tsk_thread_flag(tsk, TIF_BLOCKSTEP);
-
 	/* Store the virtualized DR6 value */
 	tsk->thread.debugreg6 = dr6;
 
@@ -648,24 +680,25 @@
 	debug_stack_usage_inc();
 
 	/* It's safe to allow irq's after DR6 has been saved */
-	preempt_conditional_sti(regs);
+	preempt_disable();
+	cond_local_irq_enable(regs);
 
 	if (v8086_mode(regs)) {
 		handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code,
 					X86_TRAP_DB);
-		preempt_conditional_cli(regs);
+		cond_local_irq_disable(regs);
+		preempt_enable_no_resched();
 		debug_stack_usage_dec();
 		goto exit;
 	}
 
-	/*
-	 * Single-stepping through system calls: ignore any exceptions in
-	 * kernel space, but re-enable TF when returning to user mode.
-	 *
-	 * We already checked v86 mode above, so we can check for kernel mode
-	 * by just checking the CPL of CS.
-	 */
-	if ((dr6 & DR_STEP) && !user_mode(regs)) {
+	if (WARN_ON_ONCE((dr6 & DR_STEP) && !user_mode(regs))) {
+		/*
+		 * Historical junk that used to handle SYSENTER single-stepping.
+		 * This should be unreachable now.  If we survive for a while
+		 * without anyone hitting this warning, we'll turn this into
+		 * an oops.
+		 */
 		tsk->thread.debugreg6 &= ~DR_STEP;
 		set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
 		regs->flags &= ~X86_EFLAGS_TF;
@@ -673,10 +706,19 @@
 	si_code = get_si_code(tsk->thread.debugreg6);
 	if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp)
 		send_sigtrap(tsk, regs, error_code, si_code);
-	preempt_conditional_cli(regs);
+	cond_local_irq_disable(regs);
+	preempt_enable_no_resched();
 	debug_stack_usage_dec();
 
 exit:
+#if defined(CONFIG_X86_32)
+	/*
+	 * This is the most likely code path that involves non-trivial use
+	 * of the SYSENTER stack.  Check that we haven't overrun it.
+	 */
+	WARN(this_cpu_read(cpu_tss.SYSENTER_stack_canary) != STACK_END_MAGIC,
+	     "Overran or corrupted SYSENTER stack\n");
+#endif
 	ist_exit(regs);
 }
 NOKPROBE_SYMBOL(do_debug);
@@ -696,10 +738,10 @@
 
 	if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, SIGFPE) == NOTIFY_STOP)
 		return;
-	conditional_sti(regs);
+	cond_local_irq_enable(regs);
 
 	if (!user_mode(regs)) {
-		if (!fixup_exception(regs)) {
+		if (!fixup_exception(regs, trapnr)) {
 			task->thread.error_code = error_code;
 			task->thread.trap_nr = trapnr;
 			die(str, regs, error_code);
@@ -743,20 +785,19 @@
 dotraplinkage void
 do_spurious_interrupt_bug(struct pt_regs *regs, long error_code)
 {
-	conditional_sti(regs);
+	cond_local_irq_enable(regs);
 }
 
 dotraplinkage void
 do_device_not_available(struct pt_regs *regs, long error_code)
 {
 	RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
-	BUG_ON(use_eager_fpu());
 
 #ifdef CONFIG_MATH_EMULATION
-	if (read_cr0() & X86_CR0_EM) {
+	if (!boot_cpu_has(X86_FEATURE_FPU) && (read_cr0() & X86_CR0_EM)) {
 		struct math_emu_info info = { };
 
-		conditional_sti(regs);
+		cond_local_irq_enable(regs);
 
 		info.regs = regs;
 		math_emulate(&info);
@@ -765,7 +806,7 @@
 #endif
 	fpu__restore(&current->thread.fpu); /* interrupts still off */
 #ifdef CONFIG_X86_32
-	conditional_sti(regs);
+	cond_local_irq_enable(regs);
 #endif
 }
 NOKPROBE_SYMBOL(do_device_not_available);
@@ -868,7 +909,7 @@
 #endif
 
 #ifdef CONFIG_X86_32
-	set_system_trap_gate(IA32_SYSCALL_VECTOR, entry_INT80_32);
+	set_system_intr_gate(IA32_SYSCALL_VECTOR, entry_INT80_32);
 	set_bit(IA32_SYSCALL_VECTOR, used_vectors);
 #endif
 
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 80d761e..5638044 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -1305,14 +1305,14 @@
  */
 unsigned long calibrate_delay_is_known(void)
 {
-	int i, cpu = smp_processor_id();
+	int sibling, cpu = smp_processor_id();
 
 	if (!tsc_disabled && !cpu_has(&cpu_data(cpu), X86_FEATURE_CONSTANT_TSC))
 		return 0;
 
-	for_each_online_cpu(i)
-		if (cpu_data(i).phys_proc_id == cpu_data(cpu).phys_proc_id)
-			return cpu_data(i).loops_per_jiffy;
+	sibling = cpumask_any_but(topology_core_cpumask(cpu), cpu);
+	if (sibling < nr_cpu_ids)
+		return cpu_data(sibling).loops_per_jiffy;
 	return 0;
 }
 #endif
diff --git a/arch/x86/kernel/verify_cpu.S b/arch/x86/kernel/verify_cpu.S
index 07efb35..014ea59 100644
--- a/arch/x86/kernel/verify_cpu.S
+++ b/arch/x86/kernel/verify_cpu.S
@@ -30,7 +30,7 @@
  * 	appropriately. Either display a message or halt.
  */
 
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
 #include <asm/msr-index.h>
 
 verify_cpu:
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index e574b85..3dce1ca 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -362,7 +362,7 @@
 	/* make room for real-mode segments */
 	tsk->thread.sp0 += 16;
 
-	if (static_cpu_has_safe(X86_FEATURE_SEP))
+	if (static_cpu_has(X86_FEATURE_SEP))
 		tsk->thread.sysenter_cs = 0;
 
 	load_sp0(tss, &tsk->thread);
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 74e4bf1..5af9958 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -41,29 +41,28 @@
 jiffies_64 = jiffies;
 #endif
 
-#if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA)
+#if defined(CONFIG_X86_64)
 /*
- * On 64-bit, align RODATA to 2MB so that even with CONFIG_DEBUG_RODATA
- * we retain large page mappings for boundaries spanning kernel text, rodata
- * and data sections.
+ * On 64-bit, align RODATA to 2MB so we retain large page mappings for
+ * boundaries spanning kernel text, rodata and data sections.
  *
  * However, kernel identity mappings will have different RWX permissions
  * to the pages mapping to text and to the pages padding (which are freed) the
  * text section. Hence kernel identity mappings will be broken to smaller
  * pages. For 64-bit, kernel text and kernel identity mappings are different,
- * so we can enable protection checks that come with CONFIG_DEBUG_RODATA,
- * as well as retain 2MB large page mappings for kernel text.
+ * so we can enable protection checks as well as retain 2MB large page
+ * mappings for kernel text.
  */
-#define X64_ALIGN_DEBUG_RODATA_BEGIN	. = ALIGN(HPAGE_SIZE);
+#define X64_ALIGN_RODATA_BEGIN	. = ALIGN(HPAGE_SIZE);
 
-#define X64_ALIGN_DEBUG_RODATA_END				\
+#define X64_ALIGN_RODATA_END					\
 		. = ALIGN(HPAGE_SIZE);				\
 		__end_rodata_hpage_align = .;
 
 #else
 
-#define X64_ALIGN_DEBUG_RODATA_BEGIN
-#define X64_ALIGN_DEBUG_RODATA_END
+#define X64_ALIGN_RODATA_BEGIN
+#define X64_ALIGN_RODATA_END
 
 #endif
 
@@ -112,13 +111,11 @@
 
 	EXCEPTION_TABLE(16) :text = 0x9090
 
-#if defined(CONFIG_DEBUG_RODATA)
 	/* .text should occupy whole number of pages */
 	. = ALIGN(PAGE_SIZE);
-#endif
-	X64_ALIGN_DEBUG_RODATA_BEGIN
+	X64_ALIGN_RODATA_BEGIN
 	RO_DATA(PAGE_SIZE)
-	X64_ALIGN_DEBUG_RODATA_END
+	X64_ALIGN_RODATA_END
 
 	/* Data */
 	.data : AT(ADDR(.data) - LOAD_OFFSET) {
@@ -195,6 +192,17 @@
 	:init
 #endif
 
+	/*
+	 * Section for code used exclusively before alternatives are run. All
+	 * references to such code must be patched out by alternatives, normally
+	 * by using X86_FEATURE_ALWAYS CPU feature bit.
+	 *
+	 * See static_cpu_has() for an example.
+	 */
+	.altinstr_aux : AT(ADDR(.altinstr_aux) - LOAD_OFFSET) {
+		*(.altinstr_aux)
+	}
+
 	INIT_DATA_SECTION(16)
 
 	.x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) {
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
index a0695be..cd05942 100644
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c
@@ -37,6 +37,8 @@
 EXPORT_SYMBOL(_copy_from_user);
 EXPORT_SYMBOL(_copy_to_user);
 
+EXPORT_SYMBOL_GPL(memcpy_mcsafe);
+
 EXPORT_SYMBOL(copy_page);
 EXPORT_SYMBOL(clear_page);
 
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 1505587..b9b09fe 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -650,10 +650,10 @@
 	u16 sel;
 
 	la = seg_base(ctxt, addr.seg) + addr.ea;
-	*linear = la;
 	*max_size = 0;
 	switch (mode) {
 	case X86EMUL_MODE_PROT64:
+		*linear = la;
 		if (is_noncanonical_address(la))
 			goto bad;
 
@@ -662,6 +662,7 @@
 			goto bad;
 		break;
 	default:
+		*linear = la = (u32)la;
 		usable = ctxt->ops->get_segment(ctxt, &sel, &desc, NULL,
 						addr.seg);
 		if (!usable)
@@ -689,7 +690,6 @@
 			if (size > *max_size)
 				goto bad;
 		}
-		la &= (u32)-1;
 		break;
 	}
 	if (insn_aligned(ctxt, size) && ((la & (size - 1)) != 0))
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 36591fa..3a045f3 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1195,7 +1195,7 @@
 static void apic_timer_expired(struct kvm_lapic *apic)
 {
 	struct kvm_vcpu *vcpu = apic->vcpu;
-	wait_queue_head_t *q = &vcpu->wq;
+	struct swait_queue_head *q = &vcpu->wq;
 	struct kvm_timer *ktimer = &apic->lapic_timer;
 
 	if (atomic_read(&apic->lapic_timer.pending))
@@ -1204,8 +1204,8 @@
 	atomic_inc(&apic->lapic_timer.pending);
 	kvm_set_pending_timer(vcpu);
 
-	if (waitqueue_active(q))
-		wake_up_interruptible(q);
+	if (swait_active(q))
+		swake_up(q);
 
 	if (apic_lvtt_tscdeadline(apic))
 		ktimer->expired_tscdeadline = ktimer->tscdeadline;
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 95a955d..1e7a49b 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3721,13 +3721,15 @@
 void
 reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context)
 {
+	bool uses_nx = context->nx || context->base_role.smep_andnot_wp;
+
 	/*
 	 * Passing "true" to the last argument is okay; it adds a check
 	 * on bit 8 of the SPTEs which KVM doesn't use anyway.
 	 */
 	__reset_rsvds_bits_mask(vcpu, &context->shadow_zero_check,
 				boot_cpu_data.x86_phys_bits,
-				context->shadow_root_level, context->nx,
+				context->shadow_root_level, uses_nx,
 				guest_cpuid_has_gbpages(vcpu), is_pse(vcpu),
 				true);
 }
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 6c9fed9..2ce4f05 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -249,7 +249,7 @@
 			return ret;
 
 		kvm_vcpu_mark_page_dirty(vcpu, table_gfn);
-		walker->ptes[level] = pte;
+		walker->ptes[level - 1] = pte;
 	}
 	return 0;
 }
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index e2951b6..9bd8f44 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -596,6 +596,8 @@
 	/* Support for PML */
 #define PML_ENTITY_NUM		512
 	struct page *pml_pg;
+
+	u64 current_tsc_ratio;
 };
 
 enum segment_cache_field {
@@ -1811,6 +1813,13 @@
 			return;
 		}
 		break;
+	case MSR_IA32_PEBS_ENABLE:
+		/* PEBS needs a quiescent period after being disabled (to write
+		 * a record).  Disabling PEBS through VMX MSR swapping doesn't
+		 * provide that period, so a CPU could write host's record into
+		 * guest's memory.
+		 */
+		wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
 	}
 
 	for (i = 0; i < m->nr; ++i)
@@ -1848,26 +1857,31 @@
 
 static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
 {
-	u64 guest_efer;
-	u64 ignore_bits;
+	u64 guest_efer = vmx->vcpu.arch.efer;
+	u64 ignore_bits = 0;
 
-	guest_efer = vmx->vcpu.arch.efer;
+	if (!enable_ept) {
+		/*
+		 * NX is needed to handle CR0.WP=1, CR4.SMEP=1.  Testing
+		 * host CPUID is more efficient than testing guest CPUID
+		 * or CR4.  Host SMEP is anyway a requirement for guest SMEP.
+		 */
+		if (boot_cpu_has(X86_FEATURE_SMEP))
+			guest_efer |= EFER_NX;
+		else if (!(guest_efer & EFER_NX))
+			ignore_bits |= EFER_NX;
+	}
 
 	/*
-	 * NX is emulated; LMA and LME handled by hardware; SCE meaningless
-	 * outside long mode
+	 * LMA and LME handled by hardware; SCE meaningless outside long mode.
 	 */
-	ignore_bits = EFER_NX | EFER_SCE;
+	ignore_bits |= EFER_SCE;
 #ifdef CONFIG_X86_64
 	ignore_bits |= EFER_LMA | EFER_LME;
 	/* SCE is meaningful only in long mode on Intel */
 	if (guest_efer & EFER_LMA)
 		ignore_bits &= ~(u64)EFER_SCE;
 #endif
-	guest_efer &= ~ignore_bits;
-	guest_efer |= host_efer & ignore_bits;
-	vmx->guest_msrs[efer_offset].data = guest_efer;
-	vmx->guest_msrs[efer_offset].mask = ~ignore_bits;
 
 	clear_atomic_switch_msr(vmx, MSR_EFER);
 
@@ -1878,16 +1892,21 @@
 	 */
 	if (cpu_has_load_ia32_efer ||
 	    (enable_ept && ((vmx->vcpu.arch.efer ^ host_efer) & EFER_NX))) {
-		guest_efer = vmx->vcpu.arch.efer;
 		if (!(guest_efer & EFER_LMA))
 			guest_efer &= ~EFER_LME;
 		if (guest_efer != host_efer)
 			add_atomic_switch_msr(vmx, MSR_EFER,
 					      guest_efer, host_efer);
 		return false;
-	}
+	} else {
+		guest_efer &= ~ignore_bits;
+		guest_efer |= host_efer & ignore_bits;
 
-	return true;
+		vmx->guest_msrs[efer_offset].data = guest_efer;
+		vmx->guest_msrs[efer_offset].mask = ~ignore_bits;
+
+		return true;
+	}
 }
 
 static unsigned long segment_base(u16 selector)
@@ -2127,14 +2146,16 @@
 		rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp);
 		vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */
 
-		/* Setup TSC multiplier */
-		if (cpu_has_vmx_tsc_scaling())
-			vmcs_write64(TSC_MULTIPLIER,
-				     vcpu->arch.tsc_scaling_ratio);
-
 		vmx->loaded_vmcs->cpu = cpu;
 	}
 
+	/* Setup TSC multiplier */
+	if (kvm_has_tsc_control &&
+	    vmx->current_tsc_ratio != vcpu->arch.tsc_scaling_ratio) {
+		vmx->current_tsc_ratio = vcpu->arch.tsc_scaling_ratio;
+		vmcs_write64(TSC_MULTIPLIER, vmx->current_tsc_ratio);
+	}
+
 	vmx_vcpu_pi_load(vcpu, cpu);
 }
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4244c2b..eaf6ee8 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6618,12 +6618,12 @@
 	 * KVM_DEBUGREG_WONT_EXIT again.
 	 */
 	if (unlikely(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)) {
-		int i;
-
 		WARN_ON(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP);
 		kvm_x86_ops->sync_dirty_debug_regs(vcpu);
-		for (i = 0; i < KVM_NR_DB_REGS; i++)
-			vcpu->arch.eff_db[i] = vcpu->arch.db[i];
+		kvm_update_dr0123(vcpu);
+		kvm_update_dr6(vcpu);
+		kvm_update_dr7(vcpu);
+		vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD;
 	}
 
 	/*
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 4ba229a..fd57d3a 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -1520,12 +1520,6 @@
 	 */
 	reserve_top_address(lguest_data.reserve_mem);
 
-	/*
-	 * If we don't initialize the lock dependency checker now, it crashes
-	 * atomic_notifier_chain_register, then paravirt_disable_iospace.
-	 */
-	lockdep_init();
-
 	/* Hook in our special panic hypercall code. */
 	atomic_notifier_chain_register(&panic_notifier_list, &paniced);
 
@@ -1535,7 +1529,7 @@
 	 */
 	cpu_detect(&new_cpu_data);
 	/* head.S usually sets up the first capability word, so do it here. */
-	new_cpu_data.x86_capability[0] = cpuid_edx(1);
+	new_cpu_data.x86_capability[CPUID_1_EDX] = cpuid_edx(1);
 
 	/* Math is always hard! */
 	set_cpu_cap(&new_cpu_data, X86_FEATURE_FPU);
diff --git a/arch/x86/lib/clear_page_64.S b/arch/x86/lib/clear_page_64.S
index a2fe51b..65be7cf 100644
--- a/arch/x86/lib/clear_page_64.S
+++ b/arch/x86/lib/clear_page_64.S
@@ -1,5 +1,5 @@
 #include <linux/linkage.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
 #include <asm/alternative-asm.h>
 
 /*
diff --git a/arch/x86/lib/cmdline.c b/arch/x86/lib/cmdline.c
index 422db00..5cc78bf 100644
--- a/arch/x86/lib/cmdline.c
+++ b/arch/x86/lib/cmdline.c
@@ -21,12 +21,16 @@
  * @option: option string to look for
  *
  * Returns the position of that @option (starts counting with 1)
- * or 0 on not found.
+ * or 0 on not found.  @option will only be found if it is found
+ * as an entire word in @cmdline.  For instance, if @option="car"
+ * then a cmdline which contains "cart" will not match.
  */
-int cmdline_find_option_bool(const char *cmdline, const char *option)
+static int
+__cmdline_find_option_bool(const char *cmdline, int max_cmdline_size,
+			   const char *option)
 {
 	char c;
-	int len, pos = 0, wstart = 0;
+	int pos = 0, wstart = 0;
 	const char *opptr = NULL;
 	enum {
 		st_wordstart = 0,	/* Start of word/after whitespace */
@@ -37,11 +41,11 @@
 	if (!cmdline)
 		return -1;      /* No command line */
 
-	len = min_t(int, strlen(cmdline), COMMAND_LINE_SIZE);
-	if (!len)
-		return 0;
-
-	while (len--) {
+	/*
+	 * This 'pos' check ensures we do not overrun
+	 * a non-NULL-terminated 'cmdline'
+	 */
+	while (pos < max_cmdline_size) {
 		c = *(char *)cmdline++;
 		pos++;
 
@@ -58,18 +62,35 @@
 			/* fall through */
 
 		case st_wordcmp:
-			if (!*opptr)
+			if (!*opptr) {
+				/*
+				 * We matched all the way to the end of the
+				 * option we were looking for.  If the
+				 * command-line has a space _or_ ends, then
+				 * we matched!
+				 */
 				if (!c || myisspace(c))
 					return wstart;
-				else
-					state = st_wordskip;
-			else if (!c)
+				/*
+				 * We hit the end of the option, but _not_
+				 * the end of a word on the cmdline.  Not
+				 * a match.
+				 */
+			} else if (!c) {
+				/*
+				 * Hit the NULL terminator on the end of
+				 * cmdline.
+				 */
 				return 0;
-			else if (c != *opptr++)
-				state = st_wordskip;
-			else if (!len)		/* last word and is matching */
-				return wstart;
-			break;
+			} else if (c == *opptr++) {
+				/*
+				 * We are currently matching, so continue
+				 * to the next character on the cmdline.
+				 */
+				break;
+			}
+			state = st_wordskip;
+			/* fall through */
 
 		case st_wordskip:
 			if (!c)
@@ -82,3 +103,8 @@
 
 	return 0;	/* Buffer overrun */
 }
+
+int cmdline_find_option_bool(const char *cmdline, const char *option)
+{
+	return __cmdline_find_option_bool(cmdline, COMMAND_LINE_SIZE, option);
+}
diff --git a/arch/x86/lib/copy_page_64.S b/arch/x86/lib/copy_page_64.S
index 009f982..24ef1c2 100644
--- a/arch/x86/lib/copy_page_64.S
+++ b/arch/x86/lib/copy_page_64.S
@@ -1,7 +1,7 @@
 /* Written 2003 by Andi Kleen, based on a kernel by Evandro Menezes */
 
 #include <linux/linkage.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
 #include <asm/alternative-asm.h>
 
 /*
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index 982ce34..2b0ef26 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -10,7 +10,7 @@
 #include <asm/current.h>
 #include <asm/asm-offsets.h>
 #include <asm/thread_info.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
 #include <asm/alternative-asm.h>
 #include <asm/asm.h>
 #include <asm/smap.h>
@@ -232,17 +232,31 @@
 
 /*
  * copy_user_nocache - Uncached memory copy with exception handling
- * This will force destination/source out of cache for more performance.
+ * This will force destination out of cache for more performance.
+ *
+ * Note: Cached memory copy is used when destination or size is not
+ * naturally aligned. That is:
+ *  - Require 8-byte alignment when size is 8 bytes or larger.
+ *  - Require 4-byte alignment when size is 4 bytes.
  */
 ENTRY(__copy_user_nocache)
 	ASM_STAC
+
+	/* If size is less than 8 bytes, go to 4-byte copy */
 	cmpl $8,%edx
-	jb 20f		/* less then 8 bytes, go to byte copy loop */
+	jb .L_4b_nocache_copy_entry
+
+	/* If destination is not 8-byte aligned, "cache" copy to align it */
 	ALIGN_DESTINATION
+
+	/* Set 4x8-byte copy count and remainder */
 	movl %edx,%ecx
 	andl $63,%edx
 	shrl $6,%ecx
-	jz 17f
+	jz .L_8b_nocache_copy_entry	/* jump if count is 0 */
+
+	/* Perform 4x8-byte nocache loop-copy */
+.L_4x8b_nocache_copy_loop:
 1:	movq (%rsi),%r8
 2:	movq 1*8(%rsi),%r9
 3:	movq 2*8(%rsi),%r10
@@ -262,60 +276,106 @@
 	leaq 64(%rsi),%rsi
 	leaq 64(%rdi),%rdi
 	decl %ecx
-	jnz 1b
-17:	movl %edx,%ecx
+	jnz .L_4x8b_nocache_copy_loop
+
+	/* Set 8-byte copy count and remainder */
+.L_8b_nocache_copy_entry:
+	movl %edx,%ecx
 	andl $7,%edx
 	shrl $3,%ecx
-	jz 20f
-18:	movq (%rsi),%r8
-19:	movnti %r8,(%rdi)
+	jz .L_4b_nocache_copy_entry	/* jump if count is 0 */
+
+	/* Perform 8-byte nocache loop-copy */
+.L_8b_nocache_copy_loop:
+20:	movq (%rsi),%r8
+21:	movnti %r8,(%rdi)
 	leaq 8(%rsi),%rsi
 	leaq 8(%rdi),%rdi
 	decl %ecx
-	jnz 18b
-20:	andl %edx,%edx
-	jz 23f
+	jnz .L_8b_nocache_copy_loop
+
+	/* If no byte left, we're done */
+.L_4b_nocache_copy_entry:
+	andl %edx,%edx
+	jz .L_finish_copy
+
+	/* If destination is not 4-byte aligned, go to byte copy: */
+	movl %edi,%ecx
+	andl $3,%ecx
+	jnz .L_1b_cache_copy_entry
+
+	/* Set 4-byte copy count (1 or 0) and remainder */
 	movl %edx,%ecx
-21:	movb (%rsi),%al
-22:	movb %al,(%rdi)
+	andl $3,%edx
+	shrl $2,%ecx
+	jz .L_1b_cache_copy_entry	/* jump if count is 0 */
+
+	/* Perform 4-byte nocache copy: */
+30:	movl (%rsi),%r8d
+31:	movnti %r8d,(%rdi)
+	leaq 4(%rsi),%rsi
+	leaq 4(%rdi),%rdi
+
+	/* If no bytes left, we're done: */
+	andl %edx,%edx
+	jz .L_finish_copy
+
+	/* Perform byte "cache" loop-copy for the remainder */
+.L_1b_cache_copy_entry:
+	movl %edx,%ecx
+.L_1b_cache_copy_loop:
+40:	movb (%rsi),%al
+41:	movb %al,(%rdi)
 	incq %rsi
 	incq %rdi
 	decl %ecx
-	jnz 21b
-23:	xorl %eax,%eax
+	jnz .L_1b_cache_copy_loop
+
+	/* Finished copying; fence the prior stores */
+.L_finish_copy:
+	xorl %eax,%eax
 	ASM_CLAC
 	sfence
 	ret
 
 	.section .fixup,"ax"
-30:	shll $6,%ecx
+.L_fixup_4x8b_copy:
+	shll $6,%ecx
 	addl %ecx,%edx
-	jmp 60f
-40:	lea (%rdx,%rcx,8),%rdx
-	jmp 60f
-50:	movl %ecx,%edx
-60:	sfence
+	jmp .L_fixup_handle_tail
+.L_fixup_8b_copy:
+	lea (%rdx,%rcx,8),%rdx
+	jmp .L_fixup_handle_tail
+.L_fixup_4b_copy:
+	lea (%rdx,%rcx,4),%rdx
+	jmp .L_fixup_handle_tail
+.L_fixup_1b_copy:
+	movl %ecx,%edx
+.L_fixup_handle_tail:
+	sfence
 	jmp copy_user_handle_tail
 	.previous
 
-	_ASM_EXTABLE(1b,30b)
-	_ASM_EXTABLE(2b,30b)
-	_ASM_EXTABLE(3b,30b)
-	_ASM_EXTABLE(4b,30b)
-	_ASM_EXTABLE(5b,30b)
-	_ASM_EXTABLE(6b,30b)
-	_ASM_EXTABLE(7b,30b)
-	_ASM_EXTABLE(8b,30b)
-	_ASM_EXTABLE(9b,30b)
-	_ASM_EXTABLE(10b,30b)
-	_ASM_EXTABLE(11b,30b)
-	_ASM_EXTABLE(12b,30b)
-	_ASM_EXTABLE(13b,30b)
-	_ASM_EXTABLE(14b,30b)
-	_ASM_EXTABLE(15b,30b)
-	_ASM_EXTABLE(16b,30b)
-	_ASM_EXTABLE(18b,40b)
-	_ASM_EXTABLE(19b,40b)
-	_ASM_EXTABLE(21b,50b)
-	_ASM_EXTABLE(22b,50b)
+	_ASM_EXTABLE(1b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(2b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(3b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(4b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(5b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(6b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(7b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(8b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(9b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(10b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(11b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(12b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(13b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(14b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(15b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(16b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(20b,.L_fixup_8b_copy)
+	_ASM_EXTABLE(21b,.L_fixup_8b_copy)
+	_ASM_EXTABLE(30b,.L_fixup_4b_copy)
+	_ASM_EXTABLE(31b,.L_fixup_4b_copy)
+	_ASM_EXTABLE(40b,.L_fixup_1b_copy)
+	_ASM_EXTABLE(41b,.L_fixup_1b_copy)
 ENDPROC(__copy_user_nocache)
diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c
index e912b2f..2f07c29 100644
--- a/arch/x86/lib/delay.c
+++ b/arch/x86/lib/delay.c
@@ -102,7 +102,7 @@
 		 * Use cpu_tss as a cacheline-aligned, seldomly
 		 * accessed per-cpu variable as the monitor target.
 		 */
-		__monitorx(this_cpu_ptr(&cpu_tss), 0, 0);
+		__monitorx(raw_cpu_ptr(&cpu_tss), 0, 0);
 
 		/*
 		 * AMD, like Intel, supports the EAX hint and EAX=0xf
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index 16698bb..cbb8ee5 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -1,7 +1,7 @@
 /* Copyright 2002 Andi Kleen */
 
 #include <linux/linkage.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
 #include <asm/alternative-asm.h>
 
 /*
@@ -177,3 +177,120 @@
 .Lend:
 	retq
 ENDPROC(memcpy_orig)
+
+#ifndef CONFIG_UML
+/*
+ * memcpy_mcsafe - memory copy with machine check exception handling
+ * Note that we only catch machine checks when reading the source addresses.
+ * Writes to target are posted and don't generate machine checks.
+ */
+ENTRY(memcpy_mcsafe)
+	cmpl $8, %edx
+	/* Less than 8 bytes? Go to byte copy loop */
+	jb .L_no_whole_words
+
+	/* Check for bad alignment of source */
+	testl $7, %esi
+	/* Already aligned */
+	jz .L_8byte_aligned
+
+	/* Copy one byte at a time until source is 8-byte aligned */
+	movl %esi, %ecx
+	andl $7, %ecx
+	subl $8, %ecx
+	negl %ecx
+	subl %ecx, %edx
+.L_copy_leading_bytes:
+	movb (%rsi), %al
+	movb %al, (%rdi)
+	incq %rsi
+	incq %rdi
+	decl %ecx
+	jnz .L_copy_leading_bytes
+
+.L_8byte_aligned:
+	/* Figure out how many whole cache lines (64-bytes) to copy */
+	movl %edx, %ecx
+	andl $63, %edx
+	shrl $6, %ecx
+	jz .L_no_whole_cache_lines
+
+	/* Loop copying whole cache lines */
+.L_cache_w0: movq (%rsi), %r8
+.L_cache_w1: movq 1*8(%rsi), %r9
+.L_cache_w2: movq 2*8(%rsi), %r10
+.L_cache_w3: movq 3*8(%rsi), %r11
+	movq %r8, (%rdi)
+	movq %r9, 1*8(%rdi)
+	movq %r10, 2*8(%rdi)
+	movq %r11, 3*8(%rdi)
+.L_cache_w4: movq 4*8(%rsi), %r8
+.L_cache_w5: movq 5*8(%rsi), %r9
+.L_cache_w6: movq 6*8(%rsi), %r10
+.L_cache_w7: movq 7*8(%rsi), %r11
+	movq %r8, 4*8(%rdi)
+	movq %r9, 5*8(%rdi)
+	movq %r10, 6*8(%rdi)
+	movq %r11, 7*8(%rdi)
+	leaq 64(%rsi), %rsi
+	leaq 64(%rdi), %rdi
+	decl %ecx
+	jnz .L_cache_w0
+
+	/* Are there any trailing 8-byte words? */
+.L_no_whole_cache_lines:
+	movl %edx, %ecx
+	andl $7, %edx
+	shrl $3, %ecx
+	jz .L_no_whole_words
+
+	/* Copy trailing words */
+.L_copy_trailing_words:
+	movq (%rsi), %r8
+	mov %r8, (%rdi)
+	leaq 8(%rsi), %rsi
+	leaq 8(%rdi), %rdi
+	decl %ecx
+	jnz .L_copy_trailing_words
+
+	/* Any trailing bytes? */
+.L_no_whole_words:
+	andl %edx, %edx
+	jz .L_done_memcpy_trap
+
+	/* Copy trailing bytes */
+	movl %edx, %ecx
+.L_copy_trailing_bytes:
+	movb (%rsi), %al
+	movb %al, (%rdi)
+	incq %rsi
+	incq %rdi
+	decl %ecx
+	jnz .L_copy_trailing_bytes
+
+	/* Copy successful. Return true */
+.L_done_memcpy_trap:
+	xorq %rax, %rax
+	ret
+ENDPROC(memcpy_mcsafe)
+
+	.section .fixup, "ax"
+	/* Return false for any failure */
+.L_memcpy_mcsafe_fail:
+	mov	$1, %rax
+	ret
+
+	.previous
+
+	_ASM_EXTABLE_FAULT(.L_copy_leading_bytes, .L_memcpy_mcsafe_fail)
+	_ASM_EXTABLE_FAULT(.L_cache_w0, .L_memcpy_mcsafe_fail)
+	_ASM_EXTABLE_FAULT(.L_cache_w1, .L_memcpy_mcsafe_fail)
+	_ASM_EXTABLE_FAULT(.L_cache_w3, .L_memcpy_mcsafe_fail)
+	_ASM_EXTABLE_FAULT(.L_cache_w3, .L_memcpy_mcsafe_fail)
+	_ASM_EXTABLE_FAULT(.L_cache_w4, .L_memcpy_mcsafe_fail)
+	_ASM_EXTABLE_FAULT(.L_cache_w5, .L_memcpy_mcsafe_fail)
+	_ASM_EXTABLE_FAULT(.L_cache_w6, .L_memcpy_mcsafe_fail)
+	_ASM_EXTABLE_FAULT(.L_cache_w7, .L_memcpy_mcsafe_fail)
+	_ASM_EXTABLE_FAULT(.L_copy_trailing_words, .L_memcpy_mcsafe_fail)
+	_ASM_EXTABLE_FAULT(.L_copy_trailing_bytes, .L_memcpy_mcsafe_fail)
+#endif
diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S
index ca2afdd..90ce01b 100644
--- a/arch/x86/lib/memmove_64.S
+++ b/arch/x86/lib/memmove_64.S
@@ -6,7 +6,7 @@
  *	- Copyright 2011 Fenghua Yu <fenghua.yu@intel.com>
  */
 #include <linux/linkage.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
 #include <asm/alternative-asm.h>
 
 #undef memmove
diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S
index 2661fad..c9c8122 100644
--- a/arch/x86/lib/memset_64.S
+++ b/arch/x86/lib/memset_64.S
@@ -1,7 +1,7 @@
 /* Copyright 2002 Andi Kleen, SuSE Labs */
 
 #include <linux/linkage.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
 #include <asm/alternative-asm.h>
 
 .weak memset
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index 4a6f1d9..99bfb19 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -358,20 +358,19 @@
 #define pgd_none(a)  pud_none(__pud(pgd_val(a)))
 #endif
 
-#ifdef CONFIG_X86_64
 static inline bool is_hypervisor_range(int idx)
 {
+#ifdef CONFIG_X86_64
 	/*
 	 * ffff800000000000 - ffff87ffffffffff is reserved for
 	 * the hypervisor.
 	 */
-	return paravirt_enabled() &&
-		(idx >= pgd_index(__PAGE_OFFSET) - 16) &&
-		(idx < pgd_index(__PAGE_OFFSET));
-}
+	return	(idx >= pgd_index(__PAGE_OFFSET) - 16) &&
+		(idx <  pgd_index(__PAGE_OFFSET));
 #else
-static inline bool is_hypervisor_range(int idx) { return false; }
+	return false;
 #endif
+}
 
 static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,
 				       bool checkwx)
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index 903ec1e..9dd7e4b 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -3,6 +3,9 @@
 #include <linux/sort.h>
 #include <asm/uaccess.h>
 
+typedef bool (*ex_handler_t)(const struct exception_table_entry *,
+			    struct pt_regs *, int);
+
 static inline unsigned long
 ex_insn_addr(const struct exception_table_entry *x)
 {
@@ -13,11 +16,56 @@
 {
 	return (unsigned long)&x->fixup + x->fixup;
 }
-
-int fixup_exception(struct pt_regs *regs)
+static inline ex_handler_t
+ex_fixup_handler(const struct exception_table_entry *x)
 {
-	const struct exception_table_entry *fixup;
-	unsigned long new_ip;
+	return (ex_handler_t)((unsigned long)&x->handler + x->handler);
+}
+
+bool ex_handler_default(const struct exception_table_entry *fixup,
+		       struct pt_regs *regs, int trapnr)
+{
+	regs->ip = ex_fixup_addr(fixup);
+	return true;
+}
+EXPORT_SYMBOL(ex_handler_default);
+
+bool ex_handler_fault(const struct exception_table_entry *fixup,
+		     struct pt_regs *regs, int trapnr)
+{
+	regs->ip = ex_fixup_addr(fixup);
+	regs->ax = trapnr;
+	return true;
+}
+EXPORT_SYMBOL_GPL(ex_handler_fault);
+
+bool ex_handler_ext(const struct exception_table_entry *fixup,
+		   struct pt_regs *regs, int trapnr)
+{
+	/* Special hack for uaccess_err */
+	current_thread_info()->uaccess_err = 1;
+	regs->ip = ex_fixup_addr(fixup);
+	return true;
+}
+EXPORT_SYMBOL(ex_handler_ext);
+
+bool ex_has_fault_handler(unsigned long ip)
+{
+	const struct exception_table_entry *e;
+	ex_handler_t handler;
+
+	e = search_exception_tables(ip);
+	if (!e)
+		return false;
+	handler = ex_fixup_handler(e);
+
+	return handler == ex_handler_fault;
+}
+
+int fixup_exception(struct pt_regs *regs, int trapnr)
+{
+	const struct exception_table_entry *e;
+	ex_handler_t handler;
 
 #ifdef CONFIG_PNPBIOS
 	if (unlikely(SEGMENT_IS_PNP_CODE(regs->cs))) {
@@ -33,42 +81,34 @@
 	}
 #endif
 
-	fixup = search_exception_tables(regs->ip);
-	if (fixup) {
-		new_ip = ex_fixup_addr(fixup);
+	e = search_exception_tables(regs->ip);
+	if (!e)
+		return 0;
 
-		if (fixup->fixup - fixup->insn >= 0x7ffffff0 - 4) {
-			/* Special hack for uaccess_err */
-			current_thread_info()->uaccess_err = 1;
-			new_ip -= 0x7ffffff0;
-		}
-		regs->ip = new_ip;
-		return 1;
-	}
-
-	return 0;
+	handler = ex_fixup_handler(e);
+	return handler(e, regs, trapnr);
 }
 
 /* Restricted version used during very early boot */
 int __init early_fixup_exception(unsigned long *ip)
 {
-	const struct exception_table_entry *fixup;
+	const struct exception_table_entry *e;
 	unsigned long new_ip;
+	ex_handler_t handler;
 
-	fixup = search_exception_tables(*ip);
-	if (fixup) {
-		new_ip = ex_fixup_addr(fixup);
+	e = search_exception_tables(*ip);
+	if (!e)
+		return 0;
 
-		if (fixup->fixup - fixup->insn >= 0x7ffffff0 - 4) {
-			/* uaccess handling not supported during early boot */
-			return 0;
-		}
+	new_ip  = ex_fixup_addr(e);
+	handler = ex_fixup_handler(e);
 
-		*ip = new_ip;
-		return 1;
-	}
+	/* special handling not supported during early boot */
+	if (handler != ex_handler_default)
+		return 0;
 
-	return 0;
+	*ip = new_ip;
+	return 1;
 }
 
 /*
@@ -133,6 +173,8 @@
 		i += 4;
 		p->fixup += i;
 		i += 4;
+		p->handler += i;
+		i += 4;
 	}
 
 	sort(start, finish - start, sizeof(struct exception_table_entry),
@@ -145,6 +187,8 @@
 		i += 4;
 		p->fixup -= i;
 		i += 4;
+		p->handler -= i;
+		i += 4;
 	}
 }
 
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index eef44d9..03898ae 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -287,6 +287,9 @@
 	if (!pmd_k)
 		return -1;
 
+	if (pmd_huge(*pmd_k))
+		return 0;
+
 	pte_k = pte_offset_kernel(pmd_k, address);
 	if (!pte_present(*pte_k))
 		return -1;
@@ -360,8 +363,6 @@
  * 64-bit:
  *
  *   Handle a fault on the vmalloc area
- *
- * This assumes no large pages in there.
  */
 static noinline int vmalloc_fault(unsigned long address)
 {
@@ -403,17 +404,23 @@
 	if (pud_none(*pud_ref))
 		return -1;
 
-	if (pud_none(*pud) || pud_page_vaddr(*pud) != pud_page_vaddr(*pud_ref))
+	if (pud_none(*pud) || pud_pfn(*pud) != pud_pfn(*pud_ref))
 		BUG();
 
+	if (pud_huge(*pud))
+		return 0;
+
 	pmd = pmd_offset(pud, address);
 	pmd_ref = pmd_offset(pud_ref, address);
 	if (pmd_none(*pmd_ref))
 		return -1;
 
-	if (pmd_none(*pmd) || pmd_page(*pmd) != pmd_page(*pmd_ref))
+	if (pmd_none(*pmd) || pmd_pfn(*pmd) != pmd_pfn(*pmd_ref))
 		BUG();
 
+	if (pmd_huge(*pmd))
+		return 0;
+
 	pte_ref = pte_offset_kernel(pmd_ref, address);
 	if (!pte_present(*pte_ref))
 		return -1;
@@ -656,7 +663,7 @@
 	int sig;
 
 	/* Are we prepared to handle this kernel fault? */
-	if (fixup_exception(regs)) {
+	if (fixup_exception(regs, X86_TRAP_PF)) {
 		/*
 		 * Any interrupt that takes a fault gets the fixup. This makes
 		 * the below recursive fault logic only apply to a faults from
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
index 6d5eb59..d8a798d 100644
--- a/arch/x86/mm/gup.c
+++ b/arch/x86/mm/gup.c
@@ -102,7 +102,6 @@
 			return 0;
 		}
 
-		page = pte_page(pte);
 		if (pte_devmap(pte)) {
 			pgmap = get_dev_pagemap(pte_pfn(pte), pgmap);
 			if (unlikely(!pgmap)) {
@@ -115,6 +114,7 @@
 			return 0;
 		}
 		VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
+		page = pte_page(pte);
 		get_page(page);
 		put_dev_pagemap(pgmap);
 		SetPageReferenced(page);
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index 42982b2..740d7ac 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -173,10 +173,10 @@
 }
 __setup("hugepagesz=", setup_hugepagesz);
 
-#ifdef CONFIG_CMA
+#if (defined(CONFIG_MEMORY_ISOLATION) && defined(CONFIG_COMPACTION)) || defined(CONFIG_CMA)
 static __init int gigantic_pages_init(void)
 {
-	/* With CMA we can allocate gigantic pages at runtime */
+	/* With compaction or CMA we can allocate gigantic pages at runtime */
 	if (cpu_has_gbpages && !size_to_hstate(1UL << PUD_SHIFT))
 		hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
 	return 0;
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index cb4ef3d..bd7a9b9 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -388,7 +388,6 @@
 }
 
 pte_t *kmap_pte;
-pgprot_t kmap_prot;
 
 static inline pte_t *kmap_get_fixmap_pte(unsigned long vaddr)
 {
@@ -405,8 +404,6 @@
 	 */
 	kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN);
 	kmap_pte = kmap_get_fixmap_pte(kmap_vstart);
-
-	kmap_prot = PAGE_KERNEL;
 }
 
 #ifdef CONFIG_HIGHMEM
@@ -871,7 +868,6 @@
 	return flag;
 }
 
-#ifdef CONFIG_DEBUG_RODATA
 const int rodata_test_data = 0xC3;
 EXPORT_SYMBOL_GPL(rodata_test_data);
 
@@ -960,5 +956,3 @@
 	if (__supported_pte_mask & _PAGE_NX)
 		debug_checkwx();
 }
-#endif
-
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 5488d21..214afda 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -53,6 +53,7 @@
 #include <asm/numa.h>
 #include <asm/cacheflush.h>
 #include <asm/init.h>
+#include <asm/uv/uv.h>
 #include <asm/setup.h>
 
 #include "mm_internal.h"
@@ -1074,7 +1075,6 @@
 	mem_init_print_info(NULL);
 }
 
-#ifdef CONFIG_DEBUG_RODATA
 const int rodata_test_data = 0xC3;
 EXPORT_SYMBOL_GPL(rodata_test_data);
 
@@ -1166,8 +1166,6 @@
 	debug_checkwx();
 }
 
-#endif
-
 int kern_addr_valid(unsigned long addr)
 {
 	unsigned long above = ((long)addr) >> __VIRTUAL_MASK_SHIFT;
@@ -1206,26 +1204,13 @@
 
 static unsigned long probe_memory_block_size(void)
 {
-	/* start from 2g */
-	unsigned long bz = 1UL<<31;
+	unsigned long bz = MIN_MEMORY_BLOCK_SIZE;
 
-	if (totalram_pages >= (64ULL << (30 - PAGE_SHIFT))) {
-		pr_info("Using 2GB memory block size for large-memory system\n");
-		return 2UL * 1024 * 1024 * 1024;
-	}
+	/* if system is UV or has 64GB of RAM or more, use large blocks */
+	if (is_uv_system() || ((max_pfn << PAGE_SHIFT) >= (64UL << 30)))
+		bz = 2UL << 30; /* 2GB */
 
-	/* less than 64g installed */
-	if ((max_pfn << PAGE_SHIFT) < (16UL << 32))
-		return MIN_MEMORY_BLOCK_SIZE;
-
-	/* get the tail size */
-	while (bz > MIN_MEMORY_BLOCK_SIZE) {
-		if (!((max_pfn << PAGE_SHIFT) & (bz - 1)))
-			break;
-		bz >>= 1;
-	}
-
-	printk(KERN_DEBUG "memory block size : %ldMB\n", bz >> 20);
+	pr_info("x86/mm: Memory block size: %ldMB\n", bz >> 20);
 
 	return bz;
 }
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index d470cf2..1b1110f 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -120,11 +120,22 @@
 	kasan_populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END),
 			(void *)KASAN_SHADOW_END);
 
-	memset(kasan_zero_page, 0, PAGE_SIZE);
-
 	load_cr3(init_level4_pgt);
 	__flush_tlb_all();
-	init_task.kasan_depth = 0;
 
+	/*
+	 * kasan_zero_page has been used as early shadow memory, thus it may
+	 * contain some garbage. Now we can clear and write protect it, since
+	 * after the TLB flush no one should write to it.
+	 */
+	memset(kasan_zero_page, 0, PAGE_SIZE);
+	for (i = 0; i < PTRS_PER_PTE; i++) {
+		pte_t pte = __pte(__pa(kasan_zero_page) | __PAGE_KERNEL_RO);
+		set_pte(&kasan_zero_pte[i], pte);
+	}
+	/* Flush TLBs again to be sure that write protection applied. */
+	__flush_tlb_all();
+
+	init_task.kasan_depth = 0;
 	pr_info("KernelAddressSanitizer initialized\n");
 }
diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c
index 637ab34..ddb2244 100644
--- a/arch/x86/mm/kmmio.c
+++ b/arch/x86/mm/kmmio.c
@@ -33,7 +33,7 @@
 struct kmmio_fault_page {
 	struct list_head list;
 	struct kmmio_fault_page *release_next;
-	unsigned long page; /* location of the fault page */
+	unsigned long addr; /* the requested address */
 	pteval_t old_presence; /* page presence prior to arming */
 	bool armed;
 
@@ -70,9 +70,16 @@
 static struct list_head kmmio_page_table[KMMIO_PAGE_TABLE_SIZE];
 static LIST_HEAD(kmmio_probes);
 
-static struct list_head *kmmio_page_list(unsigned long page)
+static struct list_head *kmmio_page_list(unsigned long addr)
 {
-	return &kmmio_page_table[hash_long(page, KMMIO_PAGE_HASH_BITS)];
+	unsigned int l;
+	pte_t *pte = lookup_address(addr, &l);
+
+	if (!pte)
+		return NULL;
+	addr &= page_level_mask(l);
+
+	return &kmmio_page_table[hash_long(addr, KMMIO_PAGE_HASH_BITS)];
 }
 
 /* Accessed per-cpu */
@@ -98,15 +105,19 @@
 }
 
 /* You must be holding RCU read lock. */
-static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page)
+static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long addr)
 {
 	struct list_head *head;
 	struct kmmio_fault_page *f;
+	unsigned int l;
+	pte_t *pte = lookup_address(addr, &l);
 
-	page &= PAGE_MASK;
-	head = kmmio_page_list(page);
+	if (!pte)
+		return NULL;
+	addr &= page_level_mask(l);
+	head = kmmio_page_list(addr);
 	list_for_each_entry_rcu(f, head, list) {
-		if (f->page == page)
+		if (f->addr == addr)
 			return f;
 	}
 	return NULL;
@@ -137,10 +148,10 @@
 static int clear_page_presence(struct kmmio_fault_page *f, bool clear)
 {
 	unsigned int level;
-	pte_t *pte = lookup_address(f->page, &level);
+	pte_t *pte = lookup_address(f->addr, &level);
 
 	if (!pte) {
-		pr_err("no pte for page 0x%08lx\n", f->page);
+		pr_err("no pte for addr 0x%08lx\n", f->addr);
 		return -1;
 	}
 
@@ -156,7 +167,7 @@
 		return -1;
 	}
 
-	__flush_tlb_one(f->page);
+	__flush_tlb_one(f->addr);
 	return 0;
 }
 
@@ -176,12 +187,12 @@
 	int ret;
 	WARN_ONCE(f->armed, KERN_ERR pr_fmt("kmmio page already armed.\n"));
 	if (f->armed) {
-		pr_warning("double-arm: page 0x%08lx, ref %d, old %d\n",
-			   f->page, f->count, !!f->old_presence);
+		pr_warning("double-arm: addr 0x%08lx, ref %d, old %d\n",
+			   f->addr, f->count, !!f->old_presence);
 	}
 	ret = clear_page_presence(f, true);
-	WARN_ONCE(ret < 0, KERN_ERR pr_fmt("arming 0x%08lx failed.\n"),
-		  f->page);
+	WARN_ONCE(ret < 0, KERN_ERR pr_fmt("arming at 0x%08lx failed.\n"),
+		  f->addr);
 	f->armed = true;
 	return ret;
 }
@@ -191,7 +202,7 @@
 {
 	int ret = clear_page_presence(f, false);
 	WARN_ONCE(ret < 0,
-			KERN_ERR "kmmio disarming 0x%08lx failed.\n", f->page);
+			KERN_ERR "kmmio disarming at 0x%08lx failed.\n", f->addr);
 	f->armed = false;
 }
 
@@ -215,6 +226,12 @@
 	struct kmmio_context *ctx;
 	struct kmmio_fault_page *faultpage;
 	int ret = 0; /* default to fault not handled */
+	unsigned long page_base = addr;
+	unsigned int l;
+	pte_t *pte = lookup_address(addr, &l);
+	if (!pte)
+		return -EINVAL;
+	page_base &= page_level_mask(l);
 
 	/*
 	 * Preemption is now disabled to prevent process switch during
@@ -227,7 +244,7 @@
 	preempt_disable();
 	rcu_read_lock();
 
-	faultpage = get_kmmio_fault_page(addr);
+	faultpage = get_kmmio_fault_page(page_base);
 	if (!faultpage) {
 		/*
 		 * Either this page fault is not caused by kmmio, or
@@ -239,7 +256,7 @@
 
 	ctx = &get_cpu_var(kmmio_ctx);
 	if (ctx->active) {
-		if (addr == ctx->addr) {
+		if (page_base == ctx->addr) {
 			/*
 			 * A second fault on the same page means some other
 			 * condition needs handling by do_page_fault(), the
@@ -267,9 +284,9 @@
 	ctx->active++;
 
 	ctx->fpage = faultpage;
-	ctx->probe = get_kmmio_probe(addr);
+	ctx->probe = get_kmmio_probe(page_base);
 	ctx->saved_flags = (regs->flags & (X86_EFLAGS_TF | X86_EFLAGS_IF));
-	ctx->addr = addr;
+	ctx->addr = page_base;
 
 	if (ctx->probe && ctx->probe->pre_handler)
 		ctx->probe->pre_handler(ctx->probe, regs, addr);
@@ -354,12 +371,11 @@
 }
 
 /* You must be holding kmmio_lock. */
-static int add_kmmio_fault_page(unsigned long page)
+static int add_kmmio_fault_page(unsigned long addr)
 {
 	struct kmmio_fault_page *f;
 
-	page &= PAGE_MASK;
-	f = get_kmmio_fault_page(page);
+	f = get_kmmio_fault_page(addr);
 	if (f) {
 		if (!f->count)
 			arm_kmmio_fault_page(f);
@@ -372,26 +388,25 @@
 		return -1;
 
 	f->count = 1;
-	f->page = page;
+	f->addr = addr;
 
 	if (arm_kmmio_fault_page(f)) {
 		kfree(f);
 		return -1;
 	}
 
-	list_add_rcu(&f->list, kmmio_page_list(f->page));
+	list_add_rcu(&f->list, kmmio_page_list(f->addr));
 
 	return 0;
 }
 
 /* You must be holding kmmio_lock. */
-static void release_kmmio_fault_page(unsigned long page,
+static void release_kmmio_fault_page(unsigned long addr,
 				struct kmmio_fault_page **release_list)
 {
 	struct kmmio_fault_page *f;
 
-	page &= PAGE_MASK;
-	f = get_kmmio_fault_page(page);
+	f = get_kmmio_fault_page(addr);
 	if (!f)
 		return;
 
@@ -420,18 +435,27 @@
 	int ret = 0;
 	unsigned long size = 0;
 	const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK);
+	unsigned int l;
+	pte_t *pte;
 
 	spin_lock_irqsave(&kmmio_lock, flags);
 	if (get_kmmio_probe(p->addr)) {
 		ret = -EEXIST;
 		goto out;
 	}
+
+	pte = lookup_address(p->addr, &l);
+	if (!pte) {
+		ret = -EINVAL;
+		goto out;
+	}
+
 	kmmio_count++;
 	list_add_rcu(&p->list, &kmmio_probes);
 	while (size < size_lim) {
 		if (add_kmmio_fault_page(p->addr + size))
 			pr_err("Unable to set page fault.\n");
-		size += PAGE_SIZE;
+		size += page_level_size(l);
 	}
 out:
 	spin_unlock_irqrestore(&kmmio_lock, flags);
@@ -506,11 +530,17 @@
 	const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK);
 	struct kmmio_fault_page *release_list = NULL;
 	struct kmmio_delayed_release *drelease;
+	unsigned int l;
+	pte_t *pte;
+
+	pte = lookup_address(p->addr, &l);
+	if (!pte)
+		return;
 
 	spin_lock_irqsave(&kmmio_lock, flags);
 	while (size < size_lim) {
 		release_kmmio_fault_page(p->addr + size, &release_list);
-		size += PAGE_SIZE;
+		size += page_level_size(l);
 	}
 	list_del_rcu(&p->list);
 	kmmio_count--;
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index 96bd1e2..d2dc043 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -71,12 +71,12 @@
 
 	if (mmap_is_ia32())
 #ifdef CONFIG_COMPAT
-		rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_compat_bits) - 1);
+		rnd = get_random_long() & ((1UL << mmap_rnd_compat_bits) - 1);
 #else
-		rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_bits) - 1);
+		rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1);
 #endif
 	else
-		rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_bits) - 1);
+		rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1);
 
 	return rnd << PAGE_SHIFT;
 }
@@ -94,18 +94,6 @@
 }
 
 /*
- * Bottom-up (legacy) layout on X86_32 did not support randomization, X86_64
- * does, but not when emulating X86_32
- */
-static unsigned long mmap_legacy_base(unsigned long rnd)
-{
-	if (mmap_is_ia32())
-		return TASK_UNMAPPED_BASE;
-	else
-		return TASK_UNMAPPED_BASE + rnd;
-}
-
-/*
  * This function, called very early during the creation of a new
  * process VM image, sets up which VM layout function to use:
  */
@@ -116,7 +104,7 @@
 	if (current->flags & PF_RANDOMIZE)
 		random_factor = arch_mmap_rnd();
 
-	mm->mmap_legacy_base = mmap_legacy_base(random_factor);
+	mm->mmap_legacy_base = TASK_UNMAPPED_BASE + random_factor;
 
 	if (mmap_is_legacy()) {
 		mm->mmap_base = mm->mmap_legacy_base;
diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c
index b2fd67d..ef05755 100644
--- a/arch/x86/mm/mpx.c
+++ b/arch/x86/mm/mpx.c
@@ -123,7 +123,7 @@
 		break;
 	}
 
-	if (regno > nr_registers) {
+	if (regno >= nr_registers) {
 		WARN_ONCE(1, "decoded an instruction with an invalid register");
 		return -EINVAL;
 	}
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index c3b3f65..f70c1ff 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -465,46 +465,67 @@
 	return true;
 }
 
+/*
+ * Mark all currently memblock-reserved physical memory (which covers the
+ * kernel's own memory ranges) as hot-unswappable.
+ */
 static void __init numa_clear_kernel_node_hotplug(void)
 {
-	int i, nid;
-	nodemask_t numa_kernel_nodes = NODE_MASK_NONE;
-	unsigned long start, end;
-	struct memblock_region *r;
+	nodemask_t reserved_nodemask = NODE_MASK_NONE;
+	struct memblock_region *mb_region;
+	int i;
 
 	/*
+	 * We have to do some preprocessing of memblock regions, to
+	 * make them suitable for reservation.
+	 *
 	 * At this time, all memory regions reserved by memblock are
-	 * used by the kernel. Set the nid in memblock.reserved will
-	 * mark out all the nodes the kernel resides in.
+	 * used by the kernel, but those regions are not split up
+	 * along node boundaries yet, and don't necessarily have their
+	 * node ID set yet either.
+	 *
+	 * So iterate over all memory known to the x86 architecture,
+	 * and use those ranges to set the nid in memblock.reserved.
+	 * This will split up the memblock regions along node
+	 * boundaries and will set the node IDs as well.
 	 */
 	for (i = 0; i < numa_meminfo.nr_blks; i++) {
-		struct numa_memblk *mb = &numa_meminfo.blk[i];
+		struct numa_memblk *mb = numa_meminfo.blk + i;
+		int ret;
 
-		memblock_set_node(mb->start, mb->end - mb->start,
-				  &memblock.reserved, mb->nid);
+		ret = memblock_set_node(mb->start, mb->end - mb->start, &memblock.reserved, mb->nid);
+		WARN_ON_ONCE(ret);
 	}
 
 	/*
-	 * Mark all kernel nodes.
+	 * Now go over all reserved memblock regions, to construct a
+	 * node mask of all kernel reserved memory areas.
 	 *
-	 * When booting with mem=nn[kMG] or in a kdump kernel, numa_meminfo
-	 * may not include all the memblock.reserved memory ranges because
-	 * trim_snb_memory() reserves specific pages for Sandy Bridge graphics.
+	 * [ Note, when booting with mem=nn[kMG] or in a kdump kernel,
+	 *   numa_meminfo might not include all memblock.reserved
+	 *   memory ranges, because quirks such as trim_snb_memory()
+	 *   reserve specific pages for Sandy Bridge graphics. ]
 	 */
-	for_each_memblock(reserved, r)
-		if (r->nid != MAX_NUMNODES)
-			node_set(r->nid, numa_kernel_nodes);
+	for_each_memblock(reserved, mb_region) {
+		if (mb_region->nid != MAX_NUMNODES)
+			node_set(mb_region->nid, reserved_nodemask);
+	}
 
-	/* Clear MEMBLOCK_HOTPLUG flag for memory in kernel nodes. */
+	/*
+	 * Finally, clear the MEMBLOCK_HOTPLUG flag for all memory
+	 * belonging to the reserved node mask.
+	 *
+	 * Note that this will include memory regions that reside
+	 * on nodes that contain kernel memory - entire nodes
+	 * become hot-unpluggable:
+	 */
 	for (i = 0; i < numa_meminfo.nr_blks; i++) {
-		nid = numa_meminfo.blk[i].nid;
-		if (!node_isset(nid, numa_kernel_nodes))
+		struct numa_memblk *mb = numa_meminfo.blk + i;
+
+		if (!node_isset(mb->nid, reserved_nodemask))
 			continue;
 
-		start = numa_meminfo.blk[i].start;
-		end = numa_meminfo.blk[i].end;
-
-		memblock_clear_hotplug(start, end - start);
+		memblock_clear_hotplug(mb->start, mb->end - mb->start);
 	}
 }
 
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index fc6a4c8..007ebe2 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -33,7 +33,7 @@
 	pgd_t		*pgd;
 	pgprot_t	mask_set;
 	pgprot_t	mask_clr;
-	int		numpages;
+	unsigned long	numpages;
 	int		flags;
 	unsigned long	pfn;
 	unsigned	force_split : 1;
@@ -283,7 +283,7 @@
 		   __pa_symbol(__end_rodata) >> PAGE_SHIFT))
 		pgprot_val(forbidden) |= _PAGE_RW;
 
-#if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA)
+#if defined(CONFIG_X86_64)
 	/*
 	 * Once the kernel maps the text as RO (kernel_set_to_readonly is set),
 	 * kernel text mappings for the large page aligned text, rodata sections
@@ -419,24 +419,30 @@
 phys_addr_t slow_virt_to_phys(void *__virt_addr)
 {
 	unsigned long virt_addr = (unsigned long)__virt_addr;
-	unsigned long phys_addr, offset;
+	phys_addr_t phys_addr;
+	unsigned long offset;
 	enum pg_level level;
 	pte_t *pte;
 
 	pte = lookup_address(virt_addr, &level);
 	BUG_ON(!pte);
 
+	/*
+	 * pXX_pfn() returns unsigned long, which must be cast to phys_addr_t
+	 * before being left-shifted PAGE_SHIFT bits -- this trick is to
+	 * make 32-PAE kernel work correctly.
+	 */
 	switch (level) {
 	case PG_LEVEL_1G:
-		phys_addr = pud_pfn(*(pud_t *)pte) << PAGE_SHIFT;
+		phys_addr = (phys_addr_t)pud_pfn(*(pud_t *)pte) << PAGE_SHIFT;
 		offset = virt_addr & ~PUD_PAGE_MASK;
 		break;
 	case PG_LEVEL_2M:
-		phys_addr = pmd_pfn(*(pmd_t *)pte) << PAGE_SHIFT;
+		phys_addr = (phys_addr_t)pmd_pfn(*(pmd_t *)pte) << PAGE_SHIFT;
 		offset = virt_addr & ~PMD_PAGE_MASK;
 		break;
 	default:
-		phys_addr = pte_pfn(*pte) << PAGE_SHIFT;
+		phys_addr = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT;
 		offset = virt_addr & ~PAGE_MASK;
 	}
 
@@ -1122,8 +1128,10 @@
 	/*
 	 * Ignore all non primary paths.
 	 */
-	if (!primary)
+	if (!primary) {
+		cpa->numpages = 1;
 		return 0;
+	}
 
 	/*
 	 * Ignore the NULL PTE for kernel identity mapping, as it is expected
@@ -1350,7 +1358,7 @@
 		 * CPA operation. Either a large page has been
 		 * preserved or a single page update happened.
 		 */
-		BUG_ON(cpa->numpages > numpages);
+		BUG_ON(cpa->numpages > numpages || !cpa->numpages);
 		numpages -= cpa->numpages;
 		if (cpa->flags & (CPA_PAGES_ARRAY | CPA_ARRAY))
 			cpa->curpage++;
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index f4ae536..04e2e71 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -943,7 +943,7 @@
 			return -EINVAL;
 	}
 
-	*prot = __pgprot((pgprot_val(vma->vm_page_prot) & (~_PAGE_CACHE_MASK)) |
+	*prot = __pgprot((pgprot_val(*prot) & (~_PAGE_CACHE_MASK)) |
 			 cachemode2protval(pcm));
 
 	return 0;
@@ -959,7 +959,7 @@
 
 	/* Set prot based on lookup */
 	pcm = lookup_memtype(pfn_t_to_phys(pfn));
-	*prot = __pgprot((pgprot_val(vma->vm_page_prot) & (~_PAGE_CACHE_MASK)) |
+	*prot = __pgprot((pgprot_val(*prot) & (~_PAGE_CACHE_MASK)) |
 			 cachemode2protval(pcm));
 
 	return 0;
diff --git a/arch/x86/mm/setup_nx.c b/arch/x86/mm/setup_nx.c
index 92e2eac..8bea847 100644
--- a/arch/x86/mm/setup_nx.c
+++ b/arch/x86/mm/setup_nx.c
@@ -4,6 +4,7 @@
 
 #include <asm/pgtable.h>
 #include <asm/proto.h>
+#include <asm/cpufeature.h>
 
 static int disable_nx;
 
@@ -31,9 +32,8 @@
 
 void x86_configure_nx(void)
 {
-	if (boot_cpu_has(X86_FEATURE_NX) && !disable_nx)
-		__supported_pte_mask |= _PAGE_NX;
-	else
+	/* If disable_nx is set, clear NX on all new mappings going forward. */
+	if (disable_nx)
 		__supported_pte_mask &= ~_PAGE_NX;
 }
 
diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index 50d86c0..660a83c 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -24,7 +24,6 @@
 #include <asm/nmi.h>
 #include <asm/apic.h>
 #include <asm/processor.h>
-#include <asm/cpufeature.h>
 
 #include "op_x86_model.h"
 #include "op_counter.h"
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index 2879efc..d34b511 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -711,28 +711,22 @@
 	return 0;
 }
 
-int pcibios_alloc_irq(struct pci_dev *dev)
-{
-	/*
-	 * If the PCI device was already claimed by core code and has
-	 * MSI enabled, probing of the pcibios IRQ will overwrite
-	 * dev->irq.  So bail out if MSI is already enabled.
-	 */
-	if (pci_dev_msi_enabled(dev))
-		return -EBUSY;
-
-	return pcibios_enable_irq(dev);
-}
-
-void pcibios_free_irq(struct pci_dev *dev)
-{
-	if (pcibios_disable_irq)
-		pcibios_disable_irq(dev);
-}
-
 int pcibios_enable_device(struct pci_dev *dev, int mask)
 {
-	return pci_enable_resources(dev, mask);
+	int err;
+
+	if ((err = pci_enable_resources(dev, mask)) < 0)
+		return err;
+
+	if (!pci_dev_msi_enabled(dev))
+		return pcibios_enable_irq(dev);
+	return 0;
+}
+
+void pcibios_disable_device (struct pci_dev *dev)
+{
+	if (!pci_dev_msi_enabled(dev) && pcibios_disable_irq)
+		pcibios_disable_irq(dev);
 }
 
 int pci_ext_cfg_avail(void)
diff --git a/arch/x86/pci/intel_mid_pci.c b/arch/x86/pci/intel_mid_pci.c
index 0d24e7c..8b93e63 100644
--- a/arch/x86/pci/intel_mid_pci.c
+++ b/arch/x86/pci/intel_mid_pci.c
@@ -215,7 +215,7 @@
 	int polarity;
 	int ret;
 
-	if (pci_has_managed_irq(dev))
+	if (dev->irq_managed && dev->irq > 0)
 		return 0;
 
 	switch (intel_mid_identify_cpu()) {
@@ -256,13 +256,10 @@
 
 static void intel_mid_pci_irq_disable(struct pci_dev *dev)
 {
-	if (pci_has_managed_irq(dev)) {
+	if (!mp_should_keep_irq(&dev->dev) && dev->irq_managed &&
+	    dev->irq > 0) {
 		mp_unmap_irq(dev->irq);
 		dev->irq_managed = 0;
-		/*
-		 * Don't reset dev->irq here, otherwise
-		 * intel_mid_pci_irq_enable() will fail on next call.
-		 */
 	}
 }
 
diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c
index 32e7034..9bd1154 100644
--- a/arch/x86/pci/irq.c
+++ b/arch/x86/pci/irq.c
@@ -1202,7 +1202,7 @@
 			struct pci_dev *temp_dev;
 			int irq;
 
-			if (pci_has_managed_irq(dev))
+			if (dev->irq_managed && dev->irq > 0)
 				return 0;
 
 			irq = IO_APIC_get_PCI_irq_vector(dev->bus->number,
@@ -1230,7 +1230,8 @@
 			}
 			dev = temp_dev;
 			if (irq >= 0) {
-				pci_set_managed_irq(dev, irq);
+				dev->irq_managed = 1;
+				dev->irq = irq;
 				dev_info(&dev->dev, "PCI->APIC IRQ transform: "
 					 "INT %c -> IRQ %d\n", 'A' + pin - 1, irq);
 				return 0;
@@ -1256,10 +1257,24 @@
 	return 0;
 }
 
+bool mp_should_keep_irq(struct device *dev)
+{
+	if (dev->power.is_prepared)
+		return true;
+#ifdef CONFIG_PM
+	if (dev->power.runtime_status == RPM_SUSPENDING)
+		return true;
+#endif
+
+	return false;
+}
+
 static void pirq_disable_irq(struct pci_dev *dev)
 {
-	if (io_apic_assign_pci_irqs && pci_has_managed_irq(dev)) {
+	if (io_apic_assign_pci_irqs && !mp_should_keep_irq(&dev->dev) &&
+	    dev->irq_managed && dev->irq) {
 		mp_unmap_irq(dev->irq);
-		pci_reset_managed_irq(dev);
+		dev->irq = 0;
+		dev->irq_managed = 0;
 	}
 }
diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
index ff31ab4..beac4df 100644
--- a/arch/x86/pci/xen.c
+++ b/arch/x86/pci/xen.c
@@ -196,7 +196,10 @@
 	return 0;
 
 error:
-	dev_err(&dev->dev, "Xen PCI frontend has not registered MSI/MSI-X support!\n");
+	if (ret == -ENOSYS)
+		dev_err(&dev->dev, "Xen PCI frontend has not registered MSI/MSI-X support!\n");
+	else if (ret)
+		dev_err(&dev->dev, "Xen PCI frontend error: %d!\n", ret);
 free:
 	kfree(v);
 	return ret;
diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c
index 1c7380d..ed30e79 100644
--- a/arch/x86/platform/efi/quirks.c
+++ b/arch/x86/platform/efi/quirks.c
@@ -8,6 +8,7 @@
 #include <linux/memblock.h>
 #include <linux/bootmem.h>
 #include <linux/acpi.h>
+#include <linux/dmi.h>
 #include <asm/efi.h>
 #include <asm/uv/uv.h>
 
@@ -130,6 +131,27 @@
 EXPORT_SYMBOL_GPL(efi_query_variable_store);
 
 /*
+ * Helper function for efi_reserve_boot_services() to figure out if we
+ * can free regions in efi_free_boot_services().
+ *
+ * Use this function to ensure we do not free regions owned by somebody
+ * else. We must only reserve (and then free) regions:
+ *
+ * - Not within any part of the kernel
+ * - Not the BIOS reserved area (E820_RESERVED, E820_NVS, etc)
+ */
+static bool can_free_region(u64 start, u64 size)
+{
+	if (start + size > __pa_symbol(_text) && start <= __pa_symbol(_end))
+		return false;
+
+	if (!e820_all_mapped(start, start+size, E820_RAM))
+		return false;
+
+	return true;
+}
+
+/*
  * The UEFI specification makes it clear that the operating system is free to do
  * whatever it wants with boot services code after ExitBootServices() has been
  * called. Ignoring this recommendation a significant bunch of EFI implementations 
@@ -146,26 +168,50 @@
 		efi_memory_desc_t *md = p;
 		u64 start = md->phys_addr;
 		u64 size = md->num_pages << EFI_PAGE_SHIFT;
+		bool already_reserved;
 
 		if (md->type != EFI_BOOT_SERVICES_CODE &&
 		    md->type != EFI_BOOT_SERVICES_DATA)
 			continue;
-		/* Only reserve where possible:
-		 * - Not within any already allocated areas
-		 * - Not over any memory area (really needed, if above?)
-		 * - Not within any part of the kernel
-		 * - Not the bios reserved area
-		*/
-		if ((start + size > __pa_symbol(_text)
-				&& start <= __pa_symbol(_end)) ||
-			!e820_all_mapped(start, start+size, E820_RAM) ||
-			memblock_is_region_reserved(start, size)) {
-			/* Could not reserve, skip it */
-			md->num_pages = 0;
-			memblock_dbg("Could not reserve boot range [0x%010llx-0x%010llx]\n",
-				     start, start+size-1);
-		} else
+
+		already_reserved = memblock_is_region_reserved(start, size);
+
+		/*
+		 * Because the following memblock_reserve() is paired
+		 * with free_bootmem_late() for this region in
+		 * efi_free_boot_services(), we must be extremely
+		 * careful not to reserve, and subsequently free,
+		 * critical regions of memory (like the kernel image) or
+		 * those regions that somebody else has already
+		 * reserved.
+		 *
+		 * A good example of a critical region that must not be
+		 * freed is page zero (first 4Kb of memory), which may
+		 * contain boot services code/data but is marked
+		 * E820_RESERVED by trim_bios_range().
+		 */
+		if (!already_reserved) {
 			memblock_reserve(start, size);
+
+			/*
+			 * If we are the first to reserve the region, no
+			 * one else cares about it. We own it and can
+			 * free it later.
+			 */
+			if (can_free_region(start, size))
+				continue;
+		}
+
+		/*
+		 * We don't own the region. We must not free it.
+		 *
+		 * Setting this bit for a boot services region really
+		 * doesn't make sense as far as the firmware is
+		 * concerned, but it does provide us with a way to tag
+		 * those regions that must not be paired with
+		 * free_bootmem_late().
+		 */
+		md->attribute |= EFI_MEMORY_RUNTIME;
 	}
 }
 
@@ -182,8 +228,8 @@
 		    md->type != EFI_BOOT_SERVICES_DATA)
 			continue;
 
-		/* Could not reserve boot area */
-		if (!size)
+		/* Do not free, someone else owns it: */
+		if (md->attribute & EFI_MEMORY_RUNTIME)
 			continue;
 
 		free_bootmem_late(start, size);
@@ -248,6 +294,16 @@
 	return ret;
 }
 
+static const struct dmi_system_id sgi_uv1_dmi[] = {
+	{ NULL, "SGI UV1",
+		{	DMI_MATCH(DMI_PRODUCT_NAME,	"Stoutland Platform"),
+			DMI_MATCH(DMI_PRODUCT_VERSION,	"1.0"),
+			DMI_MATCH(DMI_BIOS_VENDOR,	"SGI.COM"),
+		}
+	},
+	{ } /* NULL entry stops DMI scanning */
+};
+
 void __init efi_apply_memmap_quirks(void)
 {
 	/*
@@ -260,10 +316,8 @@
 		efi_unmap_memmap();
 	}
 
-	/*
-	 * UV doesn't support the new EFI pagetable mapping yet.
-	 */
-	if (is_uv_system())
+	/* UV2+ BIOS has a fix for this issue.  UV1 still needs the quirk. */
+	if (dmi_check_system(sgi_uv1_dmi))
 		set_bit(EFI_OLD_MEMMAP, &efi.flags);
 }
 
diff --git a/arch/x86/platform/geode/alix.c b/arch/x86/platform/geode/alix.c
index 76b6632..1865c19 100644
--- a/arch/x86/platform/geode/alix.c
+++ b/arch/x86/platform/geode/alix.c
@@ -21,7 +21,7 @@
 #include <linux/init.h>
 #include <linux/io.h>
 #include <linux/string.h>
-#include <linux/module.h>
+#include <linux/moduleparam.h>
 #include <linux/leds.h>
 #include <linux/platform_device.h>
 #include <linux/gpio.h>
@@ -35,6 +35,11 @@
 #define BIOS_SIGNATURE_COREBOOT		0x500
 #define BIOS_REGION_SIZE		0x10000
 
+/*
+ * This driver is not modular, but to keep back compatibility
+ * with existing use cases, continuing with module_param is
+ * the easiest way forward.
+ */
 static bool force = 0;
 module_param(force, bool, 0444);
 /* FIXME: Award bios is not automatically detected as Alix platform */
@@ -192,9 +197,4 @@
 
 	return 0;
 }
-
-module_init(alix_init);
-
-MODULE_AUTHOR("Ed Wildgoose <kernel@wildgooses.com>");
-MODULE_DESCRIPTION("PCEngines ALIX System Setup");
-MODULE_LICENSE("GPL");
+device_initcall(alix_init);
diff --git a/arch/x86/platform/geode/geos.c b/arch/x86/platform/geode/geos.c
index aa733fb..4fcdb91 100644
--- a/arch/x86/platform/geode/geos.c
+++ b/arch/x86/platform/geode/geos.c
@@ -19,7 +19,6 @@
 #include <linux/init.h>
 #include <linux/io.h>
 #include <linux/string.h>
-#include <linux/module.h>
 #include <linux/leds.h>
 #include <linux/platform_device.h>
 #include <linux/gpio.h>
@@ -120,9 +119,4 @@
 
 	return 0;
 }
-
-module_init(geos_init);
-
-MODULE_AUTHOR("Philip Prindeville <philipp@redfish-solutions.com>");
-MODULE_DESCRIPTION("Traverse Technologies Geos System Setup");
-MODULE_LICENSE("GPL");
+device_initcall(geos_init);
diff --git a/arch/x86/platform/geode/net5501.c b/arch/x86/platform/geode/net5501.c
index 927e38c..a2f6b98 100644
--- a/arch/x86/platform/geode/net5501.c
+++ b/arch/x86/platform/geode/net5501.c
@@ -20,7 +20,6 @@
 #include <linux/init.h>
 #include <linux/io.h>
 #include <linux/string.h>
-#include <linux/module.h>
 #include <linux/leds.h>
 #include <linux/platform_device.h>
 #include <linux/gpio.h>
@@ -146,9 +145,4 @@
 
 	return 0;
 }
-
-module_init(net5501_init);
-
-MODULE_AUTHOR("Philip Prindeville <philipp@redfish-solutions.com>");
-MODULE_DESCRIPTION("Soekris net5501 System Setup");
-MODULE_LICENSE("GPL");
+device_initcall(net5501_init);
diff --git a/arch/x86/platform/intel-mid/intel-mid.c b/arch/x86/platform/intel-mid/intel-mid.c
index 1bbc21e..90bb997 100644
--- a/arch/x86/platform/intel-mid/intel-mid.c
+++ b/arch/x86/platform/intel-mid/intel-mid.c
@@ -138,7 +138,7 @@
 		intel_mid_ops = get_intel_mid_ops[__intel_mid_cpu_chip]();
 	else {
 		intel_mid_ops = get_intel_mid_ops[INTEL_MID_CPU_CHIP_PENWELL]();
-		pr_info("ARCH: Unknown SoC, assuming PENWELL!\n");
+		pr_info("ARCH: Unknown SoC, assuming Penwell!\n");
 	}
 
 out:
@@ -214,12 +214,10 @@
 	else if (strcmp("lapic_and_apbt", arg) == 0)
 		intel_mid_timer_options = INTEL_MID_TIMER_LAPIC_APBT;
 	else {
-		pr_warn("X86 INTEL_MID timer option %s not recognised"
-			   " use x86_intel_mid_timer=apbt_only or lapic_and_apbt\n",
-			   arg);
+		pr_warn("X86 INTEL_MID timer option %s not recognised use x86_intel_mid_timer=apbt_only or lapic_and_apbt\n",
+			arg);
 		return -EINVAL;
 	}
 	return 0;
 }
 __setup("x86_intel_mid_timer=", setup_x86_intel_mid_timer);
-
diff --git a/arch/x86/platform/intel-mid/mfld.c b/arch/x86/platform/intel-mid/mfld.c
index 23381d2..1eb47b6 100644
--- a/arch/x86/platform/intel-mid/mfld.c
+++ b/arch/x86/platform/intel-mid/mfld.c
@@ -52,10 +52,7 @@
 	/* mark tsc clocksource as reliable */
 	set_cpu_cap(&boot_cpu_data, X86_FEATURE_TSC_RELIABLE);
 
-	if (fast_calibrate)
-		return fast_calibrate;
-
-	return 0;
+	return fast_calibrate;
 }
 
 static void __init penwell_arch_setup(void)
diff --git a/arch/x86/platform/intel-mid/mrfl.c b/arch/x86/platform/intel-mid/mrfl.c
index aaca917..bd1adc6 100644
--- a/arch/x86/platform/intel-mid/mrfl.c
+++ b/arch/x86/platform/intel-mid/mrfl.c
@@ -81,10 +81,7 @@
 	/* mark tsc clocksource as reliable */
 	set_cpu_cap(&boot_cpu_data, X86_FEATURE_TSC_RELIABLE);
 
-	if (fast_calibrate)
-		return fast_calibrate;
-
-	return 0;
+	return fast_calibrate;
 }
 
 static void __init tangier_arch_setup(void)
diff --git a/arch/x86/platform/intel-quark/imr.c b/arch/x86/platform/intel-quark/imr.c
index c1bdafa..17d6d22 100644
--- a/arch/x86/platform/intel-quark/imr.c
+++ b/arch/x86/platform/intel-quark/imr.c
@@ -1,5 +1,5 @@
 /**
- * imr.c
+ * imr.c -- Intel Isolated Memory Region driver
  *
  * Copyright(c) 2013 Intel Corporation.
  * Copyright(c) 2015 Bryan O'Donoghue <pure.logic@nexus-software.ie>
@@ -31,7 +31,6 @@
 #include <linux/debugfs.h>
 #include <linux/init.h>
 #include <linux/mm.h>
-#include <linux/module.h>
 #include <linux/types.h>
 
 struct imr_device {
@@ -135,11 +134,9 @@
  * @idev:	pointer to imr_device structure.
  * @imr_id:	IMR entry to write.
  * @imr:	IMR structure representing address and access masks.
- * @lock:	indicates if the IMR lock bit should be applied.
  * @return:	0 on success or error code passed from mbi_iosf on failure.
  */
-static int imr_write(struct imr_device *idev, u32 imr_id,
-		     struct imr_regs *imr, bool lock)
+static int imr_write(struct imr_device *idev, u32 imr_id, struct imr_regs *imr)
 {
 	unsigned long flags;
 	u32 reg = imr_id * IMR_NUM_REGS + idev->reg_base;
@@ -163,15 +160,6 @@
 	if (ret)
 		goto failed;
 
-	/* Lock bit must be set separately to addr_lo address bits. */
-	if (lock) {
-		imr->addr_lo |= IMR_LOCK;
-		ret = iosf_mbi_write(QRK_MBI_UNIT_MM, MBI_REG_WRITE,
-				     reg - IMR_NUM_REGS, imr->addr_lo);
-		if (ret)
-			goto failed;
-	}
-
 	local_irq_restore(flags);
 	return 0;
 failed:
@@ -220,11 +208,12 @@
 		if (imr_is_enabled(&imr)) {
 			base = imr_to_phys(imr.addr_lo);
 			end = imr_to_phys(imr.addr_hi) + IMR_MASK;
+			size = end - base + 1;
 		} else {
 			base = 0;
 			end = 0;
+			size = 0;
 		}
-		size = end - base;
 		seq_printf(s, "imr%02i: base=%pa, end=%pa, size=0x%08zx "
 			   "rmask=0x%08x, wmask=0x%08x, %s, %s\n", i,
 			   &base, &end, size, imr.rmask, imr.wmask,
@@ -269,17 +258,6 @@
 }
 
 /**
- * imr_debugfs_unregister - unregister debugfs hooks.
- *
- * @idev:	pointer to imr_device structure.
- * @return:
- */
-static void imr_debugfs_unregister(struct imr_device *idev)
-{
-	debugfs_remove(idev->file);
-}
-
-/**
  * imr_check_params - check passed address range IMR alignment and non-zero size
  *
  * @base:	base address of intended IMR.
@@ -333,11 +311,10 @@
  * @size:	physical size of region in bytes must be aligned to 1KiB.
  * @read_mask:	read access mask.
  * @write_mask:	write access mask.
- * @lock:	indicates whether or not to permanently lock this region.
  * @return:	zero on success or negative value indicating error.
  */
 int imr_add_range(phys_addr_t base, size_t size,
-		  unsigned int rmask, unsigned int wmask, bool lock)
+		  unsigned int rmask, unsigned int wmask)
 {
 	phys_addr_t end;
 	unsigned int i;
@@ -410,7 +387,7 @@
 	imr.rmask = rmask;
 	imr.wmask = wmask;
 
-	ret = imr_write(idev, reg, &imr, lock);
+	ret = imr_write(idev, reg, &imr);
 	if (ret < 0) {
 		/*
 		 * In the highly unlikely event iosf_mbi_write failed
@@ -421,7 +398,7 @@
 		imr.addr_hi = 0;
 		imr.rmask = IMR_READ_ACCESS_ALL;
 		imr.wmask = IMR_WRITE_ACCESS_ALL;
-		imr_write(idev, reg, &imr, false);
+		imr_write(idev, reg, &imr);
 	}
 failed:
 	mutex_unlock(&idev->lock);
@@ -517,7 +494,7 @@
 	imr.rmask = IMR_READ_ACCESS_ALL;
 	imr.wmask = IMR_WRITE_ACCESS_ALL;
 
-	ret = imr_write(idev, reg, &imr, false);
+	ret = imr_write(idev, reg, &imr);
 
 failed:
 	mutex_unlock(&idev->lock);
@@ -579,6 +556,7 @@
 {
 	phys_addr_t base = virt_to_phys(&_text);
 	size_t size = virt_to_phys(&__end_rodata) - base;
+	unsigned long start, end;
 	int i;
 	int ret;
 
@@ -586,18 +564,24 @@
 	for (i = 0; i < idev->max_imr; i++)
 		imr_clear(i);
 
+	start = (unsigned long)_text;
+	end = (unsigned long)__end_rodata - 1;
+
 	/*
-	 * Setup a locked IMR around the physical extent of the kernel
+	 * Setup an unlocked IMR around the physical extent of the kernel
 	 * from the beginning of the .text secton to the end of the
 	 * .rodata section as one physically contiguous block.
+	 *
+	 * We don't round up @size since it is already PAGE_SIZE aligned.
+	 * See vmlinux.lds.S for details.
 	 */
-	ret = imr_add_range(base, size, IMR_CPU, IMR_CPU, true);
+	ret = imr_add_range(base, size, IMR_CPU, IMR_CPU);
 	if (ret < 0) {
-		pr_err("unable to setup IMR for kernel: (%p - %p)\n",
-			&_text, &__end_rodata);
+		pr_err("unable to setup IMR for kernel: %zu KiB (%lx - %lx)\n",
+			size / 1024, start, end);
 	} else {
-		pr_info("protecting kernel .text - .rodata: %zu KiB (%p - %p)\n",
-			size / 1024, &_text, &__end_rodata);
+		pr_info("protecting kernel .text - .rodata: %zu KiB (%lx - %lx)\n",
+			size / 1024, start, end);
 	}
 
 }
@@ -606,7 +590,6 @@
 	{ X86_VENDOR_INTEL, 5, 9 },	/* Intel Quark SoC X1000. */
 	{}
 };
-MODULE_DEVICE_TABLE(x86cpu, imr_ids);
 
 /**
  * imr_init - entry point for IMR driver.
@@ -632,22 +615,4 @@
 	imr_fixup_memmap(idev);
 	return 0;
 }
-
-/**
- * imr_exit - exit point for IMR code.
- *
- * Deregisters debugfs, leave IMR state as-is.
- *
- * return:
- */
-static void __exit imr_exit(void)
-{
-	imr_debugfs_unregister(&imr_dev);
-}
-
-module_init(imr_init);
-module_exit(imr_exit);
-
-MODULE_AUTHOR("Bryan O'Donoghue <pure.logic@nexus-software.ie>");
-MODULE_DESCRIPTION("Intel Isolated Memory Region driver");
-MODULE_LICENSE("Dual BSD/GPL");
+device_initcall(imr_init);
diff --git a/arch/x86/platform/intel-quark/imr_selftest.c b/arch/x86/platform/intel-quark/imr_selftest.c
index 278e4da..f5bad40 100644
--- a/arch/x86/platform/intel-quark/imr_selftest.c
+++ b/arch/x86/platform/intel-quark/imr_selftest.c
@@ -1,5 +1,5 @@
 /**
- * imr_selftest.c
+ * imr_selftest.c -- Intel Isolated Memory Region self-test driver
  *
  * Copyright(c) 2013 Intel Corporation.
  * Copyright(c) 2015 Bryan O'Donoghue <pure.logic@nexus-software.ie>
@@ -15,7 +15,6 @@
 #include <asm/imr.h>
 #include <linux/init.h>
 #include <linux/mm.h>
-#include <linux/module.h>
 #include <linux/types.h>
 
 #define SELFTEST KBUILD_MODNAME ": "
@@ -61,30 +60,30 @@
 	int ret;
 
 	/* Test zero zero. */
-	ret = imr_add_range(0, 0, 0, 0, false);
+	ret = imr_add_range(0, 0, 0, 0);
 	imr_self_test_result(ret < 0, "zero sized IMR\n");
 
 	/* Test exact overlap. */
-	ret = imr_add_range(base, size, IMR_CPU, IMR_CPU, false);
+	ret = imr_add_range(base, size, IMR_CPU, IMR_CPU);
 	imr_self_test_result(ret < 0, fmt_over, __va(base), __va(base + size));
 
 	/* Test overlap with base inside of existing. */
 	base += size - IMR_ALIGN;
-	ret = imr_add_range(base, size, IMR_CPU, IMR_CPU, false);
+	ret = imr_add_range(base, size, IMR_CPU, IMR_CPU);
 	imr_self_test_result(ret < 0, fmt_over, __va(base), __va(base + size));
 
 	/* Test overlap with end inside of existing. */
 	base -= size + IMR_ALIGN * 2;
-	ret = imr_add_range(base, size, IMR_CPU, IMR_CPU, false);
+	ret = imr_add_range(base, size, IMR_CPU, IMR_CPU);
 	imr_self_test_result(ret < 0, fmt_over, __va(base), __va(base + size));
 
 	/* Test that a 1 KiB IMR @ zero with read/write all will bomb out. */
 	ret = imr_add_range(0, IMR_ALIGN, IMR_READ_ACCESS_ALL,
-			    IMR_WRITE_ACCESS_ALL, false);
+			    IMR_WRITE_ACCESS_ALL);
 	imr_self_test_result(ret < 0, "1KiB IMR @ 0x00000000 - access-all\n");
 
 	/* Test that a 1 KiB IMR @ zero with CPU only will work. */
-	ret = imr_add_range(0, IMR_ALIGN, IMR_CPU, IMR_CPU, false);
+	ret = imr_add_range(0, IMR_ALIGN, IMR_CPU, IMR_CPU);
 	imr_self_test_result(ret >= 0, "1KiB IMR @ 0x00000000 - cpu-access\n");
 	if (ret >= 0) {
 		ret = imr_remove_range(0, IMR_ALIGN);
@@ -93,8 +92,7 @@
 
 	/* Test 2 KiB works. */
 	size = IMR_ALIGN * 2;
-	ret = imr_add_range(0, size, IMR_READ_ACCESS_ALL,
-			    IMR_WRITE_ACCESS_ALL, false);
+	ret = imr_add_range(0, size, IMR_READ_ACCESS_ALL, IMR_WRITE_ACCESS_ALL);
 	imr_self_test_result(ret >= 0, "2KiB IMR @ 0x00000000\n");
 	if (ret >= 0) {
 		ret = imr_remove_range(0, size);
@@ -106,7 +104,6 @@
 	{ X86_VENDOR_INTEL, 5, 9 },	/* Intel Quark SoC X1000. */
 	{}
 };
-MODULE_DEVICE_TABLE(x86cpu, imr_ids);
 
 /**
  * imr_self_test_init - entry point for IMR driver.
@@ -125,13 +122,4 @@
  *
  * return:
  */
-static void __exit imr_self_test_exit(void)
-{
-}
-
-module_init(imr_self_test_init);
-module_exit(imr_self_test_exit);
-
-MODULE_AUTHOR("Bryan O'Donoghue <pure.logic@nexus-software.ie>");
-MODULE_DESCRIPTION("Intel Isolated Memory Region self-test driver");
-MODULE_LICENSE("Dual BSD/GPL");
+device_initcall(imr_self_test_init);
diff --git a/arch/x86/um/asm/barrier.h b/arch/x86/um/asm/barrier.h
index 174781a..00c3190 100644
--- a/arch/x86/um/asm/barrier.h
+++ b/arch/x86/um/asm/barrier.h
@@ -3,7 +3,7 @@
 
 #include <asm/asm.h>
 #include <asm/segment.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
 #include <asm/cmpxchg.h>
 #include <asm/nops.h>
 
diff --git a/arch/x86/um/os-Linux/task_size.c b/arch/x86/um/os-Linux/task_size.c
index 8502ad3..5adb6a2 100644
--- a/arch/x86/um/os-Linux/task_size.c
+++ b/arch/x86/um/os-Linux/task_size.c
@@ -109,7 +109,7 @@
 		exit(1);
 	}
 
-	printf("0x%x\n", bottom << UM_KERN_PAGE_SHIFT);
+	printf("0x%lx\n", bottom << UM_KERN_PAGE_SHIFT);
 	printf("Locating the top of the address space ... ");
 	fflush(stdout);
 
@@ -134,7 +134,7 @@
 		exit(1);
 	}
 	top <<= UM_KERN_PAGE_SHIFT;
-	printf("0x%x\n", top);
+	printf("0x%lx\n", top);
 
 	return top;
 }
diff --git a/arch/x86/um/sys_call_table_32.c b/arch/x86/um/sys_call_table_32.c
index 439c099..bfce503 100644
--- a/arch/x86/um/sys_call_table_32.c
+++ b/arch/x86/um/sys_call_table_32.c
@@ -25,11 +25,11 @@
 
 #define old_mmap sys_old_mmap
 
-#define __SYSCALL_I386(nr, sym, compat) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
+#define __SYSCALL_I386(nr, sym, qual) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
 #include <asm/syscalls_32.h>
 
 #undef __SYSCALL_I386
-#define __SYSCALL_I386(nr, sym, compat) [ nr ] = sym,
+#define __SYSCALL_I386(nr, sym, qual) [ nr ] = sym,
 
 extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
 
diff --git a/arch/x86/um/sys_call_table_64.c b/arch/x86/um/sys_call_table_64.c
index b74ea6c..f306413 100644
--- a/arch/x86/um/sys_call_table_64.c
+++ b/arch/x86/um/sys_call_table_64.c
@@ -35,14 +35,11 @@
 #define stub_execveat sys_execveat
 #define stub_rt_sigreturn sys_rt_sigreturn
 
-#define __SYSCALL_COMMON(nr, sym, compat) __SYSCALL_64(nr, sym, compat)
-#define __SYSCALL_X32(nr, sym, compat) /* Not supported */
-
-#define __SYSCALL_64(nr, sym, compat) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
+#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
 #include <asm/syscalls_64.h>
 
 #undef __SYSCALL_64
-#define __SYSCALL_64(nr, sym, compat) [ nr ] = sym,
+#define __SYSCALL_64(nr, sym, qual) [ nr ] = sym,
 
 extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
 
diff --git a/arch/x86/um/user-offsets.c b/arch/x86/um/user-offsets.c
index ce7e360..470564b 100644
--- a/arch/x86/um/user-offsets.c
+++ b/arch/x86/um/user-offsets.c
@@ -9,14 +9,12 @@
 #include <asm/types.h>
 
 #ifdef __i386__
-#define __SYSCALL_I386(nr, sym, compat) [nr] = 1,
+#define __SYSCALL_I386(nr, sym, qual) [nr] = 1,
 static char syscalls[] = {
 #include <asm/syscalls_32.h>
 };
 #else
-#define __SYSCALL_64(nr, sym, compat) [nr] = 1,
-#define __SYSCALL_COMMON(nr, sym, compat) [nr] = 1,
-#define __SYSCALL_X32(nr, sym, compat) /* Not supported */
+#define __SYSCALL_64(nr, sym, qual) [nr] = 1,
 static char syscalls[] = {
 #include <asm/syscalls_64.h>
 };
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index d09e4c9..2c26108 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1654,7 +1654,7 @@
 	cpu_detect(&new_cpu_data);
 	set_cpu_cap(&new_cpu_data, X86_FEATURE_FPU);
 	new_cpu_data.wp_works_ok = 1;
-	new_cpu_data.x86_capability[0] = cpuid_edx(1);
+	new_cpu_data.x86_capability[CPUID_1_EDX] = cpuid_edx(1);
 #endif
 
 	if (xen_start_info->mod_start) {
diff --git a/arch/x86/xen/pmu.c b/arch/x86/xen/pmu.c
index 724a087..9466354 100644
--- a/arch/x86/xen/pmu.c
+++ b/arch/x86/xen/pmu.c
@@ -11,7 +11,7 @@
 #include "pmu.h"
 
 /* x86_pmu.handle_irq definition */
-#include "../kernel/cpu/perf_event.h"
+#include "../events/perf_event.h"
 
 #define XENPMU_IRQ_PROCESSING    1
 struct xenpmu {
diff --git a/block/Kconfig b/block/Kconfig
index 161491d..0363cd7 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -88,6 +88,19 @@
 	T10/SCSI Data Integrity Field or the T13/ATA External Path
 	Protection.  If in doubt, say N.
 
+config BLK_DEV_DAX
+	bool "Block device DAX support"
+	depends on FS_DAX
+	depends on BROKEN
+	help
+	  When DAX support is available (CONFIG_FS_DAX) raw block
+	  devices can also support direct userspace access to the
+	  storage capacity via MMAP(2) similar to a file on a
+	  DAX-enabled filesystem.  However, the DAX I/O-path disables
+	  some standard I/O-statistics, and the MMAP(2) path has some
+	  operational differences due to bypassing the page
+	  cache.  If in doubt, say N.
+
 config BLK_DEV_THROTTLING
 	bool "Block layer bio throttling support"
 	depends on BLK_CGROUP=y
diff --git a/block/bio.c b/block/bio.c
index dbabd48..cf75915 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -874,7 +874,7 @@
 	bio->bi_private = &ret;
 	bio->bi_end_io = submit_bio_wait_endio;
 	submit_bio(rw, bio);
-	wait_for_completion(&ret.event);
+	wait_for_completion_io(&ret.event);
 
 	return ret.error;
 }
@@ -1090,9 +1090,12 @@
 	if (!bio_flagged(bio, BIO_NULL_MAPPED)) {
 		/*
 		 * if we're in a workqueue, the request is orphaned, so
-		 * don't copy into a random user address space, just free.
+		 * don't copy into a random user address space, just free
+		 * and return -EINTR so user space doesn't expect any data.
 		 */
-		if (current->mm && bio_data_dir(bio) == READ)
+		if (!current->mm)
+			ret = -EINTR;
+		else if (bio_data_dir(bio) == READ)
 			ret = bio_copy_to_iter(bio, bmd->iter);
 		if (bmd->is_our_pages)
 			bio_free_pages(bio);
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 5a37188..66e6f1a 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -788,6 +788,7 @@
 {
 	struct gendisk *disk;
 	struct blkcg_gq *blkg;
+	struct module *owner;
 	unsigned int major, minor;
 	int key_len, part, ret;
 	char *body;
@@ -804,7 +805,9 @@
 	if (!disk)
 		return -ENODEV;
 	if (part) {
+		owner = disk->fops->owner;
 		put_disk(disk);
+		module_put(owner);
 		return -ENODEV;
 	}
 
@@ -820,7 +823,9 @@
 		ret = PTR_ERR(blkg);
 		rcu_read_unlock();
 		spin_unlock_irq(disk->queue->queue_lock);
+		owner = disk->fops->owner;
 		put_disk(disk);
+		module_put(owner);
 		/*
 		 * If queue was bypassing, we should retry.  Do so after a
 		 * short msleep().  It isn't strictly necessary but queue
@@ -851,9 +856,13 @@
 void blkg_conf_finish(struct blkg_conf_ctx *ctx)
 	__releases(ctx->disk->queue->queue_lock) __releases(rcu)
 {
+	struct module *owner;
+
 	spin_unlock_irq(ctx->disk->queue->queue_lock);
 	rcu_read_unlock();
+	owner = ctx->disk->fops->owner;
 	put_disk(ctx->disk);
+	module_put(owner);
 }
 EXPORT_SYMBOL_GPL(blkg_conf_finish);
 
diff --git a/block/blk-core.c b/block/blk-core.c
index ab51685..b83d297 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -2455,14 +2455,16 @@
 
 			rq = NULL;
 			break;
-		} else if (ret == BLKPREP_KILL) {
+		} else if (ret == BLKPREP_KILL || ret == BLKPREP_INVALID) {
+			int err = (ret == BLKPREP_INVALID) ? -EREMOTEIO : -EIO;
+
 			rq->cmd_flags |= REQ_QUIET;
 			/*
 			 * Mark this request as started so we don't trigger
 			 * any debug logic in the end I/O path.
 			 */
 			blk_start_request(rq);
-			__blk_end_request_all(rq, -EIO);
+			__blk_end_request_all(rq, err);
 		} else {
 			printk(KERN_ERR "%s: bad return=%d\n", __func__, ret);
 			break;
diff --git a/block/blk-map.c b/block/blk-map.c
index f565e11..a54f054 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -57,6 +57,49 @@
 	return ret;
 }
 
+static int __blk_rq_map_user_iov(struct request *rq,
+		struct rq_map_data *map_data, struct iov_iter *iter,
+		gfp_t gfp_mask, bool copy)
+{
+	struct request_queue *q = rq->q;
+	struct bio *bio, *orig_bio;
+	int ret;
+
+	if (copy)
+		bio = bio_copy_user_iov(q, map_data, iter, gfp_mask);
+	else
+		bio = bio_map_user_iov(q, iter, gfp_mask);
+
+	if (IS_ERR(bio))
+		return PTR_ERR(bio);
+
+	if (map_data && map_data->null_mapped)
+		bio_set_flag(bio, BIO_NULL_MAPPED);
+
+	iov_iter_advance(iter, bio->bi_iter.bi_size);
+	if (map_data)
+		map_data->offset += bio->bi_iter.bi_size;
+
+	orig_bio = bio;
+	blk_queue_bounce(q, &bio);
+
+	/*
+	 * We link the bounce buffer in and could have to traverse it
+	 * later so we have to get a ref to prevent it from being freed
+	 */
+	bio_get(bio);
+
+	ret = blk_rq_append_bio(q, rq, bio);
+	if (ret) {
+		bio_endio(bio);
+		__blk_rq_unmap_user(orig_bio);
+		bio_put(bio);
+		return ret;
+	}
+
+	return 0;
+}
+
 /**
  * blk_rq_map_user_iov - map user data to a request, for REQ_TYPE_BLOCK_PC usage
  * @q:		request queue where request should be inserted
@@ -82,10 +125,11 @@
 			struct rq_map_data *map_data,
 			const struct iov_iter *iter, gfp_t gfp_mask)
 {
-	struct bio *bio;
-	int unaligned = 0;
-	struct iov_iter i;
 	struct iovec iov, prv = {.iov_base = NULL, .iov_len = 0};
+	bool copy = (q->dma_pad_mask & iter->count) || map_data;
+	struct bio *bio = NULL;
+	struct iov_iter i;
+	int ret;
 
 	if (!iter || !iter->count)
 		return -EINVAL;
@@ -101,42 +145,29 @@
 		 */
 		if ((uaddr & queue_dma_alignment(q)) ||
 		    iovec_gap_to_prv(q, &prv, &iov))
-			unaligned = 1;
+			copy = true;
 
 		prv.iov_base = iov.iov_base;
 		prv.iov_len = iov.iov_len;
 	}
 
-	if (unaligned || (q->dma_pad_mask & iter->count) || map_data)
-		bio = bio_copy_user_iov(q, map_data, iter, gfp_mask);
-	else
-		bio = bio_map_user_iov(q, iter, gfp_mask);
-
-	if (IS_ERR(bio))
-		return PTR_ERR(bio);
-
-	if (map_data && map_data->null_mapped)
-		bio_set_flag(bio, BIO_NULL_MAPPED);
-
-	if (bio->bi_iter.bi_size != iter->count) {
-		/*
-		 * Grab an extra reference to this bio, as bio_unmap_user()
-		 * expects to be able to drop it twice as it happens on the
-		 * normal IO completion path
-		 */
-		bio_get(bio);
-		bio_endio(bio);
-		__blk_rq_unmap_user(bio);
-		return -EINVAL;
-	}
+	i = *iter;
+	do {
+		ret =__blk_rq_map_user_iov(rq, map_data, &i, gfp_mask, copy);
+		if (ret)
+			goto unmap_rq;
+		if (!bio)
+			bio = rq->bio;
+	} while (iov_iter_count(&i));
 
 	if (!bio_flagged(bio, BIO_USER_MAPPED))
 		rq->cmd_flags |= REQ_COPY_USER;
-
-	blk_queue_bounce(q, &bio);
-	bio_get(bio);
-	blk_rq_bio_prep(q, rq, bio);
 	return 0;
+
+unmap_rq:
+	__blk_rq_unmap_user(bio);
+	rq->bio = NULL;
+	return -EINVAL;
 }
 EXPORT_SYMBOL(blk_rq_map_user_iov);
 
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 1699df5..2613531 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -70,6 +70,18 @@
 	return bio_split(bio, q->limits.max_write_same_sectors, GFP_NOIO, bs);
 }
 
+static inline unsigned get_max_io_size(struct request_queue *q,
+				       struct bio *bio)
+{
+	unsigned sectors = blk_max_size_offset(q, bio->bi_iter.bi_sector);
+	unsigned mask = queue_logical_block_size(q) - 1;
+
+	/* aligned to logical block size */
+	sectors &= ~(mask >> 9);
+
+	return sectors;
+}
+
 static struct bio *blk_bio_segment_split(struct request_queue *q,
 					 struct bio *bio,
 					 struct bio_set *bs,
@@ -81,6 +93,7 @@
 	unsigned front_seg_size = bio->bi_seg_front_size;
 	bool do_split = true;
 	struct bio *new = NULL;
+	const unsigned max_sectors = get_max_io_size(q, bio);
 
 	bio_for_each_segment(bv, bio, iter) {
 		/*
@@ -90,20 +103,19 @@
 		if (bvprvp && bvec_gap_to_prev(q, bvprvp, bv.bv_offset))
 			goto split;
 
-		if (sectors + (bv.bv_len >> 9) >
-				blk_max_size_offset(q, bio->bi_iter.bi_sector)) {
+		if (sectors + (bv.bv_len >> 9) > max_sectors) {
 			/*
 			 * Consider this a new segment if we're splitting in
 			 * the middle of this vector.
 			 */
 			if (nsegs < queue_max_segments(q) &&
-			    sectors < blk_max_size_offset(q,
-						bio->bi_iter.bi_sector)) {
+			    sectors < max_sectors) {
 				nsegs++;
-				sectors = blk_max_size_offset(q,
-						bio->bi_iter.bi_sector);
+				sectors = max_sectors;
 			}
-			goto split;
+			if (sectors)
+				goto split;
+			/* Make this single bvec as the 1st segment */
 		}
 
 		if (bvprvp && blk_queue_cluster(q)) {
@@ -292,7 +304,6 @@
 				   struct bio *nxt)
 {
 	struct bio_vec end_bv = { NULL }, nxt_bv;
-	struct bvec_iter iter;
 
 	if (!blk_queue_cluster(q))
 		return 0;
@@ -304,11 +315,8 @@
 	if (!bio_has_data(bio))
 		return 1;
 
-	bio_for_each_segment(end_bv, bio, iter)
-		if (end_bv.bv_len == iter.bi_size)
-			break;
-
-	nxt_bv = bio_iovec(nxt);
+	bio_get_last_bvec(bio, &end_bv);
+	bio_get_first_bvec(nxt, &nxt_bv);
 
 	if (!BIOVEC_PHYS_MERGEABLE(&end_bv, &nxt_bv))
 		return 0;
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 4c0622f..56c0a72 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -599,8 +599,10 @@
 		 * If a request wasn't started before the queue was
 		 * marked dying, kill it here or it'll go unnoticed.
 		 */
-		if (unlikely(blk_queue_dying(rq->q)))
-			blk_mq_complete_request(rq, -EIO);
+		if (unlikely(blk_queue_dying(rq->q))) {
+			rq->errors = -EIO;
+			blk_mq_end_request(rq, rq->errors);
+		}
 		return;
 	}
 
diff --git a/block/blk-settings.c b/block/blk-settings.c
index dd49735..c7bb666 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -91,8 +91,8 @@
 	lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK;
 	lim->virt_boundary_mask = 0;
 	lim->max_segment_size = BLK_MAX_SEGMENT_SIZE;
-	lim->max_sectors = lim->max_dev_sectors = lim->max_hw_sectors =
-		BLK_SAFE_MAX_SECTORS;
+	lim->max_sectors = lim->max_hw_sectors = BLK_SAFE_MAX_SECTORS;
+	lim->max_dev_sectors = 0;
 	lim->chunk_sectors = 0;
 	lim->max_write_same_sectors = 0;
 	lim->max_discard_sectors = 0;
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index e140cc4..dd937630 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -147,10 +147,9 @@
 
 static ssize_t queue_discard_max_hw_show(struct request_queue *q, char *page)
 {
-	unsigned long long val;
 
-	val = q->limits.max_hw_discard_sectors << 9;
-	return sprintf(page, "%llu\n", val);
+	return sprintf(page, "%llu\n",
+		(unsigned long long)q->limits.max_hw_discard_sectors << 9);
 }
 
 static ssize_t queue_discard_max_show(struct request_queue *q, char *page)
diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c
index a753df2..d0dd788 100644
--- a/block/deadline-iosched.c
+++ b/block/deadline-iosched.c
@@ -39,7 +39,6 @@
 	 */
 	struct request *next_rq[2];
 	unsigned int batching;		/* number of sequential requests made */
-	sector_t last_sector;		/* head position */
 	unsigned int starved;		/* times reads have starved writes */
 
 	/*
@@ -210,8 +209,6 @@
 	dd->next_rq[WRITE] = NULL;
 	dd->next_rq[data_dir] = deadline_latter_request(rq);
 
-	dd->last_sector = rq_end_sector(rq);
-
 	/*
 	 * take it off the sort and fifo list, move
 	 * to dispatch queue
diff --git a/block/ioctl.c b/block/ioctl.c
index 77f5d177..d8996bb 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -434,42 +434,6 @@
 
 	return true;
 }
-
-static int blkdev_daxset(struct block_device *bdev, unsigned long argp)
-{
-	unsigned long arg;
-	int rc = 0;
-
-	if (!capable(CAP_SYS_ADMIN))
-		return -EACCES;
-
-	if (get_user(arg, (int __user *)(argp)))
-		return -EFAULT;
-	arg = !!arg;
-	if (arg == !!(bdev->bd_inode->i_flags & S_DAX))
-		return 0;
-
-	if (arg)
-		arg = S_DAX;
-
-	if (arg && !blkdev_dax_capable(bdev))
-		return -ENOTTY;
-
-	inode_lock(bdev->bd_inode);
-	if (bdev->bd_map_count == 0)
-		inode_set_flags(bdev->bd_inode, arg, S_DAX);
-	else
-		rc = -EBUSY;
-	inode_unlock(bdev->bd_inode);
-	return rc;
-}
-#else
-static int blkdev_daxset(struct block_device *bdev, int arg)
-{
-	if (arg)
-		return -ENOTTY;
-	return 0;
-}
 #endif
 
 static int blkdev_flushbuf(struct block_device *bdev, fmode_t mode,
@@ -634,8 +598,6 @@
 	case BLKTRACESETUP:
 	case BLKTRACETEARDOWN:
 		return blk_trace_ioctl(bdev, cmd, argp);
-	case BLKDAXSET:
-		return blkdev_daxset(bdev, arg);
 	case BLKDAXGET:
 		return put_int(arg, !!(bdev->bd_inode->i_flags & S_DAX));
 		break;
diff --git a/block/partition-generic.c b/block/partition-generic.c
index 746935a..fefd01b 100644
--- a/block/partition-generic.c
+++ b/block/partition-generic.c
@@ -16,6 +16,7 @@
 #include <linux/kmod.h>
 #include <linux/ctype.h>
 #include <linux/genhd.h>
+#include <linux/dax.h>
 #include <linux/blktrace_api.h>
 
 #include "partitions/check.h"
@@ -550,13 +551,24 @@
 	return 0;
 }
 
-unsigned char *read_dev_sector(struct block_device *bdev, sector_t n, Sector *p)
+static struct page *read_pagecache_sector(struct block_device *bdev, sector_t n)
 {
 	struct address_space *mapping = bdev->bd_inode->i_mapping;
+
+	return read_mapping_page(mapping, (pgoff_t)(n >> (PAGE_CACHE_SHIFT-9)),
+			NULL);
+}
+
+unsigned char *read_dev_sector(struct block_device *bdev, sector_t n, Sector *p)
+{
 	struct page *page;
 
-	page = read_mapping_page(mapping, (pgoff_t)(n >> (PAGE_CACHE_SHIFT-9)),
-				 NULL);
+	/* don't populate page cache for dax capable devices */
+	if (IS_DAX(bdev->bd_inode))
+		page = read_dax_sector(bdev, n);
+	else
+		page = read_pagecache_sector(bdev, n);
+
 	if (!IS_ERR(page)) {
 		if (PageError(page))
 			goto fail;
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 7240821..3be07ad 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -472,11 +472,13 @@
 config CRYPTO_GHASH
 	tristate "GHASH digest algorithm"
 	select CRYPTO_GF128MUL
+	select CRYPTO_HASH
 	help
 	  GHASH is message digest algorithm for GCM (Galois/Counter Mode).
 
 config CRYPTO_POLY1305
 	tristate "Poly1305 authenticator algorithm"
+	select CRYPTO_HASH
 	help
 	  Poly1305 authenticator algorithm, RFC7539.
 
diff --git a/crypto/algif_hash.c b/crypto/algif_hash.c
index 608a756..68a5cea 100644
--- a/crypto/algif_hash.c
+++ b/crypto/algif_hash.c
@@ -54,7 +54,8 @@
 
 	lock_sock(sk);
 	if (!ctx->more) {
-		err = crypto_ahash_init(&ctx->req);
+		err = af_alg_wait_for_completion(crypto_ahash_init(&ctx->req),
+						&ctx->completion);
 		if (err)
 			goto unlock;
 	}
@@ -125,6 +126,7 @@
 	} else {
 		if (!ctx->more) {
 			err = crypto_ahash_init(&ctx->req);
+			err = af_alg_wait_for_completion(err, &ctx->completion);
 			if (err)
 				goto unlock;
 		}
diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c
index 38c1aa8..28556fc 100644
--- a/crypto/algif_skcipher.c
+++ b/crypto/algif_skcipher.c
@@ -65,18 +65,10 @@
 	struct skcipher_async_rsgl first_sgl;
 	struct list_head list;
 	struct scatterlist *tsg;
-	char iv[];
+	atomic_t *inflight;
+	struct skcipher_request req;
 };
 
-#define GET_SREQ(areq, ctx) (struct skcipher_async_req *)((char *)areq + \
-	crypto_skcipher_reqsize(crypto_skcipher_reqtfm(&ctx->req)))
-
-#define GET_REQ_SIZE(ctx) \
-	crypto_skcipher_reqsize(crypto_skcipher_reqtfm(&ctx->req))
-
-#define GET_IV_SIZE(ctx) \
-	crypto_skcipher_ivsize(crypto_skcipher_reqtfm(&ctx->req))
-
 #define MAX_SGL_ENTS ((4096 - sizeof(struct skcipher_sg_list)) / \
 		      sizeof(struct scatterlist) - 1)
 
@@ -102,15 +94,12 @@
 
 static void skcipher_async_cb(struct crypto_async_request *req, int err)
 {
-	struct sock *sk = req->data;
-	struct alg_sock *ask = alg_sk(sk);
-	struct skcipher_ctx *ctx = ask->private;
-	struct skcipher_async_req *sreq = GET_SREQ(req, ctx);
+	struct skcipher_async_req *sreq = req->data;
 	struct kiocb *iocb = sreq->iocb;
 
-	atomic_dec(&ctx->inflight);
+	atomic_dec(sreq->inflight);
 	skcipher_free_async_sgls(sreq);
-	kfree(req);
+	kzfree(sreq);
 	iocb->ki_complete(iocb, err, err);
 }
 
@@ -306,8 +295,11 @@
 {
 	struct sock *sk = sock->sk;
 	struct alg_sock *ask = alg_sk(sk);
+	struct sock *psk = ask->parent;
+	struct alg_sock *pask = alg_sk(psk);
 	struct skcipher_ctx *ctx = ask->private;
-	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(&ctx->req);
+	struct skcipher_tfm *skc = pask->private;
+	struct crypto_skcipher *tfm = skc->skcipher;
 	unsigned ivsize = crypto_skcipher_ivsize(tfm);
 	struct skcipher_sg_list *sgl;
 	struct af_alg_control con = {};
@@ -509,37 +501,43 @@
 {
 	struct sock *sk = sock->sk;
 	struct alg_sock *ask = alg_sk(sk);
+	struct sock *psk = ask->parent;
+	struct alg_sock *pask = alg_sk(psk);
 	struct skcipher_ctx *ctx = ask->private;
+	struct skcipher_tfm *skc = pask->private;
+	struct crypto_skcipher *tfm = skc->skcipher;
 	struct skcipher_sg_list *sgl;
 	struct scatterlist *sg;
 	struct skcipher_async_req *sreq;
 	struct skcipher_request *req;
 	struct skcipher_async_rsgl *last_rsgl = NULL;
-	unsigned int txbufs = 0, len = 0, tx_nents = skcipher_all_sg_nents(ctx);
-	unsigned int reqlen = sizeof(struct skcipher_async_req) +
-				GET_REQ_SIZE(ctx) + GET_IV_SIZE(ctx);
+	unsigned int txbufs = 0, len = 0, tx_nents;
+	unsigned int reqsize = crypto_skcipher_reqsize(tfm);
+	unsigned int ivsize = crypto_skcipher_ivsize(tfm);
 	int err = -ENOMEM;
 	bool mark = false;
+	char *iv;
+
+	sreq = kzalloc(sizeof(*sreq) + reqsize + ivsize, GFP_KERNEL);
+	if (unlikely(!sreq))
+		goto out;
+
+	req = &sreq->req;
+	iv = (char *)(req + 1) + reqsize;
+	sreq->iocb = msg->msg_iocb;
+	INIT_LIST_HEAD(&sreq->list);
+	sreq->inflight = &ctx->inflight;
 
 	lock_sock(sk);
-	req = kmalloc(reqlen, GFP_KERNEL);
-	if (unlikely(!req))
-		goto unlock;
-
-	sreq = GET_SREQ(req, ctx);
-	sreq->iocb = msg->msg_iocb;
-	memset(&sreq->first_sgl, '\0', sizeof(struct skcipher_async_rsgl));
-	INIT_LIST_HEAD(&sreq->list);
+	tx_nents = skcipher_all_sg_nents(ctx);
 	sreq->tsg = kcalloc(tx_nents, sizeof(*sg), GFP_KERNEL);
-	if (unlikely(!sreq->tsg)) {
-		kfree(req);
+	if (unlikely(!sreq->tsg))
 		goto unlock;
-	}
 	sg_init_table(sreq->tsg, tx_nents);
-	memcpy(sreq->iv, ctx->iv, GET_IV_SIZE(ctx));
-	skcipher_request_set_tfm(req, crypto_skcipher_reqtfm(&ctx->req));
-	skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
-				      skcipher_async_cb, sk);
+	memcpy(iv, ctx->iv, ivsize);
+	skcipher_request_set_tfm(req, tfm);
+	skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP,
+				      skcipher_async_cb, sreq);
 
 	while (iov_iter_count(&msg->msg_iter)) {
 		struct skcipher_async_rsgl *rsgl;
@@ -615,20 +613,22 @@
 		sg_mark_end(sreq->tsg + txbufs - 1);
 
 	skcipher_request_set_crypt(req, sreq->tsg, sreq->first_sgl.sgl.sg,
-				   len, sreq->iv);
+				   len, iv);
 	err = ctx->enc ? crypto_skcipher_encrypt(req) :
 			 crypto_skcipher_decrypt(req);
 	if (err == -EINPROGRESS) {
 		atomic_inc(&ctx->inflight);
 		err = -EIOCBQUEUED;
+		sreq = NULL;
 		goto unlock;
 	}
 free:
 	skcipher_free_async_sgls(sreq);
-	kfree(req);
 unlock:
 	skcipher_wmem_wakeup(sk);
 	release_sock(sk);
+	kzfree(sreq);
+out:
 	return err;
 }
 
@@ -637,9 +637,12 @@
 {
 	struct sock *sk = sock->sk;
 	struct alg_sock *ask = alg_sk(sk);
+	struct sock *psk = ask->parent;
+	struct alg_sock *pask = alg_sk(psk);
 	struct skcipher_ctx *ctx = ask->private;
-	unsigned bs = crypto_skcipher_blocksize(crypto_skcipher_reqtfm(
-		&ctx->req));
+	struct skcipher_tfm *skc = pask->private;
+	struct crypto_skcipher *tfm = skc->skcipher;
+	unsigned bs = crypto_skcipher_blocksize(tfm);
 	struct skcipher_sg_list *sgl;
 	struct scatterlist *sg;
 	int err = -EAGAIN;
@@ -947,7 +950,8 @@
 	ask->private = ctx;
 
 	skcipher_request_set_tfm(&ctx->req, skcipher);
-	skcipher_request_set_callback(&ctx->req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+	skcipher_request_set_callback(&ctx->req, CRYPTO_TFM_REQ_MAY_SLEEP |
+						 CRYPTO_TFM_REQ_MAY_BACKLOG,
 				      af_alg_complete, &ctx->completion);
 
 	sk->sk_destruct = skcipher_sock_destruct;
diff --git a/crypto/asymmetric_keys/pkcs7_parser.c b/crypto/asymmetric_keys/pkcs7_parser.c
index 758acab..8f3056c 100644
--- a/crypto/asymmetric_keys/pkcs7_parser.c
+++ b/crypto/asymmetric_keys/pkcs7_parser.c
@@ -547,9 +547,7 @@
 	struct pkcs7_signed_info *sinfo = ctx->sinfo;
 
 	if (!test_bit(sinfo_has_content_type, &sinfo->aa_set) ||
-	    !test_bit(sinfo_has_message_digest, &sinfo->aa_set) ||
-	    (ctx->msg->data_type == OID_msIndirectData &&
-	     !test_bit(sinfo_has_ms_opus_info, &sinfo->aa_set))) {
+	    !test_bit(sinfo_has_message_digest, &sinfo->aa_set)) {
 		pr_warn("Missing required AuthAttr\n");
 		return -EBADMSG;
 	}
diff --git a/crypto/crypto_user.c b/crypto/crypto_user.c
index 237f379..43fe85f 100644
--- a/crypto/crypto_user.c
+++ b/crypto/crypto_user.c
@@ -499,6 +499,7 @@
 		if (link->dump == NULL)
 			return -EINVAL;
 
+		down_read(&crypto_alg_sem);
 		list_for_each_entry(alg, &crypto_alg_list, cra_list)
 			dump_alloc += CRYPTO_REPORT_MAXSIZE;
 
@@ -508,8 +509,11 @@
 				.done = link->done,
 				.min_dump_alloc = dump_alloc,
 			};
-			return netlink_dump_start(crypto_nlsk, skb, nlh, &c);
+			err = netlink_dump_start(crypto_nlsk, skb, nlh, &c);
 		}
+		up_read(&crypto_alg_sem);
+
+		return err;
 	}
 
 	err = nlmsg_parse(nlh, crypto_msg_min[type], attrs, CRYPTOCFGA_MAX,
diff --git a/crypto/shash.c b/crypto/shash.c
index 88a27de..3597545 100644
--- a/crypto/shash.c
+++ b/crypto/shash.c
@@ -354,11 +354,10 @@
 	crt->final = shash_async_final;
 	crt->finup = shash_async_finup;
 	crt->digest = shash_async_digest;
+	crt->setkey = shash_async_setkey;
 
-	if (alg->setkey) {
-		crt->setkey = shash_async_setkey;
-		crt->has_setkey = true;
-	}
+	crt->has_setkey = alg->setkey != shash_no_setkey;
+
 	if (alg->export)
 		crt->export = shash_async_export;
 	if (alg->import)
diff --git a/drivers/acpi/acpi_lpss.c b/drivers/acpi/acpi_lpss.c
index c570b1d..0872d5f 100644
--- a/drivers/acpi/acpi_lpss.c
+++ b/drivers/acpi/acpi_lpss.c
@@ -880,7 +880,7 @@
 		break;
 	case BUS_NOTIFY_DRIVER_NOT_BOUND:
 	case BUS_NOTIFY_UNBOUND_DRIVER:
-		pdev->dev.pm_domain = NULL;
+		dev_pm_domain_set(&pdev->dev, NULL);
 		break;
 	case BUS_NOTIFY_ADD_DEVICE:
 		dev_pm_domain_set(&pdev->dev, &acpi_lpss_pm_domain);
diff --git a/drivers/acpi/acpi_platform.c b/drivers/acpi/acpi_platform.c
index 296b7a1..b6f7fa3 100644
--- a/drivers/acpi/acpi_platform.c
+++ b/drivers/acpi/acpi_platform.c
@@ -62,7 +62,7 @@
 	if (count < 0) {
 		return NULL;
 	} else if (count > 0) {
-		resources = kmalloc(count * sizeof(struct resource),
+		resources = kzalloc(count * sizeof(struct resource),
 				    GFP_KERNEL);
 		if (!resources) {
 			dev_err(&adev->dev, "No memory for resources\n");
diff --git a/drivers/acpi/acpica/psargs.c b/drivers/acpi/acpica/psargs.c
index 3052185..d48cbed 100644
--- a/drivers/acpi/acpica/psargs.c
+++ b/drivers/acpi/acpica/psargs.c
@@ -269,8 +269,7 @@
 	 */
 	if (ACPI_SUCCESS(status) &&
 	    possible_method_call && (node->type == ACPI_TYPE_METHOD)) {
-		if (GET_CURRENT_ARG_TYPE(walk_state->arg_types) ==
-		    ARGP_SUPERNAME) {
+		if (walk_state->opcode == AML_UNLOAD_OP) {
 			/*
 			 * acpi_ps_get_next_namestring has increased the AML pointer,
 			 * so we need to restore the saved AML pointer for method call.
@@ -697,7 +696,7 @@
  *
  * PARAMETERS:  walk_state          - Current state
  *              parser_state        - Current parser state object
- *              arg_type            - The parser argument type (ARGP_*)
+ *              arg_type            - The argument type (AML_*_ARG)
  *              return_arg          - Where the next arg is returned
  *
  * RETURN:      Status, and an op object containing the next argument.
@@ -817,9 +816,9 @@
 				return_ACPI_STATUS(AE_NO_MEMORY);
 			}
 
-			/* super_name allows argument to be a method call */
+			/* To support super_name arg of Unload */
 
-			if (arg_type == ARGP_SUPERNAME) {
+			if (walk_state->opcode == AML_UNLOAD_OP) {
 				status =
 				    acpi_ps_get_next_namepath(walk_state,
 							      parser_state, arg,
diff --git a/drivers/acpi/apei/einj.c b/drivers/acpi/apei/einj.c
index 0431883..559c117 100644
--- a/drivers/acpi/apei/einj.c
+++ b/drivers/acpi/apei/einj.c
@@ -519,7 +519,7 @@
 			     u64 param3, u64 param4)
 {
 	int rc;
-	unsigned long pfn;
+	u64 base_addr, size;
 
 	/* If user manually set "flags", make sure it is legal */
 	if (flags && (flags &
@@ -545,10 +545,17 @@
 	/*
 	 * Disallow crazy address masks that give BIOS leeway to pick
 	 * injection address almost anywhere. Insist on page or
-	 * better granularity and that target address is normal RAM.
+	 * better granularity and that target address is normal RAM or
+	 * NVDIMM.
 	 */
-	pfn = PFN_DOWN(param1 & param2);
-	if (!page_is_ram(pfn) || ((param2 & PAGE_MASK) != PAGE_MASK))
+	base_addr = param1 & param2;
+	size = ~param2 + 1;
+
+	if (((param2 & PAGE_MASK) != PAGE_MASK) ||
+	    ((region_intersects(base_addr, size, IORESOURCE_SYSTEM_RAM, IORES_DESC_NONE)
+				!= REGION_INTERSECTS) &&
+	     (region_intersects(base_addr, size, IORESOURCE_MEM, IORES_DESC_PERSISTENT_MEMORY)
+				!= REGION_INTERSECTS)))
 		return -EINVAL;
 
 inject:
diff --git a/drivers/acpi/nfit.c b/drivers/acpi/nfit.c
index ad6d8c6..35947ac 100644
--- a/drivers/acpi/nfit.c
+++ b/drivers/acpi/nfit.c
@@ -469,37 +469,16 @@
 	nfit_mem->bdw = NULL;
 }
 
-static int nfit_mem_add(struct acpi_nfit_desc *acpi_desc,
+static void nfit_mem_init_bdw(struct acpi_nfit_desc *acpi_desc,
 		struct nfit_mem *nfit_mem, struct acpi_nfit_system_address *spa)
 {
 	u16 dcr = __to_nfit_memdev(nfit_mem)->region_index;
 	struct nfit_memdev *nfit_memdev;
 	struct nfit_flush *nfit_flush;
-	struct nfit_dcr *nfit_dcr;
 	struct nfit_bdw *nfit_bdw;
 	struct nfit_idt *nfit_idt;
 	u16 idt_idx, range_index;
 
-	list_for_each_entry(nfit_dcr, &acpi_desc->dcrs, list) {
-		if (nfit_dcr->dcr->region_index != dcr)
-			continue;
-		nfit_mem->dcr = nfit_dcr->dcr;
-		break;
-	}
-
-	if (!nfit_mem->dcr) {
-		dev_dbg(acpi_desc->dev, "SPA %d missing:%s%s\n",
-				spa->range_index, __to_nfit_memdev(nfit_mem)
-				? "" : " MEMDEV", nfit_mem->dcr ? "" : " DCR");
-		return -ENODEV;
-	}
-
-	/*
-	 * We've found enough to create an nvdimm, optionally
-	 * find an associated BDW
-	 */
-	list_add(&nfit_mem->list, &acpi_desc->dimms);
-
 	list_for_each_entry(nfit_bdw, &acpi_desc->bdws, list) {
 		if (nfit_bdw->bdw->region_index != dcr)
 			continue;
@@ -508,12 +487,12 @@
 	}
 
 	if (!nfit_mem->bdw)
-		return 0;
+		return;
 
 	nfit_mem_find_spa_bdw(acpi_desc, nfit_mem);
 
 	if (!nfit_mem->spa_bdw)
-		return 0;
+		return;
 
 	range_index = nfit_mem->spa_bdw->range_index;
 	list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) {
@@ -538,8 +517,6 @@
 		}
 		break;
 	}
-
-	return 0;
 }
 
 static int nfit_mem_dcr_init(struct acpi_nfit_desc *acpi_desc,
@@ -548,7 +525,6 @@
 	struct nfit_mem *nfit_mem, *found;
 	struct nfit_memdev *nfit_memdev;
 	int type = nfit_spa_type(spa);
-	u16 dcr;
 
 	switch (type) {
 	case NFIT_SPA_DCR:
@@ -559,14 +535,18 @@
 	}
 
 	list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) {
-		int rc;
+		struct nfit_dcr *nfit_dcr;
+		u32 device_handle;
+		u16 dcr;
 
 		if (nfit_memdev->memdev->range_index != spa->range_index)
 			continue;
 		found = NULL;
 		dcr = nfit_memdev->memdev->region_index;
+		device_handle = nfit_memdev->memdev->device_handle;
 		list_for_each_entry(nfit_mem, &acpi_desc->dimms, list)
-			if (__to_nfit_memdev(nfit_mem)->region_index == dcr) {
+			if (__to_nfit_memdev(nfit_mem)->device_handle
+					== device_handle) {
 				found = nfit_mem;
 				break;
 			}
@@ -579,6 +559,31 @@
 			if (!nfit_mem)
 				return -ENOMEM;
 			INIT_LIST_HEAD(&nfit_mem->list);
+			list_add(&nfit_mem->list, &acpi_desc->dimms);
+		}
+
+		list_for_each_entry(nfit_dcr, &acpi_desc->dcrs, list) {
+			if (nfit_dcr->dcr->region_index != dcr)
+				continue;
+			/*
+			 * Record the control region for the dimm.  For
+			 * the ACPI 6.1 case, where there are separate
+			 * control regions for the pmem vs blk
+			 * interfaces, be sure to record the extended
+			 * blk details.
+			 */
+			if (!nfit_mem->dcr)
+				nfit_mem->dcr = nfit_dcr->dcr;
+			else if (nfit_mem->dcr->windows == 0
+					&& nfit_dcr->dcr->windows)
+				nfit_mem->dcr = nfit_dcr->dcr;
+			break;
+		}
+
+		if (dcr && !nfit_mem->dcr) {
+			dev_err(acpi_desc->dev, "SPA %d missing DCR %d\n",
+					spa->range_index, dcr);
+			return -ENODEV;
 		}
 
 		if (type == NFIT_SPA_DCR) {
@@ -595,6 +600,7 @@
 				nfit_mem->idt_dcr = nfit_idt->idt;
 				break;
 			}
+			nfit_mem_init_bdw(acpi_desc, nfit_mem, spa);
 		} else {
 			/*
 			 * A single dimm may belong to multiple SPA-PM
@@ -603,13 +609,6 @@
 			 */
 			nfit_mem->memdev_pmem = nfit_memdev->memdev;
 		}
-
-		if (found)
-			continue;
-
-		rc = nfit_mem_add(acpi_desc, nfit_mem, spa);
-		if (rc)
-			return rc;
 	}
 
 	return 0;
@@ -1504,9 +1503,7 @@
 		case 1:
 			/* ARS unsupported, but we should never get here */
 			return 0;
-		case 2:
-			return -EINVAL;
-		case 3:
+		case 6:
 			/* ARS is in progress */
 			msleep(1000);
 			break;
@@ -1517,13 +1514,13 @@
 }
 
 static int ars_get_status(struct nvdimm_bus_descriptor *nd_desc,
-		struct nd_cmd_ars_status *cmd)
+		struct nd_cmd_ars_status *cmd, u32 size)
 {
 	int rc;
 
 	while (1) {
 		rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_STATUS, cmd,
-			sizeof(*cmd));
+			size);
 		if (rc || cmd->status & 0xffff)
 			return -ENXIO;
 
@@ -1538,6 +1535,8 @@
 		case 2:
 			/* No ARS performed for the current boot */
 			return 0;
+		case 3:
+			/* TODO: error list overflow support */
 		default:
 			return -ENXIO;
 		}
@@ -1581,6 +1580,7 @@
 	struct nd_cmd_ars_start *ars_start = NULL;
 	struct nd_cmd_ars_cap *ars_cap = NULL;
 	u64 start, len, cur, remaining;
+	u32 ars_status_size;
 	int rc;
 
 	ars_cap = kzalloc(sizeof(*ars_cap), GFP_KERNEL);
@@ -1590,14 +1590,21 @@
 	start = ndr_desc->res->start;
 	len = ndr_desc->res->end - ndr_desc->res->start + 1;
 
+	/*
+	 * If ARS is unimplemented, unsupported, or if the 'Persistent Memory
+	 * Scrub' flag in extended status is not set, skip this but continue
+	 * initialization
+	 */
 	rc = ars_get_cap(nd_desc, ars_cap, start, len);
+	if (rc == -ENOTTY) {
+		dev_dbg(acpi_desc->dev,
+			"Address Range Scrub is not implemented, won't create an error list\n");
+		rc = 0;
+		goto out;
+	}
 	if (rc)
 		goto out;
 
-	/*
-	 * If ARS is unsupported, or if the 'Persistent Memory Scrub' flag in
-	 * extended status is not set, skip this but continue initialization
-	 */
 	if ((ars_cap->status & 0xffff) ||
 		!(ars_cap->status >> 16 & ND_ARS_PERSISTENT)) {
 		dev_warn(acpi_desc->dev,
@@ -1610,14 +1617,14 @@
 	 * Check if a full-range ARS has been run. If so, use those results
 	 * without having to start a new ARS.
 	 */
-	ars_status = kzalloc(ars_cap->max_ars_out + sizeof(*ars_status),
-			GFP_KERNEL);
+	ars_status_size = ars_cap->max_ars_out;
+	ars_status = kzalloc(ars_status_size, GFP_KERNEL);
 	if (!ars_status) {
 		rc = -ENOMEM;
 		goto out;
 	}
 
-	rc = ars_get_status(nd_desc, ars_status);
+	rc = ars_get_status(nd_desc, ars_status, ars_status_size);
 	if (rc)
 		goto out;
 
@@ -1647,7 +1654,7 @@
 		if (rc)
 			goto out;
 
-		rc = ars_get_status(nd_desc, ars_status);
+		rc = ars_get_status(nd_desc, ars_status, ars_status_size);
 		if (rc)
 			goto out;
 
diff --git a/drivers/acpi/pci_irq.c b/drivers/acpi/pci_irq.c
index d30184c..c8e169e 100644
--- a/drivers/acpi/pci_irq.c
+++ b/drivers/acpi/pci_irq.c
@@ -406,7 +406,7 @@
 		return 0;
 	}
 
-	if (pci_has_managed_irq(dev))
+	if (dev->irq_managed && dev->irq > 0)
 		return 0;
 
 	entry = acpi_pci_irq_lookup(dev, pin);
@@ -451,7 +451,8 @@
 		kfree(entry);
 		return rc;
 	}
-	pci_set_managed_irq(dev, rc);
+	dev->irq = rc;
+	dev->irq_managed = 1;
 
 	if (link)
 		snprintf(link_desc, sizeof(link_desc), " -> Link[%s]", link);
@@ -474,9 +475,17 @@
 	u8 pin;
 
 	pin = dev->pin;
-	if (!pin || !pci_has_managed_irq(dev))
+	if (!pin || !dev->irq_managed || dev->irq <= 0)
 		return;
 
+	/* Keep IOAPIC pin configuration when suspending */
+	if (dev->dev.power.is_prepared)
+		return;
+#ifdef	CONFIG_PM
+	if (dev->dev.power.runtime_status == RPM_SUSPENDING)
+		return;
+#endif
+
 	entry = acpi_pci_irq_lookup(dev, pin);
 	if (!entry)
 		return;
@@ -496,6 +505,6 @@
 	dev_dbg(&dev->dev, "PCI INT %c disabled\n", pin_name(pin));
 	if (gsi >= 0) {
 		acpi_unregister_gsi(gsi);
-		pci_reset_managed_irq(dev);
+		dev->irq_managed = 0;
 	}
 }
diff --git a/drivers/acpi/pci_link.c b/drivers/acpi/pci_link.c
index fa28635..ededa90 100644
--- a/drivers/acpi/pci_link.c
+++ b/drivers/acpi/pci_link.c
@@ -4,7 +4,6 @@
  *  Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com>
  *  Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
  *  Copyright (C) 2002       Dominik Brodowski <devel@brodo.de>
- *  Copyright (c) 2015, The Linux Foundation. All rights reserved.
  *
  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  *
@@ -438,6 +437,7 @@
  * enabled system.
  */
 
+#define ACPI_MAX_IRQS		256
 #define ACPI_MAX_ISA_IRQ	16
 
 #define PIRQ_PENALTY_PCI_AVAILABLE	(0)
@@ -447,7 +447,7 @@
 #define PIRQ_PENALTY_ISA_USED		(16*16*16*16*16)
 #define PIRQ_PENALTY_ISA_ALWAYS		(16*16*16*16*16*16)
 
-static int acpi_irq_isa_penalty[ACPI_MAX_ISA_IRQ] = {
+static int acpi_irq_penalty[ACPI_MAX_IRQS] = {
 	PIRQ_PENALTY_ISA_ALWAYS,	/* IRQ0 timer */
 	PIRQ_PENALTY_ISA_ALWAYS,	/* IRQ1 keyboard */
 	PIRQ_PENALTY_ISA_ALWAYS,	/* IRQ2 cascade */
@@ -464,68 +464,9 @@
 	PIRQ_PENALTY_ISA_USED,		/* IRQ13 fpe, sometimes */
 	PIRQ_PENALTY_ISA_USED,		/* IRQ14 ide0 */
 	PIRQ_PENALTY_ISA_USED,		/* IRQ15 ide1 */
+	/* >IRQ15 */
 };
 
-struct irq_penalty_info {
-	int irq;
-	int penalty;
-	struct list_head node;
-};
-
-static LIST_HEAD(acpi_irq_penalty_list);
-
-static int acpi_irq_get_penalty(int irq)
-{
-	struct irq_penalty_info *irq_info;
-
-	if (irq < ACPI_MAX_ISA_IRQ)
-		return acpi_irq_isa_penalty[irq];
-
-	list_for_each_entry(irq_info, &acpi_irq_penalty_list, node) {
-		if (irq_info->irq == irq)
-			return irq_info->penalty;
-	}
-
-	return 0;
-}
-
-static int acpi_irq_set_penalty(int irq, int new_penalty)
-{
-	struct irq_penalty_info *irq_info;
-
-	/* see if this is a ISA IRQ */
-	if (irq < ACPI_MAX_ISA_IRQ) {
-		acpi_irq_isa_penalty[irq] = new_penalty;
-		return 0;
-	}
-
-	/* next, try to locate from the dynamic list */
-	list_for_each_entry(irq_info, &acpi_irq_penalty_list, node) {
-		if (irq_info->irq == irq) {
-			irq_info->penalty  = new_penalty;
-			return 0;
-		}
-	}
-
-	/* nope, let's allocate a slot for this IRQ */
-	irq_info = kzalloc(sizeof(*irq_info), GFP_KERNEL);
-	if (!irq_info)
-		return -ENOMEM;
-
-	irq_info->irq = irq;
-	irq_info->penalty = new_penalty;
-	list_add_tail(&irq_info->node, &acpi_irq_penalty_list);
-
-	return 0;
-}
-
-static void acpi_irq_add_penalty(int irq, int penalty)
-{
-	int curpen = acpi_irq_get_penalty(irq);
-
-	acpi_irq_set_penalty(irq, curpen + penalty);
-}
-
 int __init acpi_irq_penalty_init(void)
 {
 	struct acpi_pci_link *link;
@@ -546,16 +487,15 @@
 			    link->irq.possible_count;
 
 			for (i = 0; i < link->irq.possible_count; i++) {
-				if (link->irq.possible[i] < ACPI_MAX_ISA_IRQ) {
-					int irqpos = link->irq.possible[i];
-
-					acpi_irq_add_penalty(irqpos, penalty);
-				}
+				if (link->irq.possible[i] < ACPI_MAX_ISA_IRQ)
+					acpi_irq_penalty[link->irq.
+							 possible[i]] +=
+					    penalty;
 			}
 
 		} else if (link->irq.active) {
-			acpi_irq_add_penalty(link->irq.active,
-					     PIRQ_PENALTY_PCI_POSSIBLE);
+			acpi_irq_penalty[link->irq.active] +=
+			    PIRQ_PENALTY_PCI_POSSIBLE;
 		}
 	}
 
@@ -607,12 +547,12 @@
 		 * the use of IRQs 9, 10, 11, and >15.
 		 */
 		for (i = (link->irq.possible_count - 1); i >= 0; i--) {
-			if (acpi_irq_get_penalty(irq) >
-			    acpi_irq_get_penalty(link->irq.possible[i]))
+			if (acpi_irq_penalty[irq] >
+			    acpi_irq_penalty[link->irq.possible[i]])
 				irq = link->irq.possible[i];
 		}
 	}
-	if (acpi_irq_get_penalty(irq) >= PIRQ_PENALTY_ISA_ALWAYS) {
+	if (acpi_irq_penalty[irq] >= PIRQ_PENALTY_ISA_ALWAYS) {
 		printk(KERN_ERR PREFIX "No IRQ available for %s [%s]. "
 			    "Try pci=noacpi or acpi=off\n",
 			    acpi_device_name(link->device),
@@ -628,8 +568,7 @@
 			    acpi_device_bid(link->device));
 		return -ENODEV;
 	} else {
-		acpi_irq_add_penalty(link->irq.active, PIRQ_PENALTY_PCI_USING);
-
+		acpi_irq_penalty[link->irq.active] += PIRQ_PENALTY_PCI_USING;
 		printk(KERN_WARNING PREFIX "%s [%s] enabled at IRQ %d\n",
 		       acpi_device_name(link->device),
 		       acpi_device_bid(link->device), link->irq.active);
@@ -839,7 +778,7 @@
 }
 
 /*
- * modify penalty from cmdline
+ * modify acpi_irq_penalty[] from cmdline
  */
 static int __init acpi_irq_penalty_update(char *str, int used)
 {
@@ -857,10 +796,13 @@
 		if (irq < 0)
 			continue;
 
+		if (irq >= ARRAY_SIZE(acpi_irq_penalty))
+			continue;
+
 		if (used)
-			acpi_irq_add_penalty(irq, PIRQ_PENALTY_ISA_USED);
+			acpi_irq_penalty[irq] += PIRQ_PENALTY_ISA_USED;
 		else
-			acpi_irq_set_penalty(irq, PIRQ_PENALTY_PCI_AVAILABLE);
+			acpi_irq_penalty[irq] = PIRQ_PENALTY_PCI_AVAILABLE;
 
 		if (retval != 2)	/* no next number */
 			break;
@@ -877,15 +819,18 @@
  */
 void acpi_penalize_isa_irq(int irq, int active)
 {
-	if (irq >= 0)
-		acpi_irq_add_penalty(irq, active ?
-			PIRQ_PENALTY_ISA_USED : PIRQ_PENALTY_PCI_USING);
+	if (irq >= 0 && irq < ARRAY_SIZE(acpi_irq_penalty)) {
+		if (active)
+			acpi_irq_penalty[irq] += PIRQ_PENALTY_ISA_USED;
+		else
+			acpi_irq_penalty[irq] += PIRQ_PENALTY_PCI_USING;
+	}
 }
 
 bool acpi_isa_irq_available(int irq)
 {
-	return irq >= 0 &&
-		(acpi_irq_get_penalty(irq) < PIRQ_PENALTY_ISA_ALWAYS);
+	return irq >= 0 && (irq >= ARRAY_SIZE(acpi_irq_penalty) ||
+			    acpi_irq_penalty[irq] < PIRQ_PENALTY_ISA_ALWAYS);
 }
 
 /*
@@ -895,18 +840,13 @@
  */
 void acpi_penalize_sci_irq(int irq, int trigger, int polarity)
 {
-	int penalty;
-
-	if (irq < 0)
-		return;
-
-	if (trigger != ACPI_MADT_TRIGGER_LEVEL ||
-	    polarity != ACPI_MADT_POLARITY_ACTIVE_LOW)
-		penalty = PIRQ_PENALTY_ISA_ALWAYS;
-	else
-		penalty = PIRQ_PENALTY_PCI_USING;
-
-	acpi_irq_add_penalty(irq, penalty);
+	if (irq >= 0 && irq < ARRAY_SIZE(acpi_irq_penalty)) {
+		if (trigger != ACPI_MADT_TRIGGER_LEVEL ||
+		    polarity != ACPI_MADT_POLARITY_ACTIVE_LOW)
+			acpi_irq_penalty[irq] += PIRQ_PENALTY_ISA_ALWAYS;
+		else
+			acpi_irq_penalty[irq] += PIRQ_PENALTY_PCI_USING;
+	}
 }
 
 /*
diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c
index 90e2d54..1316ddd 100644
--- a/drivers/acpi/video_detect.c
+++ b/drivers/acpi/video_detect.c
@@ -135,14 +135,6 @@
 		DMI_MATCH(DMI_PRODUCT_NAME, "UL30A"),
 		},
 	},
-	{
-	.callback = video_detect_force_vendor,
-	.ident = "Dell Inspiron 5737",
-	.matches = {
-		DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
-		DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 5737"),
-		},
-	},
 
 	/*
 	 * These models have a working acpi_video backlight control, and using
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index a39e85f..7d00b7a 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -2074,7 +2074,7 @@
 			if (get_user(cookie, (binder_uintptr_t __user *)ptr))
 				return -EFAULT;
 
-			ptr += sizeof(void *);
+			ptr += sizeof(cookie);
 			list_for_each_entry(w, &proc->delivered_death, entry) {
 				struct binder_ref_death *tmp_death = container_of(w, struct binder_ref_death, work);
 
diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index 594fcab..146dc0b 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -264,6 +264,26 @@
 	{ PCI_VDEVICE(INTEL, 0x3b2b), board_ahci }, /* PCH RAID */
 	{ PCI_VDEVICE(INTEL, 0x3b2c), board_ahci }, /* PCH RAID */
 	{ PCI_VDEVICE(INTEL, 0x3b2f), board_ahci }, /* PCH AHCI */
+	{ PCI_VDEVICE(INTEL, 0x19b0), board_ahci }, /* DNV AHCI */
+	{ PCI_VDEVICE(INTEL, 0x19b1), board_ahci }, /* DNV AHCI */
+	{ PCI_VDEVICE(INTEL, 0x19b2), board_ahci }, /* DNV AHCI */
+	{ PCI_VDEVICE(INTEL, 0x19b3), board_ahci }, /* DNV AHCI */
+	{ PCI_VDEVICE(INTEL, 0x19b4), board_ahci }, /* DNV AHCI */
+	{ PCI_VDEVICE(INTEL, 0x19b5), board_ahci }, /* DNV AHCI */
+	{ PCI_VDEVICE(INTEL, 0x19b6), board_ahci }, /* DNV AHCI */
+	{ PCI_VDEVICE(INTEL, 0x19b7), board_ahci }, /* DNV AHCI */
+	{ PCI_VDEVICE(INTEL, 0x19bE), board_ahci }, /* DNV AHCI */
+	{ PCI_VDEVICE(INTEL, 0x19bF), board_ahci }, /* DNV AHCI */
+	{ PCI_VDEVICE(INTEL, 0x19c0), board_ahci }, /* DNV AHCI */
+	{ PCI_VDEVICE(INTEL, 0x19c1), board_ahci }, /* DNV AHCI */
+	{ PCI_VDEVICE(INTEL, 0x19c2), board_ahci }, /* DNV AHCI */
+	{ PCI_VDEVICE(INTEL, 0x19c3), board_ahci }, /* DNV AHCI */
+	{ PCI_VDEVICE(INTEL, 0x19c4), board_ahci }, /* DNV AHCI */
+	{ PCI_VDEVICE(INTEL, 0x19c5), board_ahci }, /* DNV AHCI */
+	{ PCI_VDEVICE(INTEL, 0x19c6), board_ahci }, /* DNV AHCI */
+	{ PCI_VDEVICE(INTEL, 0x19c7), board_ahci }, /* DNV AHCI */
+	{ PCI_VDEVICE(INTEL, 0x19cE), board_ahci }, /* DNV AHCI */
+	{ PCI_VDEVICE(INTEL, 0x19cF), board_ahci }, /* DNV AHCI */
 	{ PCI_VDEVICE(INTEL, 0x1c02), board_ahci }, /* CPT AHCI */
 	{ PCI_VDEVICE(INTEL, 0x1c03), board_ahci }, /* CPT AHCI */
 	{ PCI_VDEVICE(INTEL, 0x1c04), board_ahci }, /* CPT RAID */
@@ -347,15 +367,21 @@
 	{ PCI_VDEVICE(INTEL, 0xa107), board_ahci }, /* Sunrise Point-H RAID */
 	{ PCI_VDEVICE(INTEL, 0xa10f), board_ahci }, /* Sunrise Point-H RAID */
 	{ PCI_VDEVICE(INTEL, 0x2822), board_ahci }, /* Lewisburg RAID*/
+	{ PCI_VDEVICE(INTEL, 0x2823), board_ahci }, /* Lewisburg AHCI*/
 	{ PCI_VDEVICE(INTEL, 0x2826), board_ahci }, /* Lewisburg RAID*/
+	{ PCI_VDEVICE(INTEL, 0x2827), board_ahci }, /* Lewisburg RAID*/
 	{ PCI_VDEVICE(INTEL, 0xa182), board_ahci }, /* Lewisburg AHCI*/
 	{ PCI_VDEVICE(INTEL, 0xa184), board_ahci }, /* Lewisburg RAID*/
 	{ PCI_VDEVICE(INTEL, 0xa186), board_ahci }, /* Lewisburg RAID*/
 	{ PCI_VDEVICE(INTEL, 0xa18e), board_ahci }, /* Lewisburg RAID*/
+	{ PCI_VDEVICE(INTEL, 0xa1d2), board_ahci }, /* Lewisburg RAID*/
+	{ PCI_VDEVICE(INTEL, 0xa1d6), board_ahci }, /* Lewisburg RAID*/
 	{ PCI_VDEVICE(INTEL, 0xa202), board_ahci }, /* Lewisburg AHCI*/
 	{ PCI_VDEVICE(INTEL, 0xa204), board_ahci }, /* Lewisburg RAID*/
 	{ PCI_VDEVICE(INTEL, 0xa206), board_ahci }, /* Lewisburg RAID*/
 	{ PCI_VDEVICE(INTEL, 0xa20e), board_ahci }, /* Lewisburg RAID*/
+	{ PCI_VDEVICE(INTEL, 0xa252), board_ahci }, /* Lewisburg RAID*/
+	{ PCI_VDEVICE(INTEL, 0xa256), board_ahci }, /* Lewisburg RAID*/
 
 	/* JMicron 360/1/3/5/6, match class to avoid IDE function */
 	{ PCI_VENDOR_ID_JMICRON, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID,
@@ -1305,6 +1331,44 @@
 {}
 #endif
 
+#ifdef CONFIG_ARM64
+/*
+ * Due to ERRATA#22536, ThunderX needs to handle HOST_IRQ_STAT differently.
+ * Workaround is to make sure all pending IRQs are served before leaving
+ * handler.
+ */
+static irqreturn_t ahci_thunderx_irq_handler(int irq, void *dev_instance)
+{
+	struct ata_host *host = dev_instance;
+	struct ahci_host_priv *hpriv;
+	unsigned int rc = 0;
+	void __iomem *mmio;
+	u32 irq_stat, irq_masked;
+	unsigned int handled = 1;
+
+	VPRINTK("ENTER\n");
+	hpriv = host->private_data;
+	mmio = hpriv->mmio;
+	irq_stat = readl(mmio + HOST_IRQ_STAT);
+	if (!irq_stat)
+		return IRQ_NONE;
+
+	do {
+		irq_masked = irq_stat & hpriv->port_map;
+		spin_lock(&host->lock);
+		rc = ahci_handle_port_intr(host, irq_masked);
+		if (!rc)
+			handled = 0;
+		writel(irq_stat, mmio + HOST_IRQ_STAT);
+		irq_stat = readl(mmio + HOST_IRQ_STAT);
+		spin_unlock(&host->lock);
+	} while (irq_stat);
+	VPRINTK("EXIT\n");
+
+	return IRQ_RETVAL(handled);
+}
+#endif
+
 /*
  * ahci_init_msix() - optionally enable per-port MSI-X otherwise defer
  * to single msi.
@@ -1540,6 +1604,11 @@
 	if (ahci_broken_devslp(pdev))
 		hpriv->flags |= AHCI_HFLAG_NO_DEVSLP;
 
+#ifdef CONFIG_ARM64
+	if (pdev->vendor == 0x177d && pdev->device == 0xa01c)
+		hpriv->irq_handler = ahci_thunderx_irq_handler;
+#endif
+
 	/* save initial config */
 	ahci_pci_save_initial_config(pdev, hpriv);
 
diff --git a/drivers/ata/ahci.h b/drivers/ata/ahci.h
index a4faa43..167ba7e 100644
--- a/drivers/ata/ahci.h
+++ b/drivers/ata/ahci.h
@@ -240,8 +240,7 @@
 						        error-handling stage) */
 	AHCI_HFLAG_NO_DEVSLP		= (1 << 17), /* no device sleep */
 	AHCI_HFLAG_NO_FBS		= (1 << 18), /* no FBS */
-	AHCI_HFLAG_EDGE_IRQ		= (1 << 19), /* HOST_IRQ_STAT behaves as
-							Edge Triggered */
+
 #ifdef CONFIG_PCI_MSI
 	AHCI_HFLAG_MULTI_MSI		= (1 << 20), /* multiple PCI MSIs */
 	AHCI_HFLAG_MULTI_MSIX		= (1 << 21), /* per-port MSI-X */
@@ -250,6 +249,7 @@
 	AHCI_HFLAG_MULTI_MSI		= 0,
 	AHCI_HFLAG_MULTI_MSIX		= 0,
 #endif
+	AHCI_HFLAG_WAKE_BEFORE_STOP	= (1 << 22), /* wake before DMA stop */
 
 	/* ap->flags bits */
 
@@ -360,6 +360,7 @@
 	 * be overridden anytime before the host is activated.
 	 */
 	void			(*start_engine)(struct ata_port *ap);
+	irqreturn_t 		(*irq_handler)(int irq, void *dev_instance);
 };
 
 #ifdef CONFIG_PCI_MSI
@@ -423,6 +424,7 @@
 void ahci_print_info(struct ata_host *host, const char *scc_s);
 int ahci_host_activate(struct ata_host *host, struct scsi_host_template *sht);
 void ahci_error_handler(struct ata_port *ap);
+u32 ahci_handle_port_intr(struct ata_host *host, u32 irq_masked);
 
 static inline void __iomem *__ahci_port_base(struct ata_host *host,
 					     unsigned int port_no)
diff --git a/drivers/ata/ahci_brcmstb.c b/drivers/ata/ahci_brcmstb.c
index b36cae2..e87bcec 100644
--- a/drivers/ata/ahci_brcmstb.c
+++ b/drivers/ata/ahci_brcmstb.c
@@ -317,6 +317,7 @@
 	if (IS_ERR(hpriv))
 		return PTR_ERR(hpriv);
 	hpriv->plat_data = priv;
+	hpriv->flags = AHCI_HFLAG_WAKE_BEFORE_STOP;
 
 	brcm_sata_alpm_init(hpriv);
 
diff --git a/drivers/ata/ahci_xgene.c b/drivers/ata/ahci_xgene.c
index e2c6d9e..8e3f7fa 100644
--- a/drivers/ata/ahci_xgene.c
+++ b/drivers/ata/ahci_xgene.c
@@ -548,6 +548,88 @@
 	return rc;
 }
 
+/**
+ * xgene_ahci_handle_broken_edge_irq - Handle the broken irq.
+ * @ata_host: Host that recieved the irq
+ * @irq_masked: HOST_IRQ_STAT value
+ *
+ * For hardware with broken edge trigger latch
+ * the HOST_IRQ_STAT register misses the edge interrupt
+ * when clearing of HOST_IRQ_STAT register and hardware
+ * reporting the PORT_IRQ_STAT register at the
+ * same clock cycle.
+ * As such, the algorithm below outlines the workaround.
+ *
+ * 1. Read HOST_IRQ_STAT register and save the state.
+ * 2. Clear the HOST_IRQ_STAT register.
+ * 3. Read back the HOST_IRQ_STAT register.
+ * 4. If HOST_IRQ_STAT register equals to zero, then
+ *    traverse the rest of port's PORT_IRQ_STAT register
+ *    to check if an interrupt is triggered at that point else
+ *    go to step 6.
+ * 5. If PORT_IRQ_STAT register of rest ports is not equal to zero
+ *    then update the state of HOST_IRQ_STAT saved in step 1.
+ * 6. Handle port interrupts.
+ * 7. Exit
+ */
+static int xgene_ahci_handle_broken_edge_irq(struct ata_host *host,
+					     u32 irq_masked)
+{
+	struct ahci_host_priv *hpriv = host->private_data;
+	void __iomem *port_mmio;
+	int i;
+
+	if (!readl(hpriv->mmio + HOST_IRQ_STAT)) {
+		for (i = 0; i < host->n_ports; i++) {
+			if (irq_masked & (1 << i))
+				continue;
+
+			port_mmio = ahci_port_base(host->ports[i]);
+			if (readl(port_mmio + PORT_IRQ_STAT))
+				irq_masked |= (1 << i);
+		}
+	}
+
+	return ahci_handle_port_intr(host, irq_masked);
+}
+
+static irqreturn_t xgene_ahci_irq_intr(int irq, void *dev_instance)
+{
+	struct ata_host *host = dev_instance;
+	struct ahci_host_priv *hpriv;
+	unsigned int rc = 0;
+	void __iomem *mmio;
+	u32 irq_stat, irq_masked;
+
+	VPRINTK("ENTER\n");
+
+	hpriv = host->private_data;
+	mmio = hpriv->mmio;
+
+	/* sigh.  0xffffffff is a valid return from h/w */
+	irq_stat = readl(mmio + HOST_IRQ_STAT);
+	if (!irq_stat)
+		return IRQ_NONE;
+
+	irq_masked = irq_stat & hpriv->port_map;
+
+	spin_lock(&host->lock);
+
+	/*
+	 * HOST_IRQ_STAT behaves as edge triggered latch meaning that
+	 * it should be cleared before all the port events are cleared.
+	 */
+	writel(irq_stat, mmio + HOST_IRQ_STAT);
+
+	rc = xgene_ahci_handle_broken_edge_irq(host, irq_masked);
+
+	spin_unlock(&host->lock);
+
+	VPRINTK("EXIT\n");
+
+	return IRQ_RETVAL(rc);
+}
+
 static struct ata_port_operations xgene_ahci_v1_ops = {
 	.inherits = &ahci_ops,
 	.host_stop = xgene_ahci_host_stop,
@@ -779,7 +861,8 @@
 		hpriv->flags = AHCI_HFLAG_NO_NCQ;
 		break;
 	case XGENE_AHCI_V2:
-		hpriv->flags |= AHCI_HFLAG_YES_FBS | AHCI_HFLAG_EDGE_IRQ;
+		hpriv->flags |= AHCI_HFLAG_YES_FBS;
+		hpriv->irq_handler = xgene_ahci_irq_intr;
 		break;
 	default:
 		break;
diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c
index d61740e..85ea514 100644
--- a/drivers/ata/libahci.c
+++ b/drivers/ata/libahci.c
@@ -113,6 +113,7 @@
 				    const char *buf, size_t size);
 static ssize_t ahci_show_em_supported(struct device *dev,
 				      struct device_attribute *attr, char *buf);
+static irqreturn_t ahci_single_level_irq_intr(int irq, void *dev_instance);
 
 static DEVICE_ATTR(ahci_host_caps, S_IRUGO, ahci_show_host_caps, NULL);
 static DEVICE_ATTR(ahci_host_cap2, S_IRUGO, ahci_show_host_cap2, NULL);
@@ -496,8 +497,8 @@
 		}
 	}
 
-	/* fabricate port_map from cap.nr_ports */
-	if (!port_map) {
+	/* fabricate port_map from cap.nr_ports for < AHCI 1.3 */
+	if (!port_map && vers < 0x10300) {
 		port_map = (1 << ahci_nr_ports(cap)) - 1;
 		dev_warn(dev, "forcing PORTS_IMPL to 0x%x\n", port_map);
 
@@ -512,6 +513,9 @@
 
 	if (!hpriv->start_engine)
 		hpriv->start_engine = ahci_start_engine;
+
+	if (!hpriv->irq_handler)
+		hpriv->irq_handler = ahci_single_level_irq_intr;
 }
 EXPORT_SYMBOL_GPL(ahci_save_initial_config);
 
@@ -593,8 +597,22 @@
 int ahci_stop_engine(struct ata_port *ap)
 {
 	void __iomem *port_mmio = ahci_port_base(ap);
+	struct ahci_host_priv *hpriv = ap->host->private_data;
 	u32 tmp;
 
+	/*
+	 * On some controllers, stopping a port's DMA engine while the port
+	 * is in ALPM state (partial or slumber) results in failures on
+	 * subsequent DMA engine starts.  For those controllers, put the
+	 * port back in active state before stopping its DMA engine.
+	 */
+	if ((hpriv->flags & AHCI_HFLAG_WAKE_BEFORE_STOP) &&
+	    (ap->link.lpm_policy > ATA_LPM_MAX_POWER) &&
+	    ahci_set_lpm(&ap->link, ATA_LPM_MAX_POWER, ATA_LPM_WAKE_ONLY)) {
+		dev_err(ap->host->dev, "Failed to wake up port before engine stop\n");
+		return -EIO;
+	}
+
 	tmp = readl(port_mmio + PORT_CMD);
 
 	/* check if the HBA is idle */
@@ -689,6 +707,9 @@
 	void __iomem *port_mmio = ahci_port_base(ap);
 
 	if (policy != ATA_LPM_MAX_POWER) {
+		/* wakeup flag only applies to the max power policy */
+		hints &= ~ATA_LPM_WAKE_ONLY;
+
 		/*
 		 * Disable interrupts on Phy Ready. This keeps us from
 		 * getting woken up due to spurious phy ready
@@ -704,7 +725,8 @@
 		u32 cmd = readl(port_mmio + PORT_CMD);
 
 		if (policy == ATA_LPM_MAX_POWER || !(hints & ATA_LPM_HIPM)) {
-			cmd &= ~(PORT_CMD_ASP | PORT_CMD_ALPE);
+			if (!(hints & ATA_LPM_WAKE_ONLY))
+				cmd &= ~(PORT_CMD_ASP | PORT_CMD_ALPE);
 			cmd |= PORT_CMD_ICC_ACTIVE;
 
 			writel(cmd, port_mmio + PORT_CMD);
@@ -712,6 +734,9 @@
 
 			/* wait 10ms to be sure we've come out of LPM state */
 			ata_msleep(ap, 10);
+
+			if (hints & ATA_LPM_WAKE_ONLY)
+				return 0;
 		} else {
 			cmd |= PORT_CMD_ALPE;
 			if (policy == ATA_LPM_MIN_POWER)
@@ -1143,8 +1168,7 @@
 
 	/* mark esata ports */
 	tmp = readl(port_mmio + PORT_CMD);
-	if ((tmp & PORT_CMD_HPCP) ||
-	    ((tmp & PORT_CMD_ESP) && (hpriv->cap & HOST_CAP_SXS)))
+	if ((tmp & PORT_CMD_ESP) && (hpriv->cap & HOST_CAP_SXS))
 		ap->pflags |= ATA_PFLAG_EXTERNAL;
 }
 
@@ -1825,7 +1849,7 @@
 	return IRQ_HANDLED;
 }
 
-static u32 ahci_handle_port_intr(struct ata_host *host, u32 irq_masked)
+u32 ahci_handle_port_intr(struct ata_host *host, u32 irq_masked)
 {
 	unsigned int i, handled = 0;
 
@@ -1851,43 +1875,7 @@
 
 	return handled;
 }
-
-static irqreturn_t ahci_single_edge_irq_intr(int irq, void *dev_instance)
-{
-	struct ata_host *host = dev_instance;
-	struct ahci_host_priv *hpriv;
-	unsigned int rc = 0;
-	void __iomem *mmio;
-	u32 irq_stat, irq_masked;
-
-	VPRINTK("ENTER\n");
-
-	hpriv = host->private_data;
-	mmio = hpriv->mmio;
-
-	/* sigh.  0xffffffff is a valid return from h/w */
-	irq_stat = readl(mmio + HOST_IRQ_STAT);
-	if (!irq_stat)
-		return IRQ_NONE;
-
-	irq_masked = irq_stat & hpriv->port_map;
-
-	spin_lock(&host->lock);
-
-	/*
-	 * HOST_IRQ_STAT behaves as edge triggered latch meaning that
-	 * it should be cleared before all the port events are cleared.
-	 */
-	writel(irq_stat, mmio + HOST_IRQ_STAT);
-
-	rc = ahci_handle_port_intr(host, irq_masked);
-
-	spin_unlock(&host->lock);
-
-	VPRINTK("EXIT\n");
-
-	return IRQ_RETVAL(rc);
-}
+EXPORT_SYMBOL_GPL(ahci_handle_port_intr);
 
 static irqreturn_t ahci_single_level_irq_intr(int irq, void *dev_instance)
 {
@@ -2514,14 +2502,18 @@
 	int irq = hpriv->irq;
 	int rc;
 
-	if (hpriv->flags & (AHCI_HFLAG_MULTI_MSI | AHCI_HFLAG_MULTI_MSIX))
+	if (hpriv->flags & (AHCI_HFLAG_MULTI_MSI | AHCI_HFLAG_MULTI_MSIX)) {
+		if (hpriv->irq_handler)
+			dev_warn(host->dev, "both AHCI_HFLAG_MULTI_MSI flag set \
+				 and custom irq handler implemented\n");
+
 		rc = ahci_host_activate_multi_irqs(host, sht);
-	else if (hpriv->flags & AHCI_HFLAG_EDGE_IRQ)
-		rc = ata_host_activate(host, irq, ahci_single_edge_irq_intr,
+	} else {
+		rc = ata_host_activate(host, irq, hpriv->irq_handler,
 				       IRQF_SHARED, sht);
-	else
-		rc = ata_host_activate(host, irq, ahci_single_level_irq_intr,
-				       IRQF_SHARED, sht);
+	}
+
+
 	return rc;
 }
 EXPORT_SYMBOL_GPL(ahci_host_activate);
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index cbb7471..55e257c 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -4125,6 +4125,7 @@
 	{ "SAMSUNG CD-ROM SN-124", "N001",	ATA_HORKAGE_NODMA },
 	{ "Seagate STT20000A", NULL,		ATA_HORKAGE_NODMA },
 	{ " 2GB ATA Flash Disk", "ADMA428M",	ATA_HORKAGE_NODMA },
+	{ "VRFDFC22048UCHC-TE*", NULL,		ATA_HORKAGE_NODMA },
 	/* Odd clown on sil3726/4726 PMPs */
 	{ "Config  Disk",	NULL,		ATA_HORKAGE_DISABLE },
 
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index 7e959f9..e417e1a 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -675,19 +675,18 @@
 int ata_sas_scsi_ioctl(struct ata_port *ap, struct scsi_device *scsidev,
 		     int cmd, void __user *arg)
 {
-	int val = -EINVAL, rc = -EINVAL;
+	unsigned long val;
+	int rc = -EINVAL;
 	unsigned long flags;
 
 	switch (cmd) {
-	case ATA_IOC_GET_IO32:
+	case HDIO_GET_32BIT:
 		spin_lock_irqsave(ap->lock, flags);
 		val = ata_ioc32(ap);
 		spin_unlock_irqrestore(ap->lock, flags);
-		if (copy_to_user(arg, &val, 1))
-			return -EFAULT;
-		return 0;
+		return put_user(val, (unsigned long __user *)arg);
 
-	case ATA_IOC_SET_IO32:
+	case HDIO_SET_32BIT:
 		val = (unsigned long) arg;
 		rc = 0;
 		spin_lock_irqsave(ap->lock, flags);
diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c
index cdf6215..051b615 100644
--- a/drivers/ata/libata-sff.c
+++ b/drivers/ata/libata-sff.c
@@ -997,12 +997,9 @@
 static void ata_hsm_qc_complete(struct ata_queued_cmd *qc, int in_wq)
 {
 	struct ata_port *ap = qc->ap;
-	unsigned long flags;
 
 	if (ap->ops->error_handler) {
 		if (in_wq) {
-			spin_lock_irqsave(ap->lock, flags);
-
 			/* EH might have kicked in while host lock is
 			 * released.
 			 */
@@ -1014,8 +1011,6 @@
 				} else
 					ata_port_freeze(ap);
 			}
-
-			spin_unlock_irqrestore(ap->lock, flags);
 		} else {
 			if (likely(!(qc->err_mask & AC_ERR_HSM)))
 				ata_qc_complete(qc);
@@ -1024,10 +1019,8 @@
 		}
 	} else {
 		if (in_wq) {
-			spin_lock_irqsave(ap->lock, flags);
 			ata_sff_irq_on(ap);
 			ata_qc_complete(qc);
-			spin_unlock_irqrestore(ap->lock, flags);
 		} else
 			ata_qc_complete(qc);
 	}
@@ -1048,9 +1041,10 @@
 {
 	struct ata_link *link = qc->dev->link;
 	struct ata_eh_info *ehi = &link->eh_info;
-	unsigned long flags = 0;
 	int poll_next;
 
+	lockdep_assert_held(ap->lock);
+
 	WARN_ON_ONCE((qc->flags & ATA_QCFLAG_ACTIVE) == 0);
 
 	/* Make sure ata_sff_qc_issue() does not throw things
@@ -1112,14 +1106,6 @@
 			}
 		}
 
-		/* Send the CDB (atapi) or the first data block (ata pio out).
-		 * During the state transition, interrupt handler shouldn't
-		 * be invoked before the data transfer is complete and
-		 * hsm_task_state is changed. Hence, the following locking.
-		 */
-		if (in_wq)
-			spin_lock_irqsave(ap->lock, flags);
-
 		if (qc->tf.protocol == ATA_PROT_PIO) {
 			/* PIO data out protocol.
 			 * send first data block.
@@ -1135,9 +1121,6 @@
 			/* send CDB */
 			atapi_send_cdb(ap, qc);
 
-		if (in_wq)
-			spin_unlock_irqrestore(ap->lock, flags);
-
 		/* if polling, ata_sff_pio_task() handles the rest.
 		 * otherwise, interrupt handler takes over from here.
 		 */
@@ -1296,7 +1279,8 @@
 		break;
 	default:
 		poll_next = 0;
-		BUG();
+		WARN(true, "ata%d: SFF host state machine in invalid state %d",
+		     ap->print_id, ap->hsm_task_state);
 	}
 
 	return poll_next;
@@ -1361,12 +1345,14 @@
 	u8 status;
 	int poll_next;
 
+	spin_lock_irq(ap->lock);
+
 	BUG_ON(ap->sff_pio_task_link == NULL);
 	/* qc can be NULL if timeout occurred */
 	qc = ata_qc_from_tag(ap, link->active_tag);
 	if (!qc) {
 		ap->sff_pio_task_link = NULL;
-		return;
+		goto out_unlock;
 	}
 
 fsm_start:
@@ -1381,11 +1367,14 @@
 	 */
 	status = ata_sff_busy_wait(ap, ATA_BUSY, 5);
 	if (status & ATA_BUSY) {
+		spin_unlock_irq(ap->lock);
 		ata_msleep(ap, 2);
+		spin_lock_irq(ap->lock);
+
 		status = ata_sff_busy_wait(ap, ATA_BUSY, 10);
 		if (status & ATA_BUSY) {
 			ata_sff_queue_pio_task(link, ATA_SHORT_PAUSE);
-			return;
+			goto out_unlock;
 		}
 	}
 
@@ -1402,6 +1391,8 @@
 	 */
 	if (poll_next)
 		goto fsm_start;
+out_unlock:
+	spin_unlock_irq(ap->lock);
 }
 
 /**
diff --git a/drivers/ata/pata_rb532_cf.c b/drivers/ata/pata_rb532_cf.c
index 12fe0f3..c8b6a78 100644
--- a/drivers/ata/pata_rb532_cf.c
+++ b/drivers/ata/pata_rb532_cf.c
@@ -32,6 +32,8 @@
 #include <linux/libata.h>
 #include <scsi/scsi_host.h>
 
+#include <asm/mach-rc32434/rb.h>
+
 #define DRV_NAME	"pata-rb532-cf"
 #define DRV_VERSION	"0.1.0"
 #define DRV_DESC	"PATA driver for RouterBOARD 532 Compact Flash"
@@ -107,6 +109,7 @@
 	int gpio;
 	struct resource *res;
 	struct ata_host *ah;
+	struct cf_device *pdata;
 	struct rb532_cf_info *info;
 	int ret;
 
@@ -122,7 +125,13 @@
 		return -ENOENT;
 	}
 
-	gpio = irq_to_gpio(irq);
+	pdata = dev_get_platdata(&pdev->dev);
+	if (!pdata) {
+		dev_err(&pdev->dev, "no platform data specified\n");
+		return -EINVAL;
+	}
+
+	gpio = pdata->gpio_pin;
 	if (gpio < 0) {
 		dev_err(&pdev->dev, "no GPIO found for irq%d\n", irq);
 		return -ENOENT;
diff --git a/drivers/base/component.c b/drivers/base/component.c
index 89f5cf68..04a1582 100644
--- a/drivers/base/component.c
+++ b/drivers/base/component.c
@@ -206,6 +206,8 @@
 		if (mc->release)
 			mc->release(master, mc->data);
 	}
+
+	kfree(match->compare);
 }
 
 static void devm_component_match_release(struct device *dev, void *res)
@@ -221,14 +223,14 @@
 	if (match->alloc == num)
 		return 0;
 
-	new = devm_kmalloc_array(dev, num, sizeof(*new), GFP_KERNEL);
+	new = kmalloc_array(num, sizeof(*new), GFP_KERNEL);
 	if (!new)
 		return -ENOMEM;
 
 	if (match->compare) {
 		memcpy(new, match->compare, sizeof(*new) *
 					    min(match->num, num));
-		devm_kfree(dev, match->compare);
+		kfree(match->compare);
 	}
 	match->compare = new;
 	match->alloc = num;
@@ -283,6 +285,24 @@
 }
 EXPORT_SYMBOL(component_match_add_release);
 
+static void free_master(struct master *master)
+{
+	struct component_match *match = master->match;
+	int i;
+
+	list_del(&master->node);
+
+	if (match) {
+		for (i = 0; i < match->num; i++) {
+			struct component *c = match->compare[i].component;
+			if (c)
+				c->master = NULL;
+		}
+	}
+
+	kfree(master);
+}
+
 int component_master_add_with_match(struct device *dev,
 	const struct component_master_ops *ops,
 	struct component_match *match)
@@ -309,11 +329,9 @@
 
 	ret = try_to_bring_up_master(master, NULL);
 
-	if (ret < 0) {
-		/* Delete off the list if we weren't successful */
-		list_del(&master->node);
-		kfree(master);
-	}
+	if (ret < 0)
+		free_master(master);
+
 	mutex_unlock(&component_mutex);
 
 	return ret < 0 ? ret : 0;
@@ -324,25 +342,12 @@
 	const struct component_master_ops *ops)
 {
 	struct master *master;
-	int i;
 
 	mutex_lock(&component_mutex);
 	master = __master_find(dev, ops);
 	if (master) {
-		struct component_match *match = master->match;
-
 		take_down_master(master);
-
-		list_del(&master->node);
-
-		if (match) {
-			for (i = 0; i < match->num; i++) {
-				struct component *c = match->compare[i].component;
-				if (c)
-					c->master = NULL;
-			}
-		}
-		kfree(master);
+		free_master(master);
 	}
 	mutex_unlock(&component_mutex);
 }
@@ -486,6 +491,8 @@
 
 	ret = try_to_bring_up_masters(component);
 	if (ret < 0) {
+		if (component->master)
+			remove_component(component->master, component);
 		list_del(&component->node);
 
 		kfree(component);
diff --git a/drivers/base/platform-msi.c b/drivers/base/platform-msi.c
index 47c4338..279e539 100644
--- a/drivers/base/platform-msi.c
+++ b/drivers/base/platform-msi.c
@@ -284,6 +284,7 @@
 
 	return err;
 }
+EXPORT_SYMBOL_GPL(platform_msi_domain_alloc_irqs);
 
 /**
  * platform_msi_domain_free_irqs - Free MSI interrupts for @dev
@@ -301,6 +302,7 @@
 	msi_domain_free_irqs(dev->msi_domain, dev);
 	platform_msi_free_descs(dev, 0, MAX_DEV_MSIS);
 }
+EXPORT_SYMBOL_GPL(platform_msi_domain_free_irqs);
 
 /**
  * platform_msi_get_host_data - Query the private data associated with
diff --git a/drivers/base/platform.c b/drivers/base/platform.c
index 73d6e5d3..f437afa 100644
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -558,10 +558,15 @@
 		return ret;
 
 	ret = dev_pm_domain_attach(_dev, true);
-	if (ret != -EPROBE_DEFER && drv->probe) {
-		ret = drv->probe(dev);
-		if (ret)
-			dev_pm_domain_detach(_dev, true);
+	if (ret != -EPROBE_DEFER) {
+		if (drv->probe) {
+			ret = drv->probe(dev);
+			if (ret)
+				dev_pm_domain_detach(_dev, true);
+		} else {
+			/* don't fail if just dev_pm_domain_attach failed */
+			ret = 0;
+		}
 	}
 
 	if (drv->prevent_deferred_probe && ret == -EPROBE_DEFER) {
diff --git a/drivers/base/power/common.c b/drivers/base/power/common.c
index 93ed14c..f6a9ad5 100644
--- a/drivers/base/power/common.c
+++ b/drivers/base/power/common.c
@@ -146,7 +146,7 @@
 	if (dev->pm_domain == pd)
 		return;
 
-	WARN(device_is_bound(dev),
+	WARN(pd && device_is_bound(dev),
 	     "PM domains can only be changed for unbound devices\n");
 	dev->pm_domain = pd;
 	device_pm_check_callbacks(dev);
diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index 6ac9a7f..301b785 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -162,7 +162,7 @@
 
 /**
  * genpd_queue_power_off_work - Queue up the execution of genpd_poweroff().
- * @genpd: PM domait to power off.
+ * @genpd: PM domain to power off.
  *
  * Queue up the execution of genpd_poweroff() unless it's already been done
  * before.
@@ -172,16 +172,15 @@
 	queue_work(pm_wq, &genpd->power_off_work);
 }
 
-static int genpd_poweron(struct generic_pm_domain *genpd);
-
 /**
- * __genpd_poweron - Restore power to a given PM domain and its masters.
+ * genpd_poweron - Restore power to a given PM domain and its masters.
  * @genpd: PM domain to power up.
+ * @depth: nesting count for lockdep.
  *
  * Restore power to @genpd and all of its masters so that it is possible to
  * resume a device belonging to it.
  */
-static int __genpd_poweron(struct generic_pm_domain *genpd)
+static int genpd_poweron(struct generic_pm_domain *genpd, unsigned int depth)
 {
 	struct gpd_link *link;
 	int ret = 0;
@@ -196,11 +195,16 @@
 	 * with it.
 	 */
 	list_for_each_entry(link, &genpd->slave_links, slave_node) {
-		genpd_sd_counter_inc(link->master);
+		struct generic_pm_domain *master = link->master;
 
-		ret = genpd_poweron(link->master);
+		genpd_sd_counter_inc(master);
+
+		mutex_lock_nested(&master->lock, depth + 1);
+		ret = genpd_poweron(master, depth + 1);
+		mutex_unlock(&master->lock);
+
 		if (ret) {
-			genpd_sd_counter_dec(link->master);
+			genpd_sd_counter_dec(master);
 			goto err;
 		}
 	}
@@ -223,20 +227,6 @@
 	return ret;
 }
 
-/**
- * genpd_poweron - Restore power to a given PM domain and its masters.
- * @genpd: PM domain to power up.
- */
-static int genpd_poweron(struct generic_pm_domain *genpd)
-{
-	int ret;
-
-	mutex_lock(&genpd->lock);
-	ret = __genpd_poweron(genpd);
-	mutex_unlock(&genpd->lock);
-	return ret;
-}
-
 static int genpd_save_dev(struct generic_pm_domain *genpd, struct device *dev)
 {
 	return GENPD_DEV_CALLBACK(genpd, int, save_state, dev);
@@ -484,7 +474,7 @@
 	}
 
 	mutex_lock(&genpd->lock);
-	ret = __genpd_poweron(genpd);
+	ret = genpd_poweron(genpd, 0);
 	mutex_unlock(&genpd->lock);
 
 	if (ret)
@@ -1339,8 +1329,8 @@
 	if (!link)
 		return -ENOMEM;
 
-	mutex_lock(&genpd->lock);
-	mutex_lock_nested(&subdomain->lock, SINGLE_DEPTH_NESTING);
+	mutex_lock(&subdomain->lock);
+	mutex_lock_nested(&genpd->lock, SINGLE_DEPTH_NESTING);
 
 	if (genpd->status == GPD_STATE_POWER_OFF
 	    &&  subdomain->status != GPD_STATE_POWER_OFF) {
@@ -1363,8 +1353,8 @@
 		genpd_sd_counter_inc(genpd);
 
  out:
-	mutex_unlock(&subdomain->lock);
 	mutex_unlock(&genpd->lock);
+	mutex_unlock(&subdomain->lock);
 	if (ret)
 		kfree(link);
 	return ret;
@@ -1385,7 +1375,8 @@
 	if (IS_ERR_OR_NULL(genpd) || IS_ERR_OR_NULL(subdomain))
 		return -EINVAL;
 
-	mutex_lock(&genpd->lock);
+	mutex_lock(&subdomain->lock);
+	mutex_lock_nested(&genpd->lock, SINGLE_DEPTH_NESTING);
 
 	if (!list_empty(&subdomain->slave_links) || subdomain->device_count) {
 		pr_warn("%s: unable to remove subdomain %s\n", genpd->name,
@@ -1398,22 +1389,19 @@
 		if (link->slave != subdomain)
 			continue;
 
-		mutex_lock_nested(&subdomain->lock, SINGLE_DEPTH_NESTING);
-
 		list_del(&link->master_node);
 		list_del(&link->slave_node);
 		kfree(link);
 		if (subdomain->status != GPD_STATE_POWER_OFF)
 			genpd_sd_counter_dec(genpd);
 
-		mutex_unlock(&subdomain->lock);
-
 		ret = 0;
 		break;
 	}
 
 out:
 	mutex_unlock(&genpd->lock);
+	mutex_unlock(&subdomain->lock);
 
 	return ret;
 }
@@ -1818,8 +1806,10 @@
 
 	dev->pm_domain->detach = genpd_dev_pm_detach;
 	dev->pm_domain->sync = genpd_dev_pm_sync;
-	ret = genpd_poweron(pd);
 
+	mutex_lock(&pd->lock);
+	ret = genpd_poweron(pd, 0);
+	mutex_unlock(&pd->lock);
 out:
 	return ret ? -EPROBE_DEFER : 0;
 }
diff --git a/drivers/base/property.c b/drivers/base/property.c
index c359351..a163f2c5 100644
--- a/drivers/base/property.c
+++ b/drivers/base/property.c
@@ -218,7 +218,7 @@
 	bool ret;
 
 	ret = __fwnode_property_present(fwnode, propname);
-	if (ret == false && fwnode && fwnode->secondary)
+	if (ret == false && fwnode && !IS_ERR_OR_NULL(fwnode->secondary))
 		ret = __fwnode_property_present(fwnode->secondary, propname);
 	return ret;
 }
@@ -423,7 +423,7 @@
 	int _ret_;									\
 	_ret_ = FWNODE_PROP_READ(_fwnode_, _propname_, _type_, _proptype_,		\
 				 _val_, _nval_);					\
-	if (_ret_ == -EINVAL && _fwnode_ && _fwnode_->secondary)			\
+	if (_ret_ == -EINVAL && _fwnode_ && !IS_ERR_OR_NULL(_fwnode_->secondary))	\
 		_ret_ = FWNODE_PROP_READ(_fwnode_->secondary, _propname_, _type_,	\
 				_proptype_, _val_, _nval_);				\
 	_ret_;										\
@@ -593,7 +593,7 @@
 	int ret;
 
 	ret = __fwnode_property_read_string_array(fwnode, propname, val, nval);
-	if (ret == -EINVAL && fwnode && fwnode->secondary)
+	if (ret == -EINVAL && fwnode && !IS_ERR_OR_NULL(fwnode->secondary))
 		ret = __fwnode_property_read_string_array(fwnode->secondary,
 							  propname, val, nval);
 	return ret;
@@ -621,7 +621,7 @@
 	int ret;
 
 	ret = __fwnode_property_read_string(fwnode, propname, val);
-	if (ret == -EINVAL && fwnode && fwnode->secondary)
+	if (ret == -EINVAL && fwnode && !IS_ERR_OR_NULL(fwnode->secondary))
 		ret = __fwnode_property_read_string(fwnode->secondary,
 						    propname, val);
 	return ret;
diff --git a/drivers/base/regmap/regmap-mmio.c b/drivers/base/regmap/regmap-mmio.c
index 8812bfb..eea5156 100644
--- a/drivers/base/regmap/regmap-mmio.c
+++ b/drivers/base/regmap/regmap-mmio.c
@@ -133,17 +133,17 @@
 	while (val_size) {
 		switch (ctx->val_bytes) {
 		case 1:
-			__raw_writeb(*(u8 *)val, ctx->regs + offset);
+			writeb(*(u8 *)val, ctx->regs + offset);
 			break;
 		case 2:
-			__raw_writew(*(u16 *)val, ctx->regs + offset);
+			writew(*(u16 *)val, ctx->regs + offset);
 			break;
 		case 4:
-			__raw_writel(*(u32 *)val, ctx->regs + offset);
+			writel(*(u32 *)val, ctx->regs + offset);
 			break;
 #ifdef CONFIG_64BIT
 		case 8:
-			__raw_writeq(*(u64 *)val, ctx->regs + offset);
+			writeq(*(u64 *)val, ctx->regs + offset);
 			break;
 #endif
 		default:
@@ -193,17 +193,17 @@
 	while (val_size) {
 		switch (ctx->val_bytes) {
 		case 1:
-			*(u8 *)val = __raw_readb(ctx->regs + offset);
+			*(u8 *)val = readb(ctx->regs + offset);
 			break;
 		case 2:
-			*(u16 *)val = __raw_readw(ctx->regs + offset);
+			*(u16 *)val = readw(ctx->regs + offset);
 			break;
 		case 4:
-			*(u32 *)val = __raw_readl(ctx->regs + offset);
+			*(u32 *)val = readl(ctx->regs + offset);
 			break;
 #ifdef CONFIG_64BIT
 		case 8:
-			*(u64 *)val = __raw_readq(ctx->regs + offset);
+			*(u64 *)val = readq(ctx->regs + offset);
 			break;
 #endif
 		default:
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 9e25120..84708a5 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -866,7 +866,7 @@
 }
 
 /* locks the driver */
-static int lock_fdc(int drive, bool interruptible)
+static int lock_fdc(int drive)
 {
 	if (WARN(atomic_read(&usage_count) == 0,
 		 "Trying to lock fdc while usage count=0\n"))
@@ -2173,7 +2173,7 @@
 {
 	int ret;
 
-	if (lock_fdc(drive, true))
+	if (lock_fdc(drive))
 		return -EINTR;
 
 	set_floppy(drive);
@@ -2960,7 +2960,7 @@
 {
 	int ret;
 
-	if (lock_fdc(drive, interruptible))
+	if (lock_fdc(drive))
 		return -EINTR;
 
 	if (arg == FD_RESET_ALWAYS)
@@ -3243,7 +3243,7 @@
 		if (!capable(CAP_SYS_ADMIN))
 			return -EPERM;
 		mutex_lock(&open_lock);
-		if (lock_fdc(drive, true)) {
+		if (lock_fdc(drive)) {
 			mutex_unlock(&open_lock);
 			return -EINTR;
 		}
@@ -3263,7 +3263,7 @@
 	} else {
 		int oldStretch;
 
-		if (lock_fdc(drive, true))
+		if (lock_fdc(drive))
 			return -EINTR;
 		if (cmd != FDDEFPRM) {
 			/* notice a disk change immediately, else
@@ -3349,7 +3349,7 @@
 	if (type)
 		*g = &floppy_type[type];
 	else {
-		if (lock_fdc(drive, false))
+		if (lock_fdc(drive))
 			return -EINTR;
 		if (poll_drive(false, 0) == -EINTR)
 			return -EINTR;
@@ -3433,7 +3433,7 @@
 		if (UDRS->fd_ref != 1)
 			/* somebody else has this drive open */
 			return -EBUSY;
-		if (lock_fdc(drive, true))
+		if (lock_fdc(drive))
 			return -EINTR;
 
 		/* do the actual eject. Fails on
@@ -3445,7 +3445,7 @@
 		process_fd_request();
 		return ret;
 	case FDCLRPRM:
-		if (lock_fdc(drive, true))
+		if (lock_fdc(drive))
 			return -EINTR;
 		current_type[drive] = NULL;
 		floppy_sizes[drive] = MAX_DISK_SIZE << 1;
@@ -3467,7 +3467,7 @@
 		UDP->flags &= ~FTD_MSG;
 		return 0;
 	case FDFMTBEG:
-		if (lock_fdc(drive, true))
+		if (lock_fdc(drive))
 			return -EINTR;
 		if (poll_drive(true, FD_RAW_NEED_DISK) == -EINTR)
 			return -EINTR;
@@ -3484,7 +3484,7 @@
 		return do_format(drive, &inparam.f);
 	case FDFMTEND:
 	case FDFLUSH:
-		if (lock_fdc(drive, true))
+		if (lock_fdc(drive))
 			return -EINTR;
 		return invalidate_drive(bdev);
 	case FDSETEMSGTRESH:
@@ -3507,7 +3507,7 @@
 		outparam = UDP;
 		break;
 	case FDPOLLDRVSTAT:
-		if (lock_fdc(drive, true))
+		if (lock_fdc(drive))
 			return -EINTR;
 		if (poll_drive(true, FD_RAW_NEED_DISK) == -EINTR)
 			return -EINTR;
@@ -3530,7 +3530,7 @@
 	case FDRAWCMD:
 		if (type)
 			return -EINVAL;
-		if (lock_fdc(drive, true))
+		if (lock_fdc(drive))
 			return -EINTR;
 		set_floppy(drive);
 		i = raw_cmd_ioctl(cmd, (void __user *)param);
@@ -3539,7 +3539,7 @@
 		process_fd_request();
 		return i;
 	case FDTWADDLE:
-		if (lock_fdc(drive, true))
+		if (lock_fdc(drive))
 			return -EINTR;
 		twaddle();
 		process_fd_request();
@@ -3663,6 +3663,11 @@
 
 	opened_bdev[drive] = bdev;
 
+	if (!(mode & (FMODE_READ|FMODE_WRITE))) {
+		res = -EINVAL;
+		goto out;
+	}
+
 	res = -ENXIO;
 
 	if (!floppy_track_buffer) {
@@ -3706,21 +3711,20 @@
 	if (UFDCS->rawcmd == 1)
 		UFDCS->rawcmd = 2;
 
-	if (!(mode & FMODE_NDELAY)) {
-		if (mode & (FMODE_READ|FMODE_WRITE)) {
-			UDRS->last_checked = 0;
-			clear_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags);
-			check_disk_change(bdev);
-			if (test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags))
-				goto out;
-			if (test_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags))
-				goto out;
-		}
-		res = -EROFS;
-		if ((mode & FMODE_WRITE) &&
-		    !test_bit(FD_DISK_WRITABLE_BIT, &UDRS->flags))
-			goto out;
-	}
+	UDRS->last_checked = 0;
+	clear_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags);
+	check_disk_change(bdev);
+	if (test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags))
+		goto out;
+	if (test_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags))
+		goto out;
+
+	res = -EROFS;
+
+	if ((mode & FMODE_WRITE) &&
+			!test_bit(FD_DISK_WRITABLE_BIT, &UDRS->flags))
+		goto out;
+
 	mutex_unlock(&open_lock);
 	mutex_unlock(&floppy_mutex);
 	return 0;
@@ -3748,7 +3752,8 @@
 		return DISK_EVENT_MEDIA_CHANGE;
 
 	if (time_after(jiffies, UDRS->last_checked + UDP->checkfreq)) {
-		lock_fdc(drive, false);
+		if (lock_fdc(drive))
+			return -EINTR;
 		poll_drive(false, 0);
 		process_fd_request();
 	}
@@ -3847,7 +3852,9 @@
 			 "VFS: revalidate called on non-open device.\n"))
 			return -EFAULT;
 
-		lock_fdc(drive, false);
+		res = lock_fdc(drive);
+		if (res)
+			return res;
 		cf = (test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags) ||
 		      test_bit(FD_VERIFY_BIT, &UDRS->flags));
 		if (!(cf || test_bit(drive, &fake_change) || drive_no_geom(drive))) {
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index 8ba1e97..64a7b59 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -478,7 +478,7 @@
 	id->ver_id = 0x1;
 	id->vmnt = 0;
 	id->cgrps = 1;
-	id->cap = 0x3;
+	id->cap = 0x2;
 	id->dom = 0x1;
 
 	id->ppaf.blk_offset = 0;
@@ -707,9 +707,7 @@
 	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, nullb->q);
 	queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, nullb->q);
 
-
 	mutex_lock(&lock);
-	list_add_tail(&nullb->list, &nullb_list);
 	nullb->index = nullb_indexes++;
 	mutex_unlock(&lock);
 
@@ -743,6 +741,10 @@
 	strncpy(disk->disk_name, nullb->disk_name, DISK_NAME_LEN);
 
 	add_disk(disk);
+
+	mutex_lock(&lock);
+	list_add_tail(&nullb->list, &nullb_list);
+	mutex_unlock(&lock);
 done:
 	return 0;
 
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 8a8dc91..83eb9e6 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -1873,6 +1873,43 @@
 	return err;
 }
 
+static int negotiate_mq(struct blkfront_info *info)
+{
+	unsigned int backend_max_queues = 0;
+	int err;
+	unsigned int i;
+
+	BUG_ON(info->nr_rings);
+
+	/* Check if backend supports multiple queues. */
+	err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
+			   "multi-queue-max-queues", "%u", &backend_max_queues);
+	if (err < 0)
+		backend_max_queues = 1;
+
+	info->nr_rings = min(backend_max_queues, xen_blkif_max_queues);
+	/* We need at least one ring. */
+	if (!info->nr_rings)
+		info->nr_rings = 1;
+
+	info->rinfo = kzalloc(sizeof(struct blkfront_ring_info) * info->nr_rings, GFP_KERNEL);
+	if (!info->rinfo) {
+		xenbus_dev_fatal(info->xbdev, -ENOMEM, "allocating ring_info structure");
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < info->nr_rings; i++) {
+		struct blkfront_ring_info *rinfo;
+
+		rinfo = &info->rinfo[i];
+		INIT_LIST_HEAD(&rinfo->indirect_pages);
+		INIT_LIST_HEAD(&rinfo->grants);
+		rinfo->dev_info = info;
+		INIT_WORK(&rinfo->work, blkif_restart_queue);
+		spin_lock_init(&rinfo->ring_lock);
+	}
+	return 0;
+}
 /**
  * Entry point to this code when a new device is created.  Allocate the basic
  * structures and the ring buffer for communication with the backend, and
@@ -1883,9 +1920,7 @@
 			  const struct xenbus_device_id *id)
 {
 	int err, vdevice;
-	unsigned int r_index;
 	struct blkfront_info *info;
-	unsigned int backend_max_queues = 0;
 
 	/* FIXME: Use dynamic device id if this is not set. */
 	err = xenbus_scanf(XBT_NIL, dev->nodename,
@@ -1936,33 +1971,10 @@
 	}
 
 	info->xbdev = dev;
-	/* Check if backend supports multiple queues. */
-	err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
-			   "multi-queue-max-queues", "%u", &backend_max_queues);
-	if (err < 0)
-		backend_max_queues = 1;
-
-	info->nr_rings = min(backend_max_queues, xen_blkif_max_queues);
-	/* We need at least one ring. */
-	if (!info->nr_rings)
-		info->nr_rings = 1;
-
-	info->rinfo = kzalloc(sizeof(struct blkfront_ring_info) * info->nr_rings, GFP_KERNEL);
-	if (!info->rinfo) {
-		xenbus_dev_fatal(dev, -ENOMEM, "allocating ring_info structure");
+	err = negotiate_mq(info);
+	if (err) {
 		kfree(info);
-		return -ENOMEM;
-	}
-
-	for (r_index = 0; r_index < info->nr_rings; r_index++) {
-		struct blkfront_ring_info *rinfo;
-
-		rinfo = &info->rinfo[r_index];
-		INIT_LIST_HEAD(&rinfo->indirect_pages);
-		INIT_LIST_HEAD(&rinfo->grants);
-		rinfo->dev_info = info;
-		INIT_WORK(&rinfo->work, blkif_restart_queue);
-		spin_lock_init(&rinfo->ring_lock);
+		return err;
 	}
 
 	mutex_init(&info->mutex);
@@ -2123,12 +2135,16 @@
 static int blkfront_resume(struct xenbus_device *dev)
 {
 	struct blkfront_info *info = dev_get_drvdata(&dev->dev);
-	int err;
+	int err = 0;
 
 	dev_dbg(&dev->dev, "blkfront_resume: %s\n", dev->nodename);
 
 	blkif_free(info, info->connected == BLKIF_STATE_CONNECTED);
 
+	err = negotiate_mq(info);
+	if (err)
+		return err;
+
 	err = talk_to_blkback(dev, info);
 
 	/*
diff --git a/drivers/bus/Kconfig b/drivers/bus/Kconfig
index 129d47b..9a92c07 100644
--- a/drivers/bus/Kconfig
+++ b/drivers/bus/Kconfig
@@ -132,7 +132,7 @@
 	  and AC100/AC200 ICs.
 
 config UNIPHIER_SYSTEM_BUS
-	bool "UniPhier System Bus driver"
+	tristate "UniPhier System Bus driver"
 	depends on ARCH_UNIPHIER && OF
 	default y
 	help
diff --git a/drivers/bus/vexpress-config.c b/drivers/bus/vexpress-config.c
index 6575c0f..c3cb76b 100644
--- a/drivers/bus/vexpress-config.c
+++ b/drivers/bus/vexpress-config.c
@@ -192,8 +192,10 @@
 	/* Need the config devices early, before the "normal" devices... */
 	for_each_compatible_node(node, NULL, "arm,vexpress,config-bus") {
 		err = vexpress_config_populate(node);
-		if (err)
+		if (err) {
+			of_node_put(node);
 			break;
+		}
 	}
 
 	return err;
diff --git a/drivers/char/hpet.c b/drivers/char/hpet.c
index 240b6cf..be54e53 100644
--- a/drivers/char/hpet.c
+++ b/drivers/char/hpet.c
@@ -42,7 +42,7 @@
 /*
  * The High Precision Event Timer driver.
  * This driver is closely modelled after the rtc.c driver.
- * http://www.intel.com/hardwaredesign/hpetspec_1.pdf
+ * See HPET spec revision 1.
  */
 #define	HPET_USER_FREQ	(64)
 #define	HPET_DRIFT	(500)
diff --git a/drivers/char/hw_random/Kconfig b/drivers/char/hw_random/Kconfig
index dbf2271..ff00331 100644
--- a/drivers/char/hw_random/Kconfig
+++ b/drivers/char/hw_random/Kconfig
@@ -372,6 +372,7 @@
 config HW_RANDOM_STM32
 	tristate "STMicroelectronics STM32 random number generator"
 	depends on HW_RANDOM && (ARCH_STM32 || COMPILE_TEST)
+	depends on HAS_IOMEM
 	help
 	  This driver provides kernel-side support for the Random Number
 	  Generator hardware found on STM32 microcontrollers.
diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c
index 9fda22e..7fddd86 100644
--- a/drivers/char/ipmi/ipmi_si_intf.c
+++ b/drivers/char/ipmi/ipmi_si_intf.c
@@ -68,6 +68,7 @@
 #include <linux/of_platform.h>
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
+#include <linux/acpi.h>
 
 #ifdef CONFIG_PARISC
 #include <asm/hardware.h>	/* for register_parisc_driver() stuff */
@@ -2054,8 +2055,6 @@
 
 #ifdef CONFIG_ACPI
 
-#include <linux/acpi.h>
-
 /*
  * Once we get an ACPI failure, we don't try any more, because we go
  * through the tables sequentially.  Once we don't find a table, there
diff --git a/drivers/char/random.c b/drivers/char/random.c
index d0da5d8..b583e53 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -1819,6 +1819,28 @@
 EXPORT_SYMBOL(get_random_int);
 
 /*
+ * Same as get_random_int(), but returns unsigned long.
+ */
+unsigned long get_random_long(void)
+{
+	__u32 *hash;
+	unsigned long ret;
+
+	if (arch_get_random_long(&ret))
+		return ret;
+
+	hash = get_cpu_var(get_random_int_hash);
+
+	hash[0] += current->pid + jiffies + random_get_entropy();
+	md5_transform(hash, random_int_secret);
+	ret = *(unsigned long *)hash;
+	put_cpu_var(get_random_int_hash);
+
+	return ret;
+}
+EXPORT_SYMBOL(get_random_long);
+
+/*
  * randomize_range() returns a start address such that
  *
  *    [...... <range> .....]
diff --git a/drivers/clk/Makefile b/drivers/clk/Makefile
index b038e36..bae4be6 100644
--- a/drivers/clk/Makefile
+++ b/drivers/clk/Makefile
@@ -43,7 +43,7 @@
 obj-$(CONFIG_COMMON_CLK_SI570)		+= clk-si570.o
 obj-$(CONFIG_COMMON_CLK_CDCE925)	+= clk-cdce925.o
 obj-$(CONFIG_ARCH_STM32)		+= clk-stm32f4.o
-obj-$(CONFIG_ARCH_TANGOX)		+= clk-tango4.o
+obj-$(CONFIG_ARCH_TANGO)		+= clk-tango4.o
 obj-$(CONFIG_CLK_TWL6040)		+= clk-twl6040.o
 obj-$(CONFIG_ARCH_U300)			+= clk-u300.o
 obj-$(CONFIG_ARCH_VT8500)		+= clk-vt8500.o
diff --git a/drivers/clk/clk-gpio.c b/drivers/clk/clk-gpio.c
index 19fed65..7b09a26 100644
--- a/drivers/clk/clk-gpio.c
+++ b/drivers/clk/clk-gpio.c
@@ -289,7 +289,7 @@
 
 	num_parents = of_clk_get_parent_count(node);
 	if (num_parents < 0)
-		return;
+		num_parents = 0;
 
 	data = kzalloc(sizeof(*data), GFP_KERNEL);
 	if (!data)
diff --git a/drivers/clk/clk-scpi.c b/drivers/clk/clk-scpi.c
index cd0f272..89e9ca7 100644
--- a/drivers/clk/clk-scpi.c
+++ b/drivers/clk/clk-scpi.c
@@ -299,7 +299,7 @@
 	/* Add the virtual cpufreq device */
 	cpufreq_dev = platform_device_register_simple("scpi-cpufreq",
 						      -1, NULL, 0);
-	if (!cpufreq_dev)
+	if (IS_ERR(cpufreq_dev))
 		pr_warn("unable to register cpufreq device");
 
 	return 0;
diff --git a/drivers/clk/mvebu/dove-divider.c b/drivers/clk/mvebu/dove-divider.c
index d5c5bfa..3e0b52d 100644
--- a/drivers/clk/mvebu/dove-divider.c
+++ b/drivers/clk/mvebu/dove-divider.c
@@ -247,7 +247,7 @@
 
 void __init dove_divider_clk_init(struct device_node *np)
 {
-	void *base;
+	void __iomem *base;
 
 	base = of_iomap(np, 0);
 	if (WARN_ON(!base))
diff --git a/drivers/clk/qcom/gcc-apq8084.c b/drivers/clk/qcom/gcc-apq8084.c
index cf73e53..070037a 100644
--- a/drivers/clk/qcom/gcc-apq8084.c
+++ b/drivers/clk/qcom/gcc-apq8084.c
@@ -3587,7 +3587,6 @@
 	.val_bits	= 32,
 	.max_register	= 0x1fc0,
 	.fast_io	= true,
-	.val_format_endian = REGMAP_ENDIAN_LITTLE,
 };
 
 static const struct qcom_cc_desc gcc_apq8084_desc = {
diff --git a/drivers/clk/qcom/gcc-ipq806x.c b/drivers/clk/qcom/gcc-ipq806x.c
index b692ae8..dd5402b 100644
--- a/drivers/clk/qcom/gcc-ipq806x.c
+++ b/drivers/clk/qcom/gcc-ipq806x.c
@@ -3005,7 +3005,6 @@
 	.val_bits	= 32,
 	.max_register	= 0x3e40,
 	.fast_io	= true,
-	.val_format_endian = REGMAP_ENDIAN_LITTLE,
 };
 
 static const struct qcom_cc_desc gcc_ipq806x_desc = {
diff --git a/drivers/clk/qcom/gcc-msm8660.c b/drivers/clk/qcom/gcc-msm8660.c
index f6a2b14..ad41303 100644
--- a/drivers/clk/qcom/gcc-msm8660.c
+++ b/drivers/clk/qcom/gcc-msm8660.c
@@ -2702,7 +2702,6 @@
 	.val_bits	= 32,
 	.max_register	= 0x363c,
 	.fast_io	= true,
-	.val_format_endian = REGMAP_ENDIAN_LITTLE,
 };
 
 static const struct qcom_cc_desc gcc_msm8660_desc = {
diff --git a/drivers/clk/qcom/gcc-msm8916.c b/drivers/clk/qcom/gcc-msm8916.c
index e3bf09d..8cc9b28 100644
--- a/drivers/clk/qcom/gcc-msm8916.c
+++ b/drivers/clk/qcom/gcc-msm8916.c
@@ -3336,7 +3336,6 @@
 	.val_bits	= 32,
 	.max_register	= 0x80000,
 	.fast_io	= true,
-	.val_format_endian = REGMAP_ENDIAN_LITTLE,
 };
 
 static const struct qcom_cc_desc gcc_msm8916_desc = {
diff --git a/drivers/clk/qcom/gcc-msm8960.c b/drivers/clk/qcom/gcc-msm8960.c
index f31111e..983dd7d 100644
--- a/drivers/clk/qcom/gcc-msm8960.c
+++ b/drivers/clk/qcom/gcc-msm8960.c
@@ -3468,7 +3468,6 @@
 	.val_bits	= 32,
 	.max_register	= 0x3660,
 	.fast_io	= true,
-	.val_format_endian = REGMAP_ENDIAN_LITTLE,
 };
 
 static const struct regmap_config gcc_apq8064_regmap_config = {
@@ -3477,7 +3476,6 @@
 	.val_bits	= 32,
 	.max_register	= 0x3880,
 	.fast_io	= true,
-	.val_format_endian = REGMAP_ENDIAN_LITTLE,
 };
 
 static const struct qcom_cc_desc gcc_msm8960_desc = {
diff --git a/drivers/clk/qcom/gcc-msm8974.c b/drivers/clk/qcom/gcc-msm8974.c
index df164d6..335952d 100644
--- a/drivers/clk/qcom/gcc-msm8974.c
+++ b/drivers/clk/qcom/gcc-msm8974.c
@@ -2680,7 +2680,6 @@
 	.val_bits	= 32,
 	.max_register	= 0x1fc0,
 	.fast_io	= true,
-	.val_format_endian = REGMAP_ENDIAN_LITTLE,
 };
 
 static const struct qcom_cc_desc gcc_msm8974_desc = {
diff --git a/drivers/clk/qcom/lcc-ipq806x.c b/drivers/clk/qcom/lcc-ipq806x.c
index 62e79fa..db3998e 100644
--- a/drivers/clk/qcom/lcc-ipq806x.c
+++ b/drivers/clk/qcom/lcc-ipq806x.c
@@ -419,7 +419,6 @@
 	.val_bits	= 32,
 	.max_register	= 0xfc,
 	.fast_io	= true,
-	.val_format_endian = REGMAP_ENDIAN_LITTLE,
 };
 
 static const struct qcom_cc_desc lcc_ipq806x_desc = {
diff --git a/drivers/clk/qcom/lcc-msm8960.c b/drivers/clk/qcom/lcc-msm8960.c
index bf95bb0..4fcf9d1 100644
--- a/drivers/clk/qcom/lcc-msm8960.c
+++ b/drivers/clk/qcom/lcc-msm8960.c
@@ -524,7 +524,6 @@
 	.val_bits	= 32,
 	.max_register	= 0xfc,
 	.fast_io	= true,
-	.val_format_endian = REGMAP_ENDIAN_LITTLE,
 };
 
 static const struct qcom_cc_desc lcc_msm8960_desc = {
diff --git a/drivers/clk/qcom/mmcc-apq8084.c b/drivers/clk/qcom/mmcc-apq8084.c
index 1e703fd..30777f9 100644
--- a/drivers/clk/qcom/mmcc-apq8084.c
+++ b/drivers/clk/qcom/mmcc-apq8084.c
@@ -3368,7 +3368,6 @@
 	.val_bits	= 32,
 	.max_register	= 0x5104,
 	.fast_io	= true,
-	.val_format_endian = REGMAP_ENDIAN_LITTLE,
 };
 
 static const struct qcom_cc_desc mmcc_apq8084_desc = {
diff --git a/drivers/clk/qcom/mmcc-msm8960.c b/drivers/clk/qcom/mmcc-msm8960.c
index d73a048..00e3619 100644
--- a/drivers/clk/qcom/mmcc-msm8960.c
+++ b/drivers/clk/qcom/mmcc-msm8960.c
@@ -3029,7 +3029,6 @@
 	.val_bits	= 32,
 	.max_register	= 0x334,
 	.fast_io	= true,
-	.val_format_endian = REGMAP_ENDIAN_LITTLE,
 };
 
 static const struct regmap_config mmcc_apq8064_regmap_config = {
@@ -3038,7 +3037,6 @@
 	.val_bits	= 32,
 	.max_register	= 0x350,
 	.fast_io	= true,
-	.val_format_endian = REGMAP_ENDIAN_LITTLE,
 };
 
 static const struct qcom_cc_desc mmcc_msm8960_desc = {
diff --git a/drivers/clk/qcom/mmcc-msm8974.c b/drivers/clk/qcom/mmcc-msm8974.c
index bbe28ed..9d790bc 100644
--- a/drivers/clk/qcom/mmcc-msm8974.c
+++ b/drivers/clk/qcom/mmcc-msm8974.c
@@ -2594,7 +2594,6 @@
 	.val_bits	= 32,
 	.max_register	= 0x5104,
 	.fast_io	= true,
-	.val_format_endian = REGMAP_ENDIAN_LITTLE,
 };
 
 static const struct qcom_cc_desc mmcc_msm8974_desc = {
diff --git a/drivers/clk/rockchip/clk-rk3036.c b/drivers/clk/rockchip/clk-rk3036.c
index ebce980..bc7fbac 100644
--- a/drivers/clk/rockchip/clk-rk3036.c
+++ b/drivers/clk/rockchip/clk-rk3036.c
@@ -133,7 +133,7 @@
 PNAME(mux_uart0_p)	= { "uart0_src", "uart0_frac", "xin24m" };
 PNAME(mux_uart1_p)	= { "uart1_src", "uart1_frac", "xin24m" };
 PNAME(mux_uart2_p)	= { "uart2_src", "uart2_frac", "xin24m" };
-PNAME(mux_mac_p)	= { "mac_pll_src", "ext_gmac" };
+PNAME(mux_mac_p)	= { "mac_pll_src", "rmii_clkin" };
 PNAME(mux_dclk_p)	= { "dclk_lcdc", "dclk_cru" };
 
 static struct rockchip_pll_clock rk3036_pll_clks[] __initdata = {
@@ -224,16 +224,16 @@
 			RK2928_CLKGATE_CON(2), 2, GFLAGS),
 
 	COMPOSITE_NODIV(SCLK_TIMER0, "sclk_timer0", mux_timer_p, CLK_IGNORE_UNUSED,
-			RK2928_CLKSEL_CON(2), 4, 1, DFLAGS,
+			RK2928_CLKSEL_CON(2), 4, 1, MFLAGS,
 			RK2928_CLKGATE_CON(1), 0, GFLAGS),
 	COMPOSITE_NODIV(SCLK_TIMER1, "sclk_timer1", mux_timer_p, CLK_IGNORE_UNUSED,
-			RK2928_CLKSEL_CON(2), 5, 1, DFLAGS,
+			RK2928_CLKSEL_CON(2), 5, 1, MFLAGS,
 			RK2928_CLKGATE_CON(1), 1, GFLAGS),
 	COMPOSITE_NODIV(SCLK_TIMER2, "sclk_timer2", mux_timer_p, CLK_IGNORE_UNUSED,
-			RK2928_CLKSEL_CON(2), 6, 1, DFLAGS,
+			RK2928_CLKSEL_CON(2), 6, 1, MFLAGS,
 			RK2928_CLKGATE_CON(2), 4, GFLAGS),
 	COMPOSITE_NODIV(SCLK_TIMER3, "sclk_timer3", mux_timer_p, CLK_IGNORE_UNUSED,
-			RK2928_CLKSEL_CON(2), 7, 1, DFLAGS,
+			RK2928_CLKSEL_CON(2), 7, 1, MFLAGS,
 			RK2928_CLKGATE_CON(2), 5, GFLAGS),
 
 	MUX(0, "uart_pll_clk", mux_pll_src_apll_dpll_gpll_usb480m_p, 0,
@@ -242,11 +242,11 @@
 			RK2928_CLKSEL_CON(13), 0, 7, DFLAGS,
 			RK2928_CLKGATE_CON(1), 8, GFLAGS),
 	COMPOSITE_NOMUX(0, "uart1_src", "uart_pll_clk", 0,
-			RK2928_CLKSEL_CON(13), 0, 7, DFLAGS,
-			RK2928_CLKGATE_CON(1), 8, GFLAGS),
+			RK2928_CLKSEL_CON(14), 0, 7, DFLAGS,
+			RK2928_CLKGATE_CON(1), 10, GFLAGS),
 	COMPOSITE_NOMUX(0, "uart2_src", "uart_pll_clk", 0,
-			RK2928_CLKSEL_CON(13), 0, 7, DFLAGS,
-			RK2928_CLKGATE_CON(1), 8, GFLAGS),
+			RK2928_CLKSEL_CON(15), 0, 7, DFLAGS,
+			RK2928_CLKGATE_CON(1), 12, GFLAGS),
 	COMPOSITE_FRACMUX(0, "uart0_frac", "uart0_src", CLK_SET_RATE_PARENT,
 			RK2928_CLKSEL_CON(17), 0,
 			RK2928_CLKGATE_CON(1), 9, GFLAGS,
@@ -279,13 +279,13 @@
 			RK2928_CLKGATE_CON(3), 2, GFLAGS),
 
 	COMPOSITE_NODIV(0, "sclk_sdmmc_src", mux_mmc_src_p, 0,
-			RK2928_CLKSEL_CON(12), 8, 2, DFLAGS,
+			RK2928_CLKSEL_CON(12), 8, 2, MFLAGS,
 			RK2928_CLKGATE_CON(2), 11, GFLAGS),
 	DIV(SCLK_SDMMC, "sclk_sdmmc", "sclk_sdmmc_src", 0,
 			RK2928_CLKSEL_CON(11), 0, 7, DFLAGS),
 
 	COMPOSITE_NODIV(0, "sclk_sdio_src", mux_mmc_src_p, 0,
-			RK2928_CLKSEL_CON(12), 10, 2, DFLAGS,
+			RK2928_CLKSEL_CON(12), 10, 2, MFLAGS,
 			RK2928_CLKGATE_CON(2), 13, GFLAGS),
 	DIV(SCLK_SDIO, "sclk_sdio", "sclk_sdio_src", 0,
 			RK2928_CLKSEL_CON(11), 8, 7, DFLAGS),
@@ -344,12 +344,12 @@
 			RK2928_CLKGATE_CON(10), 5, GFLAGS),
 
 	COMPOSITE_NOGATE(0, "mac_pll_src", mux_pll_src_3plls_p, 0,
-			RK2928_CLKSEL_CON(21), 0, 2, MFLAGS, 4, 5, DFLAGS),
+			RK2928_CLKSEL_CON(21), 0, 2, MFLAGS, 9, 5, DFLAGS),
 	MUX(SCLK_MACREF, "mac_clk_ref", mux_mac_p, CLK_SET_RATE_PARENT,
 			RK2928_CLKSEL_CON(21), 3, 1, MFLAGS),
 
 	COMPOSITE_NOMUX(SCLK_MAC, "mac_clk", "mac_clk_ref", 0,
-			RK2928_CLKSEL_CON(21), 9, 5, DFLAGS,
+			RK2928_CLKSEL_CON(21), 4, 5, DFLAGS,
 			RK2928_CLKGATE_CON(2), 6, GFLAGS),
 
 	MUX(SCLK_HDMI, "dclk_hdmi", mux_dclk_p, 0,
diff --git a/drivers/clk/rockchip/clk-rk3368.c b/drivers/clk/rockchip/clk-rk3368.c
index be0ede5..21f3ea9 100644
--- a/drivers/clk/rockchip/clk-rk3368.c
+++ b/drivers/clk/rockchip/clk-rk3368.c
@@ -780,13 +780,13 @@
 	GATE(PCLK_TSADC, "pclk_tsadc", "pclk_peri", 0, RK3368_CLKGATE_CON(20), 0, GFLAGS),
 
 	/* pclk_pd_alive gates */
-	GATE(PCLK_TIMER1, "pclk_timer1", "pclk_pd_alive", 0, RK3368_CLKGATE_CON(14), 8, GFLAGS),
-	GATE(PCLK_TIMER0, "pclk_timer0", "pclk_pd_alive", 0, RK3368_CLKGATE_CON(14), 7, GFLAGS),
-	GATE(0, "pclk_alive_niu", "pclk_pd_alive", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(14), 12, GFLAGS),
-	GATE(PCLK_GRF, "pclk_grf", "pclk_pd_alive", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(14), 11, GFLAGS),
-	GATE(PCLK_GPIO3, "pclk_gpio3", "pclk_pd_alive", 0, RK3368_CLKGATE_CON(14), 3, GFLAGS),
-	GATE(PCLK_GPIO2, "pclk_gpio2", "pclk_pd_alive", 0, RK3368_CLKGATE_CON(14), 2, GFLAGS),
-	GATE(PCLK_GPIO1, "pclk_gpio1", "pclk_pd_alive", 0, RK3368_CLKGATE_CON(14), 1, GFLAGS),
+	GATE(PCLK_TIMER1, "pclk_timer1", "pclk_pd_alive", 0, RK3368_CLKGATE_CON(22), 13, GFLAGS),
+	GATE(PCLK_TIMER0, "pclk_timer0", "pclk_pd_alive", 0, RK3368_CLKGATE_CON(22), 12, GFLAGS),
+	GATE(0, "pclk_alive_niu", "pclk_pd_alive", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(22), 9, GFLAGS),
+	GATE(PCLK_GRF, "pclk_grf", "pclk_pd_alive", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(22), 8, GFLAGS),
+	GATE(PCLK_GPIO3, "pclk_gpio3", "pclk_pd_alive", 0, RK3368_CLKGATE_CON(22), 3, GFLAGS),
+	GATE(PCLK_GPIO2, "pclk_gpio2", "pclk_pd_alive", 0, RK3368_CLKGATE_CON(22), 2, GFLAGS),
+	GATE(PCLK_GPIO1, "pclk_gpio1", "pclk_pd_alive", 0, RK3368_CLKGATE_CON(22), 1, GFLAGS),
 
 	/*
 	 * pclk_vio gates
@@ -796,12 +796,12 @@
 	GATE(0, "pclk_dphytx", "hclk_vio", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(14), 8, GFLAGS),
 
 	/* pclk_pd_pmu gates */
-	GATE(PCLK_PMUGRF, "pclk_pmugrf", "pclk_pd_pmu", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(17), 0, GFLAGS),
-	GATE(PCLK_GPIO0, "pclk_gpio0", "pclk_pd_pmu", 0, RK3368_CLKGATE_CON(17), 4, GFLAGS),
-	GATE(PCLK_SGRF, "pclk_sgrf", "pclk_pd_pmu", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(17), 3, GFLAGS),
-	GATE(0, "pclk_pmu_noc", "pclk_pd_pmu", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(17), 2, GFLAGS),
-	GATE(0, "pclk_intmem1", "pclk_pd_pmu", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(17), 1, GFLAGS),
-	GATE(PCLK_PMU, "pclk_pmu", "pclk_pd_pmu", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(17), 2, GFLAGS),
+	GATE(PCLK_PMUGRF, "pclk_pmugrf", "pclk_pd_pmu", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(23), 5, GFLAGS),
+	GATE(PCLK_GPIO0, "pclk_gpio0", "pclk_pd_pmu", 0, RK3368_CLKGATE_CON(23), 4, GFLAGS),
+	GATE(PCLK_SGRF, "pclk_sgrf", "pclk_pd_pmu", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(23), 3, GFLAGS),
+	GATE(0, "pclk_pmu_noc", "pclk_pd_pmu", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(23), 2, GFLAGS),
+	GATE(0, "pclk_intmem1", "pclk_pd_pmu", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(23), 1, GFLAGS),
+	GATE(PCLK_PMU, "pclk_pmu", "pclk_pd_pmu", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(23), 0, GFLAGS),
 
 	/* timer gates */
 	GATE(0, "sclk_timer15", "xin24m", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(24), 11, GFLAGS),
diff --git a/drivers/clk/tegra/clk-emc.c b/drivers/clk/tegra/clk-emc.c
index e1fe8f3..74e7544 100644
--- a/drivers/clk/tegra/clk-emc.c
+++ b/drivers/clk/tegra/clk-emc.c
@@ -450,8 +450,10 @@
 		struct emc_timing *timing = tegra->timings + (i++);
 
 		err = load_one_timing_from_dt(tegra, timing, child);
-		if (err)
+		if (err) {
+			of_node_put(child);
 			return err;
+		}
 
 		timing->ram_code = ram_code;
 	}
@@ -499,9 +501,9 @@
 		 * fuses until the apbmisc driver is loaded.
 		 */
 		err = load_timings_from_dt(tegra, node, node_ram_code);
+		of_node_put(node);
 		if (err)
 			return ERR_PTR(err);
-		of_node_put(node);
 		break;
 	}
 
diff --git a/drivers/clk/tegra/clk-id.h b/drivers/clk/tegra/clk-id.h
index 19ce073..62ea381 100644
--- a/drivers/clk/tegra/clk-id.h
+++ b/drivers/clk/tegra/clk-id.h
@@ -11,6 +11,7 @@
 	tegra_clk_afi,
 	tegra_clk_amx,
 	tegra_clk_amx1,
+	tegra_clk_apb2ape,
 	tegra_clk_apbdma,
 	tegra_clk_apbif,
 	tegra_clk_ape,
diff --git a/drivers/clk/tegra/clk-pll.c b/drivers/clk/tegra/clk-pll.c
index a534bfa..6ac3f84 100644
--- a/drivers/clk/tegra/clk-pll.c
+++ b/drivers/clk/tegra/clk-pll.c
@@ -86,15 +86,21 @@
 #define PLLE_SS_DISABLE (PLLE_SS_CNTL_BYPASS_SS | PLLE_SS_CNTL_INTERP_RESET |\
 				PLLE_SS_CNTL_SSC_BYP)
 #define PLLE_SS_MAX_MASK 0x1ff
-#define PLLE_SS_MAX_VAL 0x25
+#define PLLE_SS_MAX_VAL_TEGRA114 0x25
+#define PLLE_SS_MAX_VAL_TEGRA210 0x21
 #define PLLE_SS_INC_MASK (0xff << 16)
 #define PLLE_SS_INC_VAL (0x1 << 16)
 #define PLLE_SS_INCINTRV_MASK (0x3f << 24)
-#define PLLE_SS_INCINTRV_VAL (0x20 << 24)
+#define PLLE_SS_INCINTRV_VAL_TEGRA114 (0x20 << 24)
+#define PLLE_SS_INCINTRV_VAL_TEGRA210 (0x23 << 24)
 #define PLLE_SS_COEFFICIENTS_MASK \
 	(PLLE_SS_MAX_MASK | PLLE_SS_INC_MASK | PLLE_SS_INCINTRV_MASK)
-#define PLLE_SS_COEFFICIENTS_VAL \
-	(PLLE_SS_MAX_VAL | PLLE_SS_INC_VAL | PLLE_SS_INCINTRV_VAL)
+#define PLLE_SS_COEFFICIENTS_VAL_TEGRA114 \
+	(PLLE_SS_MAX_VAL_TEGRA114 | PLLE_SS_INC_VAL |\
+	 PLLE_SS_INCINTRV_VAL_TEGRA114)
+#define PLLE_SS_COEFFICIENTS_VAL_TEGRA210 \
+	(PLLE_SS_MAX_VAL_TEGRA210 | PLLE_SS_INC_VAL |\
+	 PLLE_SS_INCINTRV_VAL_TEGRA210)
 
 #define PLLE_AUX_PLLP_SEL	BIT(2)
 #define PLLE_AUX_USE_LOCKDET	BIT(3)
@@ -880,7 +886,7 @@
 static int clk_plle_enable(struct clk_hw *hw)
 {
 	struct tegra_clk_pll *pll = to_clk_pll(hw);
-	unsigned long input_rate = clk_get_rate(clk_get_parent(hw->clk));
+	unsigned long input_rate = clk_hw_get_rate(clk_hw_get_parent(hw));
 	struct tegra_clk_pll_freq_table sel;
 	u32 val;
 	int err;
@@ -1378,7 +1384,7 @@
 	u32 val;
 	int ret;
 	unsigned long flags = 0;
-	unsigned long input_rate = clk_get_rate(clk_get_parent(hw->clk));
+	unsigned long input_rate = clk_hw_get_rate(clk_hw_get_parent(hw));
 
 	if (_get_table_rate(hw, &sel, pll->params->fixed_rate, input_rate))
 		return -EINVAL;
@@ -1401,7 +1407,7 @@
 	val |= PLLE_MISC_IDDQ_SW_CTRL;
 	val &= ~PLLE_MISC_IDDQ_SW_VALUE;
 	val |= PLLE_MISC_PLLE_PTS;
-	val |= PLLE_MISC_VREG_BG_CTRL_MASK | PLLE_MISC_VREG_CTRL_MASK;
+	val &= ~(PLLE_MISC_VREG_BG_CTRL_MASK | PLLE_MISC_VREG_CTRL_MASK);
 	pll_writel_misc(val, pll);
 	udelay(5);
 
@@ -1428,7 +1434,7 @@
 	val = pll_readl(PLLE_SS_CTRL, pll);
 	val &= ~(PLLE_SS_CNTL_CENTER | PLLE_SS_CNTL_INVERT);
 	val &= ~PLLE_SS_COEFFICIENTS_MASK;
-	val |= PLLE_SS_COEFFICIENTS_VAL;
+	val |= PLLE_SS_COEFFICIENTS_VAL_TEGRA114;
 	pll_writel(val, PLLE_SS_CTRL, pll);
 	val &= ~(PLLE_SS_CNTL_SSC_BYP | PLLE_SS_CNTL_BYPASS_SS);
 	pll_writel(val, PLLE_SS_CTRL, pll);
@@ -2012,9 +2018,9 @@
 	struct tegra_clk_pll *pll = to_clk_pll(hw);
 	struct tegra_clk_pll_freq_table sel;
 	u32 val;
-	int ret;
+	int ret = 0;
 	unsigned long flags = 0;
-	unsigned long input_rate = clk_get_rate(clk_get_parent(hw->clk));
+	unsigned long input_rate = clk_hw_get_rate(clk_hw_get_parent(hw));
 
 	if (_get_table_rate(hw, &sel, pll->params->fixed_rate, input_rate))
 		return -EINVAL;
@@ -2022,22 +2028,20 @@
 	if (pll->lock)
 		spin_lock_irqsave(pll->lock, flags);
 
+	val = pll_readl(pll->params->aux_reg, pll);
+	if (val & PLLE_AUX_SEQ_ENABLE)
+		goto out;
+
 	val = pll_readl_base(pll);
 	val &= ~BIT(30); /* Disable lock override */
 	pll_writel_base(val, pll);
 
-	val = pll_readl(pll->params->aux_reg, pll);
-	val |= PLLE_AUX_ENABLE_SWCTL;
-	val &= ~PLLE_AUX_SEQ_ENABLE;
-	pll_writel(val, pll->params->aux_reg, pll);
-	udelay(1);
-
 	val = pll_readl_misc(pll);
 	val |= PLLE_MISC_LOCK_ENABLE;
 	val |= PLLE_MISC_IDDQ_SW_CTRL;
 	val &= ~PLLE_MISC_IDDQ_SW_VALUE;
 	val |= PLLE_MISC_PLLE_PTS;
-	val |= PLLE_MISC_VREG_BG_CTRL_MASK | PLLE_MISC_VREG_CTRL_MASK;
+	val &= ~(PLLE_MISC_VREG_BG_CTRL_MASK | PLLE_MISC_VREG_CTRL_MASK);
 	pll_writel_misc(val, pll);
 	udelay(5);
 
@@ -2067,7 +2071,7 @@
 	val = pll_readl(PLLE_SS_CTRL, pll);
 	val &= ~(PLLE_SS_CNTL_CENTER | PLLE_SS_CNTL_INVERT);
 	val &= ~PLLE_SS_COEFFICIENTS_MASK;
-	val |= PLLE_SS_COEFFICIENTS_VAL;
+	val |= PLLE_SS_COEFFICIENTS_VAL_TEGRA210;
 	pll_writel(val, PLLE_SS_CTRL, pll);
 	val &= ~(PLLE_SS_CNTL_SSC_BYP | PLLE_SS_CNTL_BYPASS_SS);
 	pll_writel(val, PLLE_SS_CTRL, pll);
@@ -2104,15 +2108,25 @@
 	if (pll->lock)
 		spin_lock_irqsave(pll->lock, flags);
 
+	/* If PLLE HW sequencer is enabled, SW should not disable PLLE */
+	val = pll_readl(pll->params->aux_reg, pll);
+	if (val & PLLE_AUX_SEQ_ENABLE)
+		goto out;
+
 	val = pll_readl_base(pll);
 	val &= ~PLLE_BASE_ENABLE;
 	pll_writel_base(val, pll);
 
+	val = pll_readl(pll->params->aux_reg, pll);
+	val |= PLLE_AUX_ENABLE_SWCTL | PLLE_AUX_SS_SWCTL;
+	pll_writel(val, pll->params->aux_reg, pll);
+
 	val = pll_readl_misc(pll);
 	val |= PLLE_MISC_IDDQ_SW_CTRL | PLLE_MISC_IDDQ_SW_VALUE;
 	pll_writel_misc(val, pll);
 	udelay(1);
 
+out:
 	if (pll->lock)
 		spin_unlock_irqrestore(pll->lock, flags);
 }
diff --git a/drivers/clk/tegra/clk-tegra-periph.c b/drivers/clk/tegra/clk-tegra-periph.c
index 6ad381a..ea2b9cbf 100644
--- a/drivers/clk/tegra/clk-tegra-periph.c
+++ b/drivers/clk/tegra/clk-tegra-periph.c
@@ -773,7 +773,7 @@
 	XUSB("xusb_dev_src", mux_clkm_pllp_pllc_pllre, CLK_SOURCE_XUSB_DEV_SRC, 95, TEGRA_PERIPH_ON_APB | TEGRA_PERIPH_NO_RESET, tegra_clk_xusb_dev_src),
 	XUSB("xusb_dev_src", mux_clkm_pllp_pllre, CLK_SOURCE_XUSB_DEV_SRC, 95, TEGRA_PERIPH_ON_APB | TEGRA_PERIPH_NO_RESET, tegra_clk_xusb_dev_src_8),
 	MUX8("dbgapb", mux_pllp_clkm_2, CLK_SOURCE_DBGAPB, 185, TEGRA_PERIPH_NO_RESET, tegra_clk_dbgapb),
-	MUX8("msenc", mux_pllc2_c_c3_pllp_plla1_clkm, CLK_SOURCE_NVENC, 219, 0, tegra_clk_nvenc),
+	MUX8("nvenc", mux_pllc2_c_c3_pllp_plla1_clkm, CLK_SOURCE_NVENC, 219, 0, tegra_clk_nvenc),
 	MUX8("nvdec", mux_pllc2_c_c3_pllp_plla1_clkm, CLK_SOURCE_NVDEC, 194, 0, tegra_clk_nvdec),
 	MUX8("nvjpg", mux_pllc2_c_c3_pllp_plla1_clkm, CLK_SOURCE_NVJPG, 195, 0, tegra_clk_nvjpg),
 	MUX8("ape", mux_plla_pllc4_out0_pllc_pllc4_out1_pllp_pllc4_out2_clkm, CLK_SOURCE_APE, 198, TEGRA_PERIPH_ON_APB, tegra_clk_ape),
@@ -782,7 +782,7 @@
 	NODIV("sor1", mux_clkm_sor1_brick_sor1_src, CLK_SOURCE_SOR1, 15, MASK(1), 183, 0, tegra_clk_sor1, &sor1_lock),
 	MUX8("sdmmc_legacy", mux_pllp_out3_clkm_pllp_pllc4, CLK_SOURCE_SDMMC_LEGACY, 193, TEGRA_PERIPH_ON_APB | TEGRA_PERIPH_NO_RESET, tegra_clk_sdmmc_legacy),
 	MUX8("qspi", mux_pllp_pllc_pllc_out1_pllc4_out2_pllc4_out1_clkm_pllc4_out0, CLK_SOURCE_QSPI, 211, TEGRA_PERIPH_ON_APB, tegra_clk_qspi),
-	MUX("vii2c", mux_pllp_pllc_clkm, CLK_SOURCE_VI_I2C, 208, TEGRA_PERIPH_ON_APB, tegra_clk_vi_i2c),
+	I2C("vii2c", mux_pllp_pllc_clkm, CLK_SOURCE_VI_I2C, 208, tegra_clk_vi_i2c),
 	MUX("mipibif", mux_pllp_clkm, CLK_SOURCE_MIPIBIF, 173, TEGRA_PERIPH_ON_APB, tegra_clk_mipibif),
 	MUX("uartape", mux_pllp_pllc_clkm, CLK_SOURCE_UARTAPE, 212, TEGRA_PERIPH_ON_APB | TEGRA_PERIPH_NO_RESET, tegra_clk_uartape),
 	MUX8("tsecb", mux_pllp_pllc2_c_c3_clkm, CLK_SOURCE_TSECB, 206, 0, tegra_clk_tsecb),
@@ -829,6 +829,7 @@
 	GATE("xusb_gate", "osc", 143, 0, tegra_clk_xusb_gate, 0),
 	GATE("pll_p_out_cpu", "pll_p", 223, 0, tegra_clk_pll_p_out_cpu, 0),
 	GATE("pll_p_out_adsp", "pll_p", 187, 0, tegra_clk_pll_p_out_adsp, 0),
+	GATE("apb2ape", "clk_m", 107, 0, tegra_clk_apb2ape, 0),
 };
 
 static struct tegra_periph_init_data div_clks[] = {
diff --git a/drivers/clk/tegra/clk-tegra-super-gen4.c b/drivers/clk/tegra/clk-tegra-super-gen4.c
index 4559a20..474de0f 100644
--- a/drivers/clk/tegra/clk-tegra-super-gen4.c
+++ b/drivers/clk/tegra/clk-tegra-super-gen4.c
@@ -67,7 +67,7 @@
 					 "pll_p", "pll_p_out4", "unused",
 					 "unused", "pll_x", "pll_x_out0" };
 
-const struct tegra_super_gen_info tegra_super_gen_info_gen4 = {
+static const struct tegra_super_gen_info tegra_super_gen_info_gen4 = {
 	.gen = gen4,
 	.sclk_parents = sclk_parents,
 	.cclk_g_parents = cclk_g_parents,
@@ -93,7 +93,7 @@
 					"unused", "unused", "unused", "unused",
 					"dfllCPU_out" };
 
-const struct tegra_super_gen_info tegra_super_gen_info_gen5 = {
+static const struct tegra_super_gen_info tegra_super_gen_info_gen5 = {
 	.gen = gen5,
 	.sclk_parents = sclk_parents_gen5,
 	.cclk_g_parents = cclk_g_parents_gen5,
@@ -171,7 +171,7 @@
 	*dt_clk = clk;
 }
 
-void __init tegra_super_clk_init(void __iomem *clk_base,
+static void __init tegra_super_clk_init(void __iomem *clk_base,
 				void __iomem *pmc_base,
 				struct tegra_clk *tegra_clks,
 				struct tegra_clk_pll_params *params,
diff --git a/drivers/clk/tegra/clk-tegra210.c b/drivers/clk/tegra/clk-tegra210.c
index 58514c4..637041f 100644
--- a/drivers/clk/tegra/clk-tegra210.c
+++ b/drivers/clk/tegra/clk-tegra210.c
@@ -59,8 +59,8 @@
 #define PLLC3_MISC3 0x50c
 
 #define PLLM_BASE 0x90
-#define PLLM_MISC0 0x9c
 #define PLLM_MISC1 0x98
+#define PLLM_MISC2 0x9c
 #define PLLP_BASE 0xa0
 #define PLLP_MISC0 0xac
 #define PLLP_MISC1 0x680
@@ -99,7 +99,7 @@
 #define PLLC4_MISC0 0x5a8
 #define PLLC4_OUT 0x5e4
 #define PLLMB_BASE 0x5e8
-#define PLLMB_MISC0 0x5ec
+#define PLLMB_MISC1 0x5ec
 #define PLLA1_BASE 0x6a4
 #define PLLA1_MISC0 0x6a8
 #define PLLA1_MISC1 0x6ac
@@ -243,7 +243,8 @@
 };
 
 static const char *mux_pllmcp_clkm[] = {
-	"pll_m", "pll_c", "pll_p", "clk_m", "pll_m_ud", "pll_c2", "pll_c3",
+	"pll_m", "pll_c", "pll_p", "clk_m", "pll_m_ud", "pll_mb", "pll_mb",
+	"pll_p",
 };
 #define mux_pllmcp_clkm_idx NULL
 
@@ -367,12 +368,12 @@
 /* PLLMB */
 #define PLLMB_BASE_LOCK			(1 << 27)
 
-#define PLLMB_MISC0_LOCK_OVERRIDE	(1 << 18)
-#define PLLMB_MISC0_IDDQ		(1 << 17)
-#define PLLMB_MISC0_LOCK_ENABLE		(1 << 16)
+#define PLLMB_MISC1_LOCK_OVERRIDE	(1 << 18)
+#define PLLMB_MISC1_IDDQ		(1 << 17)
+#define PLLMB_MISC1_LOCK_ENABLE		(1 << 16)
 
-#define PLLMB_MISC0_DEFAULT_VALUE	0x00030000
-#define PLLMB_MISC0_WRITE_MASK		0x0007ffff
+#define PLLMB_MISC1_DEFAULT_VALUE	0x00030000
+#define PLLMB_MISC1_WRITE_MASK		0x0007ffff
 
 /* PLLP */
 #define PLLP_BASE_OVERRIDE		(1 << 28)
@@ -457,7 +458,8 @@
 			PLLCX_MISC3_WRITE_MASK);
 }
 
-void tegra210_pllcx_set_defaults(const char *name, struct tegra_clk_pll *pllcx)
+static void tegra210_pllcx_set_defaults(const char *name,
+					struct tegra_clk_pll *pllcx)
 {
 	pllcx->params->defaults_set = true;
 
@@ -482,22 +484,22 @@
 	udelay(1);
 }
 
-void _pllc_set_defaults(struct tegra_clk_pll *pllcx)
+static void _pllc_set_defaults(struct tegra_clk_pll *pllcx)
 {
 	tegra210_pllcx_set_defaults("PLL_C", pllcx);
 }
 
-void _pllc2_set_defaults(struct tegra_clk_pll *pllcx)
+static void _pllc2_set_defaults(struct tegra_clk_pll *pllcx)
 {
 	tegra210_pllcx_set_defaults("PLL_C2", pllcx);
 }
 
-void _pllc3_set_defaults(struct tegra_clk_pll *pllcx)
+static void _pllc3_set_defaults(struct tegra_clk_pll *pllcx)
 {
 	tegra210_pllcx_set_defaults("PLL_C3", pllcx);
 }
 
-void _plla1_set_defaults(struct tegra_clk_pll *pllcx)
+static void _plla1_set_defaults(struct tegra_clk_pll *pllcx)
 {
 	tegra210_pllcx_set_defaults("PLL_A1", pllcx);
 }
@@ -507,7 +509,7 @@
  * PLL with dynamic ramp and fractional SDM. Dynamic ramp is not used.
  * Fractional SDM is allowed to provide exact audio rates.
  */
-void tegra210_plla_set_defaults(struct tegra_clk_pll *plla)
+static void tegra210_plla_set_defaults(struct tegra_clk_pll *plla)
 {
 	u32 mask;
 	u32 val = readl_relaxed(clk_base + plla->params->base_reg);
@@ -559,7 +561,7 @@
  * PLLD
  * PLL with fractional SDM.
  */
-void tegra210_plld_set_defaults(struct tegra_clk_pll *plld)
+static void tegra210_plld_set_defaults(struct tegra_clk_pll *plld)
 {
 	u32 val;
 	u32 mask = 0xffff;
@@ -698,7 +700,7 @@
 	udelay(1);
 }
 
-void tegra210_plld2_set_defaults(struct tegra_clk_pll *plld2)
+static void tegra210_plld2_set_defaults(struct tegra_clk_pll *plld2)
 {
 	plldss_defaults("PLL_D2", plld2, PLLD2_MISC0_DEFAULT_VALUE,
 			PLLD2_MISC1_CFG_DEFAULT_VALUE,
@@ -706,7 +708,7 @@
 			PLLD2_MISC3_CTRL2_DEFAULT_VALUE);
 }
 
-void tegra210_plldp_set_defaults(struct tegra_clk_pll *plldp)
+static void tegra210_plldp_set_defaults(struct tegra_clk_pll *plldp)
 {
 	plldss_defaults("PLL_DP", plldp, PLLDP_MISC0_DEFAULT_VALUE,
 			PLLDP_MISC1_CFG_DEFAULT_VALUE,
@@ -719,7 +721,7 @@
  * Base and misc0 layout is the same as PLLD2/PLLDP, but no SDM/SSC support.
  * VCO is exposed to the clock tree via fixed 1/3 and 1/5 dividers.
  */
-void tegra210_pllc4_set_defaults(struct tegra_clk_pll *pllc4)
+static void tegra210_pllc4_set_defaults(struct tegra_clk_pll *pllc4)
 {
 	plldss_defaults("PLL_C4", pllc4, PLLC4_MISC0_DEFAULT_VALUE, 0, 0, 0);
 }
@@ -728,7 +730,7 @@
  * PLLRE
  * VCO is exposed to the clock tree directly along with post-divider output
  */
-void tegra210_pllre_set_defaults(struct tegra_clk_pll *pllre)
+static void tegra210_pllre_set_defaults(struct tegra_clk_pll *pllre)
 {
 	u32 mask;
 	u32 val = readl_relaxed(clk_base + pllre->params->base_reg);
@@ -780,13 +782,13 @@
 {
 	unsigned long input_rate;
 
-	if (!IS_ERR_OR_NULL(hw->clk)) {
+	/* cf rate */
+	if (!IS_ERR_OR_NULL(hw->clk))
 		input_rate = clk_hw_get_rate(clk_hw_get_parent(hw));
-		/* cf rate */
-		input_rate /= tegra_pll_get_fixed_mdiv(hw, input_rate);
-	} else {
+	else
 		input_rate = 38400000;
-	}
+
+	input_rate /= tegra_pll_get_fixed_mdiv(hw, input_rate);
 
 	switch (input_rate) {
 	case 12000000:
@@ -841,7 +843,7 @@
 			PLLX_MISC5_WRITE_MASK);
 }
 
-void tegra210_pllx_set_defaults(struct tegra_clk_pll *pllx)
+static void tegra210_pllx_set_defaults(struct tegra_clk_pll *pllx)
 {
 	u32 val;
 	u32 step_a, step_b;
@@ -901,7 +903,7 @@
 }
 
 /* PLLMB */
-void tegra210_pllmb_set_defaults(struct tegra_clk_pll *pllmb)
+static void tegra210_pllmb_set_defaults(struct tegra_clk_pll *pllmb)
 {
 	u32 mask, val = readl_relaxed(clk_base + pllmb->params->base_reg);
 
@@ -914,15 +916,15 @@
 		 * PLL is ON: check if defaults already set, then set those
 		 * that can be updated in flight.
 		 */
-		val = PLLMB_MISC0_DEFAULT_VALUE & (~PLLMB_MISC0_IDDQ);
-		mask = PLLMB_MISC0_LOCK_ENABLE | PLLMB_MISC0_LOCK_OVERRIDE;
+		val = PLLMB_MISC1_DEFAULT_VALUE & (~PLLMB_MISC1_IDDQ);
+		mask = PLLMB_MISC1_LOCK_ENABLE | PLLMB_MISC1_LOCK_OVERRIDE;
 		_pll_misc_chk_default(clk_base, pllmb->params, 0, val,
-				~mask & PLLMB_MISC0_WRITE_MASK);
+				~mask & PLLMB_MISC1_WRITE_MASK);
 
 		/* Enable lock detect */
 		val = readl_relaxed(clk_base + pllmb->params->ext_misc_reg[0]);
 		val &= ~mask;
-		val |= PLLMB_MISC0_DEFAULT_VALUE & mask;
+		val |= PLLMB_MISC1_DEFAULT_VALUE & mask;
 		writel_relaxed(val, clk_base + pllmb->params->ext_misc_reg[0]);
 		udelay(1);
 
@@ -930,7 +932,7 @@
 	}
 
 	/* set IDDQ, enable lock detect */
-	writel_relaxed(PLLMB_MISC0_DEFAULT_VALUE,
+	writel_relaxed(PLLMB_MISC1_DEFAULT_VALUE,
 			clk_base + pllmb->params->ext_misc_reg[0]);
 	udelay(1);
 }
@@ -960,7 +962,7 @@
 			~mask & PLLP_MISC1_WRITE_MASK);
 }
 
-void tegra210_pllp_set_defaults(struct tegra_clk_pll *pllp)
+static void tegra210_pllp_set_defaults(struct tegra_clk_pll *pllp)
 {
 	u32 mask;
 	u32 val = readl_relaxed(clk_base + pllp->params->base_reg);
@@ -1022,7 +1024,7 @@
 			~mask & PLLU_MISC1_WRITE_MASK);
 }
 
-void tegra210_pllu_set_defaults(struct tegra_clk_pll *pllu)
+static void tegra210_pllu_set_defaults(struct tegra_clk_pll *pllu)
 {
 	u32 val = readl_relaxed(clk_base + pllu->params->base_reg);
 
@@ -1212,8 +1214,9 @@
 	cfg->m *= PLL_SDM_COEFF;
 }
 
-unsigned long tegra210_clk_adjust_vco_min(struct tegra_clk_pll_params *params,
-					  unsigned long parent_rate)
+static unsigned long
+tegra210_clk_adjust_vco_min(struct tegra_clk_pll_params *params,
+			    unsigned long parent_rate)
 {
 	unsigned long vco_min = params->vco_min;
 
@@ -1386,7 +1389,7 @@
 	.mdiv_default = 3,
 	.div_nmp = &pllc_nmp,
 	.freq_table = pll_cx_freq_table,
-	.flags = TEGRA_PLL_USE_LOCK | TEGRA_PLL_HAS_LOCK_ENABLE,
+	.flags = TEGRA_PLL_USE_LOCK,
 	.set_defaults = _pllc_set_defaults,
 	.calc_rate = tegra210_pll_fixed_mdiv_cfg,
 };
@@ -1425,7 +1428,7 @@
 	.ext_misc_reg[2] = PLLC2_MISC2,
 	.ext_misc_reg[3] = PLLC2_MISC3,
 	.freq_table = pll_cx_freq_table,
-	.flags = TEGRA_PLL_USE_LOCK | TEGRA_PLL_HAS_LOCK_ENABLE,
+	.flags = TEGRA_PLL_USE_LOCK,
 	.set_defaults = _pllc2_set_defaults,
 	.calc_rate = tegra210_pll_fixed_mdiv_cfg,
 };
@@ -1455,7 +1458,7 @@
 	.ext_misc_reg[2] = PLLC3_MISC2,
 	.ext_misc_reg[3] = PLLC3_MISC3,
 	.freq_table = pll_cx_freq_table,
-	.flags = TEGRA_PLL_USE_LOCK | TEGRA_PLL_HAS_LOCK_ENABLE,
+	.flags = TEGRA_PLL_USE_LOCK,
 	.set_defaults = _pllc3_set_defaults,
 	.calc_rate = tegra210_pll_fixed_mdiv_cfg,
 };
@@ -1505,7 +1508,6 @@
 	.base_reg = PLLC4_BASE,
 	.misc_reg = PLLC4_MISC0,
 	.lock_mask = PLL_BASE_LOCK,
-	.lock_enable_bit_idx = PLLSS_MISC_LOCK_ENABLE,
 	.lock_delay = 300,
 	.max_p = PLL_QLIN_PDIV_MAX,
 	.ext_misc_reg[0] = PLLC4_MISC0,
@@ -1517,8 +1519,7 @@
 	.div_nmp = &pllss_nmp,
 	.freq_table = pll_c4_vco_freq_table,
 	.set_defaults = tegra210_pllc4_set_defaults,
-	.flags = TEGRA_PLL_USE_LOCK | TEGRA_PLL_HAS_LOCK_ENABLE |
-		 TEGRA_PLL_VCO_OUT,
+	.flags = TEGRA_PLL_USE_LOCK | TEGRA_PLL_VCO_OUT,
 	.calc_rate = tegra210_pll_fixed_mdiv_cfg,
 };
 
@@ -1559,15 +1560,15 @@
 	.vco_min = 800000000,
 	.vco_max = 1866000000,
 	.base_reg = PLLM_BASE,
-	.misc_reg = PLLM_MISC1,
+	.misc_reg = PLLM_MISC2,
 	.lock_mask = PLL_BASE_LOCK,
 	.lock_enable_bit_idx = PLLM_MISC_LOCK_ENABLE,
 	.lock_delay = 300,
-	.iddq_reg = PLLM_MISC0,
+	.iddq_reg = PLLM_MISC2,
 	.iddq_bit_idx = PLLM_IDDQ_BIT,
 	.max_p = PLL_QLIN_PDIV_MAX,
-	.ext_misc_reg[0] = PLLM_MISC0,
-	.ext_misc_reg[0] = PLLM_MISC1,
+	.ext_misc_reg[0] = PLLM_MISC2,
+	.ext_misc_reg[1] = PLLM_MISC1,
 	.round_p_to_pdiv = pll_qlin_p_to_pdiv,
 	.pdiv_tohw = pll_qlin_pdiv_to_hw,
 	.div_nmp = &pllm_nmp,
@@ -1586,19 +1587,18 @@
 	.vco_min = 800000000,
 	.vco_max = 1866000000,
 	.base_reg = PLLMB_BASE,
-	.misc_reg = PLLMB_MISC0,
+	.misc_reg = PLLMB_MISC1,
 	.lock_mask = PLL_BASE_LOCK,
-	.lock_enable_bit_idx = PLLMB_MISC_LOCK_ENABLE,
 	.lock_delay = 300,
-	.iddq_reg = PLLMB_MISC0,
+	.iddq_reg = PLLMB_MISC1,
 	.iddq_bit_idx = PLLMB_IDDQ_BIT,
 	.max_p = PLL_QLIN_PDIV_MAX,
-	.ext_misc_reg[0] = PLLMB_MISC0,
+	.ext_misc_reg[0] = PLLMB_MISC1,
 	.round_p_to_pdiv = pll_qlin_p_to_pdiv,
 	.pdiv_tohw = pll_qlin_pdiv_to_hw,
 	.div_nmp = &pllm_nmp,
 	.freq_table = pll_m_freq_table,
-	.flags = TEGRA_PLL_USE_LOCK | TEGRA_PLL_HAS_LOCK_ENABLE,
+	.flags = TEGRA_PLL_USE_LOCK,
 	.set_defaults = tegra210_pllmb_set_defaults,
 	.calc_rate = tegra210_pll_fixed_mdiv_cfg,
 };
@@ -1671,7 +1671,6 @@
 	.base_reg = PLLRE_BASE,
 	.misc_reg = PLLRE_MISC0,
 	.lock_mask = PLLRE_MISC_LOCK,
-	.lock_enable_bit_idx = PLLRE_MISC_LOCK_ENABLE,
 	.lock_delay = 300,
 	.max_p = PLL_QLIN_PDIV_MAX,
 	.ext_misc_reg[0] = PLLRE_MISC0,
@@ -1681,8 +1680,7 @@
 	.pdiv_tohw = pll_qlin_pdiv_to_hw,
 	.div_nmp = &pllre_nmp,
 	.freq_table = pll_re_vco_freq_table,
-	.flags = TEGRA_PLL_USE_LOCK | TEGRA_PLL_LOCK_MISC |
-		 TEGRA_PLL_HAS_LOCK_ENABLE | TEGRA_PLL_VCO_OUT,
+	.flags = TEGRA_PLL_USE_LOCK | TEGRA_PLL_LOCK_MISC | TEGRA_PLL_VCO_OUT,
 	.set_defaults = tegra210_pllre_set_defaults,
 	.calc_rate = tegra210_pll_fixed_mdiv_cfg,
 };
@@ -1712,7 +1710,6 @@
 	.base_reg = PLLP_BASE,
 	.misc_reg = PLLP_MISC0,
 	.lock_mask = PLL_BASE_LOCK,
-	.lock_enable_bit_idx = PLLP_MISC_LOCK_ENABLE,
 	.lock_delay = 300,
 	.iddq_reg = PLLP_MISC0,
 	.iddq_bit_idx = PLLXP_IDDQ_BIT,
@@ -1721,8 +1718,7 @@
 	.div_nmp = &pllp_nmp,
 	.freq_table = pll_p_freq_table,
 	.fixed_rate = 408000000,
-	.flags = TEGRA_PLL_FIXED | TEGRA_PLL_USE_LOCK |
-		 TEGRA_PLL_HAS_LOCK_ENABLE | TEGRA_PLL_VCO_OUT,
+	.flags = TEGRA_PLL_FIXED | TEGRA_PLL_USE_LOCK | TEGRA_PLL_VCO_OUT,
 	.set_defaults = tegra210_pllp_set_defaults,
 	.calc_rate = tegra210_pll_fixed_mdiv_cfg,
 };
@@ -1750,7 +1746,7 @@
 	.ext_misc_reg[2] = PLLA1_MISC2,
 	.ext_misc_reg[3] = PLLA1_MISC3,
 	.freq_table = pll_cx_freq_table,
-	.flags = TEGRA_PLL_USE_LOCK | TEGRA_PLL_HAS_LOCK_ENABLE,
+	.flags = TEGRA_PLL_USE_LOCK,
 	.set_defaults = _plla1_set_defaults,
 	.calc_rate = tegra210_pll_fixed_mdiv_cfg,
 };
@@ -1787,7 +1783,6 @@
 	.base_reg = PLLA_BASE,
 	.misc_reg = PLLA_MISC0,
 	.lock_mask = PLL_BASE_LOCK,
-	.lock_enable_bit_idx = PLLA_MISC_LOCK_ENABLE,
 	.lock_delay = 300,
 	.round_p_to_pdiv = pll_qlin_p_to_pdiv,
 	.pdiv_tohw = pll_qlin_pdiv_to_hw,
@@ -1802,8 +1797,7 @@
 	.ext_misc_reg[1] = PLLA_MISC1,
 	.ext_misc_reg[2] = PLLA_MISC2,
 	.freq_table = pll_a_freq_table,
-	.flags = TEGRA_PLL_USE_LOCK | TEGRA_MDIV_NEW |
-		 TEGRA_PLL_HAS_LOCK_ENABLE,
+	.flags = TEGRA_PLL_USE_LOCK | TEGRA_MDIV_NEW,
 	.set_defaults = tegra210_plla_set_defaults,
 	.calc_rate = tegra210_pll_fixed_mdiv_cfg,
 	.set_gain = tegra210_clk_pll_set_gain,
@@ -1836,7 +1830,6 @@
 	.base_reg = PLLD_BASE,
 	.misc_reg = PLLD_MISC0,
 	.lock_mask = PLL_BASE_LOCK,
-	.lock_enable_bit_idx = PLLD_MISC_LOCK_ENABLE,
 	.lock_delay = 1000,
 	.iddq_reg = PLLD_MISC0,
 	.iddq_bit_idx = PLLD_IDDQ_BIT,
@@ -1850,7 +1843,7 @@
 	.ext_misc_reg[0] = PLLD_MISC0,
 	.ext_misc_reg[1] = PLLD_MISC1,
 	.freq_table = pll_d_freq_table,
-	.flags = TEGRA_PLL_USE_LOCK | TEGRA_PLL_HAS_LOCK_ENABLE,
+	.flags = TEGRA_PLL_USE_LOCK,
 	.mdiv_default = 1,
 	.set_defaults = tegra210_plld_set_defaults,
 	.calc_rate = tegra210_pll_fixed_mdiv_cfg,
@@ -1876,7 +1869,6 @@
 	.base_reg = PLLD2_BASE,
 	.misc_reg = PLLD2_MISC0,
 	.lock_mask = PLL_BASE_LOCK,
-	.lock_enable_bit_idx = PLLSS_MISC_LOCK_ENABLE,
 	.lock_delay = 300,
 	.iddq_reg = PLLD2_BASE,
 	.iddq_bit_idx = PLLSS_IDDQ_BIT,
@@ -1897,7 +1889,7 @@
 	.mdiv_default = 1,
 	.freq_table = tegra210_pll_d2_freq_table,
 	.set_defaults = tegra210_plld2_set_defaults,
-	.flags = TEGRA_PLL_USE_LOCK | TEGRA_PLL_HAS_LOCK_ENABLE,
+	.flags = TEGRA_PLL_USE_LOCK,
 	.calc_rate = tegra210_pll_fixed_mdiv_cfg,
 	.set_gain = tegra210_clk_pll_set_gain,
 	.adjust_vco = tegra210_clk_adjust_vco_min,
@@ -1920,7 +1912,6 @@
 	.base_reg = PLLDP_BASE,
 	.misc_reg = PLLDP_MISC,
 	.lock_mask = PLL_BASE_LOCK,
-	.lock_enable_bit_idx = PLLSS_MISC_LOCK_ENABLE,
 	.lock_delay = 300,
 	.iddq_reg = PLLDP_BASE,
 	.iddq_bit_idx = PLLSS_IDDQ_BIT,
@@ -1941,7 +1932,7 @@
 	.mdiv_default = 1,
 	.freq_table = pll_dp_freq_table,
 	.set_defaults = tegra210_plldp_set_defaults,
-	.flags = TEGRA_PLL_USE_LOCK | TEGRA_PLL_HAS_LOCK_ENABLE,
+	.flags = TEGRA_PLL_USE_LOCK,
 	.calc_rate = tegra210_pll_fixed_mdiv_cfg,
 	.set_gain = tegra210_clk_pll_set_gain,
 	.adjust_vco = tegra210_clk_adjust_vco_min,
@@ -1973,7 +1964,6 @@
 	.base_reg = PLLU_BASE,
 	.misc_reg = PLLU_MISC0,
 	.lock_mask = PLL_BASE_LOCK,
-	.lock_enable_bit_idx = PLLU_MISC_LOCK_ENABLE,
 	.lock_delay = 1000,
 	.iddq_reg = PLLU_MISC0,
 	.iddq_bit_idx = PLLU_IDDQ_BIT,
@@ -1983,8 +1973,7 @@
 	.pdiv_tohw = pll_qlin_pdiv_to_hw,
 	.div_nmp = &pllu_nmp,
 	.freq_table = pll_u_freq_table,
-	.flags = TEGRA_PLLU | TEGRA_PLL_USE_LOCK | TEGRA_PLL_HAS_LOCK_ENABLE |
-		 TEGRA_PLL_VCO_OUT,
+	.flags = TEGRA_PLLU | TEGRA_PLL_USE_LOCK | TEGRA_PLL_VCO_OUT,
 	.set_defaults = tegra210_pllu_set_defaults,
 	.calc_rate = tegra210_pll_fixed_mdiv_cfg,
 };
@@ -2218,6 +2207,7 @@
 	[tegra_clk_pll_c4_out1] = { .dt_id = TEGRA210_CLK_PLL_C4_OUT1, .present = true },
 	[tegra_clk_pll_c4_out2] = { .dt_id = TEGRA210_CLK_PLL_C4_OUT2, .present = true },
 	[tegra_clk_pll_c4_out3] = { .dt_id = TEGRA210_CLK_PLL_C4_OUT3, .present = true },
+	[tegra_clk_apb2ape] = { .dt_id = TEGRA210_CLK_APB2APE, .present = true },
 };
 
 static struct tegra_devclk devclks[] __initdata = {
@@ -2519,7 +2509,7 @@
 
 	/* PLLU_VCO */
 	val = readl(clk_base + pll_u_vco_params.base_reg);
-	val &= ~BIT(24); /* disable PLLU_OVERRIDE */
+	val &= ~PLLU_BASE_OVERRIDE; /* disable PLLU_OVERRIDE */
 	writel(val, clk_base + pll_u_vco_params.base_reg);
 
 	clk = tegra_clk_register_pllre("pll_u_vco", "pll_ref", clk_base, pmc,
@@ -2738,8 +2728,6 @@
 	{ TEGRA210_CLK_DFLL_REF, TEGRA210_CLK_PLL_P, 51000000, 1 },
 	{ TEGRA210_CLK_SBC4, TEGRA210_CLK_PLL_P, 12000000, 1 },
 	{ TEGRA210_CLK_PLL_RE_VCO, TEGRA210_CLK_CLK_MAX, 672000000, 1 },
-	{ TEGRA210_CLK_PLL_U_OUT1, TEGRA210_CLK_CLK_MAX, 48000000, 1 },
-	{ TEGRA210_CLK_PLL_U_OUT2, TEGRA210_CLK_CLK_MAX, 60000000, 1 },
 	{ TEGRA210_CLK_XUSB_GATE, TEGRA210_CLK_CLK_MAX, 0, 1 },
 	{ TEGRA210_CLK_XUSB_SS_SRC, TEGRA210_CLK_PLL_U_480M, 120000000, 0 },
 	{ TEGRA210_CLK_XUSB_FS_SRC, TEGRA210_CLK_PLL_U_48M, 48000000, 0 },
diff --git a/drivers/clk/ti/dpll3xxx.c b/drivers/clk/ti/dpll3xxx.c
index 1c30038..cc73929 100644
--- a/drivers/clk/ti/dpll3xxx.c
+++ b/drivers/clk/ti/dpll3xxx.c
@@ -460,7 +460,8 @@
 
 	parent = clk_hw_get_parent(hw);
 
-	if (clk_hw_get_rate(hw) == clk_get_rate(dd->clk_bypass)) {
+	if (clk_hw_get_rate(hw) ==
+	    clk_hw_get_rate(__clk_get_hw(dd->clk_bypass))) {
 		WARN_ON(parent != __clk_get_hw(dd->clk_bypass));
 		r = _omap3_noncore_dpll_bypass(clk);
 	} else {
diff --git a/drivers/clk/versatile/clk-icst.c b/drivers/clk/versatile/clk-icst.c
index e62f8cb..3bca438 100644
--- a/drivers/clk/versatile/clk-icst.c
+++ b/drivers/clk/versatile/clk-icst.c
@@ -78,6 +78,9 @@
 	ret = regmap_read(icst->map, icst->vcoreg_off, &val);
 	if (ret)
 		return ret;
+
+	/* Mask the 18 bits used by the VCO */
+	val &= ~0x7ffff;
 	val |= vco.v | (vco.r << 9) | (vco.s << 16);
 
 	/* This magic unlocks the VCO so it can be controlled */
diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig
index c9c4db6..c346be6 100644
--- a/drivers/clocksource/Kconfig
+++ b/drivers/clocksource/Kconfig
@@ -30,6 +30,8 @@
 config DIGICOLOR_TIMER
 	bool "Digicolor timer driver" if COMPILE_TEST
 	depends on GENERIC_CLOCKEVENTS
+	select CLKSRC_MMIO
+	depends on HAS_IOMEM
 	help
 	  Enables the support for the digicolor timer driver.
 
@@ -55,6 +57,7 @@
 	bool "Armada 370 and XP timer driver" if COMPILE_TEST
 	depends on ARM
 	select CLKSRC_OF
+	select CLKSRC_MMIO
 	help
 	  Enables the support for the Armada 370 and XP timer driver.
 
@@ -76,6 +79,7 @@
 config SUN4I_TIMER
 	bool "Sun4i timer driver" if COMPILE_TEST
 	depends on GENERIC_CLOCKEVENTS
+	depends on HAS_IOMEM
 	select CLKSRC_MMIO
 	help
 	  Enables support for the Sun4i timer.
@@ -89,6 +93,7 @@
 
 config TEGRA_TIMER
 	bool "Tegra timer driver" if COMPILE_TEST
+	select CLKSRC_MMIO
 	depends on ARM
 	help
 	  Enables support for the Tegra driver.
@@ -96,6 +101,7 @@
 config VT8500_TIMER
 	bool "VT8500 timer driver" if COMPILE_TEST
 	depends on GENERIC_CLOCKEVENTS
+	depends on HAS_IOMEM
 	help
 	  Enables support for the VT8500 driver.
 
@@ -131,6 +137,7 @@
 config CLKSRC_DBX500_PRCMU
 	bool "Clocksource PRCMU Timer" if COMPILE_TEST
 	depends on GENERIC_CLOCKEVENTS
+	depends on HAS_IOMEM
 	help
 	  Use the always on PRCMU Timer as clocksource
 
@@ -249,6 +256,7 @@
 config CLKSRC_SAMSUNG_PWM
 	bool "PWM timer drvier for Samsung S3C, S5P" if COMPILE_TEST
 	depends on GENERIC_CLOCKEVENTS
+	depends on HAS_IOMEM
 	help
 	  This is a new clocksource driver for the PWM timer found in
 	  Samsung S3C, S5P and Exynos SoCs, replacing an earlier driver
@@ -258,12 +266,14 @@
 config FSL_FTM_TIMER
 	bool "Freescale FlexTimer Module driver" if COMPILE_TEST
 	depends on GENERIC_CLOCKEVENTS
+	depends on HAS_IOMEM
 	select CLKSRC_MMIO
 	help
 	  Support for Freescale FlexTimer Module (FTM) timer.
 
 config VF_PIT_TIMER
 	bool
+	select CLKSRC_MMIO
 	help
 	  Support for Period Interrupt Timer on Freescale Vybrid Family SoCs.
 
@@ -361,6 +371,7 @@
 config CLKSRC_PXA
 	bool "Clocksource for PXA or SA-11x0 platform" if COMPILE_TEST
 	depends on GENERIC_CLOCKEVENTS
+	depends on HAS_IOMEM
 	select CLKSRC_MMIO
 	help
 	  This enables OST0 support available on PXA and SA-11x0
@@ -395,6 +406,7 @@
 	bool "Low power clocksource found in the LPC" if COMPILE_TEST
 	select CLKSRC_OF if OF
 	depends on HAS_IOMEM
+	select CLKSRC_MMIO
 	help
 	  Enable this option to use the Low Power controller timer
 	  as clocksource.
diff --git a/drivers/clocksource/tcb_clksrc.c b/drivers/clocksource/tcb_clksrc.c
index 6ee9140..4da2af9 100644
--- a/drivers/clocksource/tcb_clksrc.c
+++ b/drivers/clocksource/tcb_clksrc.c
@@ -98,7 +98,8 @@
 
 	__raw_writel(0xff, regs + ATMEL_TC_REG(2, IDR));
 	__raw_writel(ATMEL_TC_CLKDIS, regs + ATMEL_TC_REG(2, CCR));
-	clk_disable(tcd->clk);
+	if (!clockevent_state_detached(d))
+		clk_disable(tcd->clk);
 
 	return 0;
 }
diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig
index 659879a..f935110 100644
--- a/drivers/cpufreq/Kconfig
+++ b/drivers/cpufreq/Kconfig
@@ -296,6 +296,7 @@
 config QORIQ_CPUFREQ
 	tristate "CPU frequency scaling driver for Freescale QorIQ SoCs"
 	depends on OF && COMMON_CLK && (PPC_E500MC || ARM)
+	depends on !CPU_THERMAL || THERMAL
 	select CLK_QORIQ
 	help
 	  This adds the CPUFreq driver support for Freescale QorIQ SoCs
diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm
index 0031069..14b1f93 100644
--- a/drivers/cpufreq/Kconfig.arm
+++ b/drivers/cpufreq/Kconfig.arm
@@ -84,10 +84,10 @@
 	  SoCs.
 
 config ARM_MT8173_CPUFREQ
-	bool "Mediatek MT8173 CPUFreq support"
+	tristate "Mediatek MT8173 CPUFreq support"
 	depends on ARCH_MEDIATEK && REGULATOR
 	depends on ARM64 || (ARM_CPU_TOPOLOGY && COMPILE_TEST)
-	depends on !CPU_THERMAL || THERMAL=y
+	depends on !CPU_THERMAL || THERMAL
 	select PM_OPP
 	help
 	  This adds the CPUFreq driver support for Mediatek MT8173 SoC.
diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c
index 9bc37c4..0ca74d0 100644
--- a/drivers/cpufreq/cpufreq-dt.c
+++ b/drivers/cpufreq/cpufreq-dt.c
@@ -142,15 +142,16 @@
 
 try_again:
 	cpu_reg = regulator_get_optional(cpu_dev, reg);
-	if (IS_ERR(cpu_reg)) {
+	ret = PTR_ERR_OR_ZERO(cpu_reg);
+	if (ret) {
 		/*
 		 * If cpu's regulator supply node is present, but regulator is
 		 * not yet registered, we should try defering probe.
 		 */
-		if (PTR_ERR(cpu_reg) == -EPROBE_DEFER) {
+		if (ret == -EPROBE_DEFER) {
 			dev_dbg(cpu_dev, "cpu%d regulator not ready, retry\n",
 				cpu);
-			return -EPROBE_DEFER;
+			return ret;
 		}
 
 		/* Try with "cpu-supply" */
@@ -159,18 +160,16 @@
 			goto try_again;
 		}
 
-		dev_dbg(cpu_dev, "no regulator for cpu%d: %ld\n",
-			cpu, PTR_ERR(cpu_reg));
+		dev_dbg(cpu_dev, "no regulator for cpu%d: %d\n", cpu, ret);
 	}
 
 	cpu_clk = clk_get(cpu_dev, NULL);
-	if (IS_ERR(cpu_clk)) {
+	ret = PTR_ERR_OR_ZERO(cpu_clk);
+	if (ret) {
 		/* put regulator */
 		if (!IS_ERR(cpu_reg))
 			regulator_put(cpu_reg);
 
-		ret = PTR_ERR(cpu_clk);
-
 		/*
 		 * If cpu's clk node is present, but clock is not yet
 		 * registered, we should try defering probe.
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index c35e7da..e979ec7 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -48,11 +48,11 @@
 					  bool active)
 {
 	do {
-		policy = list_next_entry(policy, policy_list);
-
 		/* No more policies in the list */
-		if (&policy->policy_list == &cpufreq_policy_list)
+		if (list_is_last(&policy->policy_list, &cpufreq_policy_list))
 			return NULL;
+
+		policy = list_next_entry(policy, policy_list);
 	} while (!suitable_policy(policy, active));
 
 	return policy;
diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
index bab3a51..e0d1110 100644
--- a/drivers/cpufreq/cpufreq_governor.c
+++ b/drivers/cpufreq/cpufreq_governor.c
@@ -387,16 +387,18 @@
 	if (!have_governor_per_policy())
 		cdata->gdbs_data = dbs_data;
 
+	policy->governor_data = dbs_data;
+
 	ret = sysfs_create_group(get_governor_parent_kobj(policy),
 				 get_sysfs_attr(dbs_data));
 	if (ret)
 		goto reset_gdbs_data;
 
-	policy->governor_data = dbs_data;
-
 	return 0;
 
 reset_gdbs_data:
+	policy->governor_data = NULL;
+
 	if (!have_governor_per_policy())
 		cdata->gdbs_data = NULL;
 	cdata->exit(dbs_data, !policy->governor->initialized);
@@ -417,16 +419,19 @@
 	if (!cdbs->shared || cdbs->shared->policy)
 		return -EBUSY;
 
-	policy->governor_data = NULL;
 	if (!--dbs_data->usage_count) {
 		sysfs_remove_group(get_governor_parent_kobj(policy),
 				   get_sysfs_attr(dbs_data));
 
+		policy->governor_data = NULL;
+
 		if (!have_governor_per_policy())
 			cdata->gdbs_data = NULL;
 
 		cdata->exit(dbs_data, policy->governor->initialized == 1);
 		kfree(dbs_data);
+	} else {
+		policy->governor_data = NULL;
 	}
 
 	free_common_dbs_info(policy, cdata);
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index cd83d47..3a4b39a 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -1431,7 +1431,7 @@
 	if (!all_cpu_data)
 		return -ENOMEM;
 
-	if (static_cpu_has_safe(X86_FEATURE_HWP) && !no_hwp) {
+	if (static_cpu_has(X86_FEATURE_HWP) && !no_hwp) {
 		pr_info("intel_pstate: HWP enabled\n");
 		hwp_active++;
 	}
diff --git a/drivers/cpufreq/mt8173-cpufreq.c b/drivers/cpufreq/mt8173-cpufreq.c
index 1efba34..2058e6d 100644
--- a/drivers/cpufreq/mt8173-cpufreq.c
+++ b/drivers/cpufreq/mt8173-cpufreq.c
@@ -17,6 +17,7 @@
 #include <linux/cpu_cooling.h>
 #include <linux/cpufreq.h>
 #include <linux/cpumask.h>
+#include <linux/module.h>
 #include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/pm_opp.h>
diff --git a/drivers/cpufreq/pxa2xx-cpufreq.c b/drivers/cpufreq/pxa2xx-cpufreq.c
index 1d99c97..0963772 100644
--- a/drivers/cpufreq/pxa2xx-cpufreq.c
+++ b/drivers/cpufreq/pxa2xx-cpufreq.c
@@ -202,7 +202,7 @@
 	}
 }
 #else
-static int pxa_cpufreq_change_voltage(struct pxa_freqs *pxa_freq)
+static int pxa_cpufreq_change_voltage(const struct pxa_freqs *pxa_freq)
 {
 	return 0;
 }
diff --git a/drivers/cpuidle/coupled.c b/drivers/cpuidle/coupled.c
index 344058f..d5657d5 100644
--- a/drivers/cpuidle/coupled.c
+++ b/drivers/cpuidle/coupled.c
@@ -119,7 +119,6 @@
 
 #define CPUIDLE_COUPLED_NOT_IDLE	(-1)
 
-static DEFINE_MUTEX(cpuidle_coupled_lock);
 static DEFINE_PER_CPU(struct call_single_data, cpuidle_coupled_poke_cb);
 
 /*
diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index 046423b..f996efc 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -153,7 +153,7 @@
 	 * be frozen safely.
 	 */
 	index = find_deepest_state(drv, dev, UINT_MAX, 0, true);
-	if (index >= 0)
+	if (index > 0)
 		enter_freeze_proper(drv, dev, index);
 
 	return index;
diff --git a/drivers/crypto/atmel-aes.c b/drivers/crypto/atmel-aes.c
index 6dd3317..3eb3f12 100644
--- a/drivers/crypto/atmel-aes.c
+++ b/drivers/crypto/atmel-aes.c
@@ -400,7 +400,7 @@
 {
 	int err;
 
-	err = clk_prepare_enable(dd->iclk);
+	err = clk_enable(dd->iclk);
 	if (err)
 		return err;
 
@@ -430,7 +430,7 @@
 
 	dev_info(dd->dev, "version: 0x%x\n", dd->hw_version);
 
-	clk_disable_unprepare(dd->iclk);
+	clk_disable(dd->iclk);
 	return 0;
 }
 
@@ -448,7 +448,7 @@
 
 static inline int atmel_aes_complete(struct atmel_aes_dev *dd, int err)
 {
-	clk_disable_unprepare(dd->iclk);
+	clk_disable(dd->iclk);
 	dd->flags &= ~AES_FLAGS_BUSY;
 
 	if (dd->is_async)
@@ -2091,10 +2091,14 @@
 		goto res_err;
 	}
 
-	err = atmel_aes_hw_version_init(aes_dd);
+	err = clk_prepare(aes_dd->iclk);
 	if (err)
 		goto res_err;
 
+	err = atmel_aes_hw_version_init(aes_dd);
+	if (err)
+		goto iclk_unprepare;
+
 	atmel_aes_get_cap(aes_dd);
 
 	err = atmel_aes_buff_init(aes_dd);
@@ -2127,6 +2131,8 @@
 err_aes_dma:
 	atmel_aes_buff_cleanup(aes_dd);
 err_aes_buff:
+iclk_unprepare:
+	clk_unprepare(aes_dd->iclk);
 res_err:
 	tasklet_kill(&aes_dd->done_task);
 	tasklet_kill(&aes_dd->queue_task);
@@ -2155,6 +2161,8 @@
 	atmel_aes_dma_cleanup(aes_dd);
 	atmel_aes_buff_cleanup(aes_dd);
 
+	clk_unprepare(aes_dd->iclk);
+
 	return 0;
 }
 
diff --git a/drivers/crypto/atmel-sha.c b/drivers/crypto/atmel-sha.c
index 20de861..8bf9914 100644
--- a/drivers/crypto/atmel-sha.c
+++ b/drivers/crypto/atmel-sha.c
@@ -782,7 +782,7 @@
 	dd->flags &= ~(SHA_FLAGS_BUSY | SHA_FLAGS_FINAL | SHA_FLAGS_CPU |
 			SHA_FLAGS_DMA_READY | SHA_FLAGS_OUTPUT_READY);
 
-	clk_disable_unprepare(dd->iclk);
+	clk_disable(dd->iclk);
 
 	if (req->base.complete)
 		req->base.complete(&req->base, err);
@@ -795,7 +795,7 @@
 {
 	int err;
 
-	err = clk_prepare_enable(dd->iclk);
+	err = clk_enable(dd->iclk);
 	if (err)
 		return err;
 
@@ -822,7 +822,7 @@
 	dev_info(dd->dev,
 			"version: 0x%x\n", dd->hw_version);
 
-	clk_disable_unprepare(dd->iclk);
+	clk_disable(dd->iclk);
 }
 
 static int atmel_sha_handle_queue(struct atmel_sha_dev *dd,
@@ -1410,6 +1410,10 @@
 		goto res_err;
 	}
 
+	err = clk_prepare(sha_dd->iclk);
+	if (err)
+		goto res_err;
+
 	atmel_sha_hw_version_init(sha_dd);
 
 	atmel_sha_get_cap(sha_dd);
@@ -1421,12 +1425,12 @@
 			if (IS_ERR(pdata)) {
 				dev_err(&pdev->dev, "platform data not available\n");
 				err = PTR_ERR(pdata);
-				goto res_err;
+				goto iclk_unprepare;
 			}
 		}
 		if (!pdata->dma_slave) {
 			err = -ENXIO;
-			goto res_err;
+			goto iclk_unprepare;
 		}
 		err = atmel_sha_dma_init(sha_dd, pdata);
 		if (err)
@@ -1457,6 +1461,8 @@
 	if (sha_dd->caps.has_dma)
 		atmel_sha_dma_cleanup(sha_dd);
 err_sha_dma:
+iclk_unprepare:
+	clk_unprepare(sha_dd->iclk);
 res_err:
 	tasklet_kill(&sha_dd->done_task);
 sha_dd_err:
@@ -1483,12 +1489,7 @@
 	if (sha_dd->caps.has_dma)
 		atmel_sha_dma_cleanup(sha_dd);
 
-	iounmap(sha_dd->io_base);
-
-	clk_put(sha_dd->iclk);
-
-	if (sha_dd->irq >= 0)
-		free_irq(sha_dd->irq, sha_dd);
+	clk_unprepare(sha_dd->iclk);
 
 	return 0;
 }
diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c
index 8abb4bc..69d4a13 100644
--- a/drivers/crypto/caam/ctrl.c
+++ b/drivers/crypto/caam/ctrl.c
@@ -534,8 +534,8 @@
 	 * long pointers in master configuration register
 	 */
 	clrsetbits_32(&ctrl->mcr, MCFGR_AWCACHE_MASK, MCFGR_AWCACHE_CACH |
-		      MCFGR_WDENABLE | (sizeof(dma_addr_t) == sizeof(u64) ?
-					MCFGR_LONG_PTR : 0));
+		      MCFGR_AWCACHE_BUFF | MCFGR_WDENABLE |
+		      (sizeof(dma_addr_t) == sizeof(u64) ? MCFGR_LONG_PTR : 0));
 
 	/*
 	 *  Read the Compile Time paramters and SCFGR to determine
diff --git a/drivers/crypto/marvell/cesa.c b/drivers/crypto/marvell/cesa.c
index 0643e33..c0656e7 100644
--- a/drivers/crypto/marvell/cesa.c
+++ b/drivers/crypto/marvell/cesa.c
@@ -306,7 +306,7 @@
 		return -ENOMEM;
 
 	dma->padding_pool = dmam_pool_create("cesa_padding", dev, 72, 1, 0);
-	if (!dma->cache_pool)
+	if (!dma->padding_pool)
 		return -ENOMEM;
 
 	cesa->dma = dma;
diff --git a/drivers/devfreq/tegra-devfreq.c b/drivers/devfreq/tegra-devfreq.c
index 848b93ee..fe9dce0 100644
--- a/drivers/devfreq/tegra-devfreq.c
+++ b/drivers/devfreq/tegra-devfreq.c
@@ -500,6 +500,8 @@
 	clk_set_min_rate(tegra->emc_clock, rate);
 	clk_set_rate(tegra->emc_clock, 0);
 
+	*freq = rate;
+
 	return 0;
 }
 
diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c
index 64f5d1b..8e304b1 100644
--- a/drivers/dma/at_xdmac.c
+++ b/drivers/dma/at_xdmac.c
@@ -176,6 +176,7 @@
 #define AT_XDMAC_MAX_CHAN	0x20
 #define AT_XDMAC_MAX_CSIZE	16	/* 16 data */
 #define AT_XDMAC_MAX_DWIDTH	8	/* 64 bits */
+#define AT_XDMAC_RESIDUE_MAX_RETRIES	5
 
 #define AT_XDMAC_DMA_BUSWIDTHS\
 	(BIT(DMA_SLAVE_BUSWIDTH_UNDEFINED) |\
@@ -1395,8 +1396,8 @@
 	struct at_xdmac_desc	*desc, *_desc;
 	struct list_head	*descs_list;
 	enum dma_status		ret;
-	int			residue;
-	u32			cur_nda, mask, value;
+	int			residue, retry;
+	u32			cur_nda, check_nda, cur_ubc, mask, value;
 	u8			dwidth = 0;
 	unsigned long		flags;
 
@@ -1433,7 +1434,42 @@
 			cpu_relax();
 	}
 
+	/*
+	 * When processing the residue, we need to read two registers but we
+	 * can't do it in an atomic way. AT_XDMAC_CNDA is used to find where
+	 * we stand in the descriptor list and AT_XDMAC_CUBC is used
+	 * to know how many data are remaining for the current descriptor.
+	 * Since the dma channel is not paused to not loose data, between the
+	 * AT_XDMAC_CNDA and AT_XDMAC_CUBC read, we may have change of
+	 * descriptor.
+	 * For that reason, after reading AT_XDMAC_CUBC, we check if we are
+	 * still using the same descriptor by reading a second time
+	 * AT_XDMAC_CNDA. If AT_XDMAC_CNDA has changed, it means we have to
+	 * read again AT_XDMAC_CUBC.
+	 * Memory barriers are used to ensure the read order of the registers.
+	 * A max number of retries is set because unlikely it can never ends if
+	 * we are transferring a lot of data with small buffers.
+	 */
 	cur_nda = at_xdmac_chan_read(atchan, AT_XDMAC_CNDA) & 0xfffffffc;
+	rmb();
+	cur_ubc = at_xdmac_chan_read(atchan, AT_XDMAC_CUBC);
+	for (retry = 0; retry < AT_XDMAC_RESIDUE_MAX_RETRIES; retry++) {
+		rmb();
+		check_nda = at_xdmac_chan_read(atchan, AT_XDMAC_CNDA) & 0xfffffffc;
+
+		if (likely(cur_nda == check_nda))
+			break;
+
+		cur_nda = check_nda;
+		rmb();
+		cur_ubc = at_xdmac_chan_read(atchan, AT_XDMAC_CUBC);
+	}
+
+	if (unlikely(retry >= AT_XDMAC_RESIDUE_MAX_RETRIES)) {
+		ret = DMA_ERROR;
+		goto spin_unlock;
+	}
+
 	/*
 	 * Remove size of all microblocks already transferred and the current
 	 * one. Then add the remaining size to transfer of the current
@@ -1446,7 +1482,7 @@
 		if ((desc->lld.mbr_nda & 0xfffffffc) == cur_nda)
 			break;
 	}
-	residue += at_xdmac_chan_read(atchan, AT_XDMAC_CUBC) << dwidth;
+	residue += cur_ubc << dwidth;
 
 	dma_set_residue(txstate, residue);
 
diff --git a/drivers/dma/dw/core.c b/drivers/dma/dw/core.c
index e893318..5ad0ec1 100644
--- a/drivers/dma/dw/core.c
+++ b/drivers/dma/dw/core.c
@@ -156,7 +156,6 @@
 
 	/* Enable interrupts */
 	channel_set_bit(dw, MASK.XFER, dwc->mask);
-	channel_set_bit(dw, MASK.BLOCK, dwc->mask);
 	channel_set_bit(dw, MASK.ERROR, dwc->mask);
 
 	dwc->initialized = true;
@@ -588,6 +587,9 @@
 
 		spin_unlock_irqrestore(&dwc->lock, flags);
 	}
+
+	/* Re-enable interrupts */
+	channel_set_bit(dw, MASK.BLOCK, dwc->mask);
 }
 
 /* ------------------------------------------------------------------------- */
@@ -618,11 +620,8 @@
 			dwc_scan_descriptors(dw, dwc);
 	}
 
-	/*
-	 * Re-enable interrupts.
-	 */
+	/* Re-enable interrupts */
 	channel_set_bit(dw, MASK.XFER, dw->all_chan_mask);
-	channel_set_bit(dw, MASK.BLOCK, dw->all_chan_mask);
 	channel_set_bit(dw, MASK.ERROR, dw->all_chan_mask);
 }
 
@@ -1261,6 +1260,7 @@
 int dw_dma_cyclic_start(struct dma_chan *chan)
 {
 	struct dw_dma_chan	*dwc = to_dw_dma_chan(chan);
+	struct dw_dma		*dw = to_dw_dma(chan->device);
 	unsigned long		flags;
 
 	if (!test_bit(DW_DMA_IS_CYCLIC, &dwc->flags)) {
@@ -1269,7 +1269,12 @@
 	}
 
 	spin_lock_irqsave(&dwc->lock, flags);
+
+	/* Enable interrupts to perform cyclic transfer */
+	channel_set_bit(dw, MASK.BLOCK, dwc->mask);
+
 	dwc_dostart(dwc, dwc->cdesc->desc[0]);
+
 	spin_unlock_irqrestore(&dwc->lock, flags);
 
 	return 0;
diff --git a/drivers/dma/dw/pci.c b/drivers/dma/dw/pci.c
index 4c30fdd..358f968 100644
--- a/drivers/dma/dw/pci.c
+++ b/drivers/dma/dw/pci.c
@@ -108,6 +108,10 @@
 
 	/* Haswell */
 	{ PCI_VDEVICE(INTEL, 0x9c60) },
+
+	/* Broadwell */
+	{ PCI_VDEVICE(INTEL, 0x9ce0) },
+
 	{ }
 };
 MODULE_DEVICE_TABLE(pci, dw_pci_id_table);
diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c
index d92d655..e3d7fcb 100644
--- a/drivers/dma/edma.c
+++ b/drivers/dma/edma.c
@@ -113,6 +113,9 @@
 #define GET_NUM_REGN(x)		((x & 0x300000) >> 20) /* bits 20-21 */
 #define CHMAP_EXIST		BIT(24)
 
+/* CCSTAT register */
+#define EDMA_CCSTAT_ACTV	BIT(4)
+
 /*
  * Max of 20 segments per channel to conserve PaRAM slots
  * Also note that MAX_NR_SG should be atleast the no.of periods
@@ -1680,9 +1683,20 @@
 	spin_unlock_irqrestore(&echan->vchan.lock, flags);
 }
 
+/*
+ * This limit exists to avoid a possible infinite loop when waiting for proof
+ * that a particular transfer is completed. This limit can be hit if there
+ * are large bursts to/from slow devices or the CPU is never able to catch
+ * the DMA hardware idle. On an AM335x transfering 48 bytes from the UART
+ * RX-FIFO, as many as 55 loops have been seen.
+ */
+#define EDMA_MAX_TR_WAIT_LOOPS 1000
+
 static u32 edma_residue(struct edma_desc *edesc)
 {
 	bool dst = edesc->direction == DMA_DEV_TO_MEM;
+	int loop_count = EDMA_MAX_TR_WAIT_LOOPS;
+	struct edma_chan *echan = edesc->echan;
 	struct edma_pset *pset = edesc->pset;
 	dma_addr_t done, pos;
 	int i;
@@ -1691,7 +1705,32 @@
 	 * We always read the dst/src position from the first RamPar
 	 * pset. That's the one which is active now.
 	 */
-	pos = edma_get_position(edesc->echan->ecc, edesc->echan->slot[0], dst);
+	pos = edma_get_position(echan->ecc, echan->slot[0], dst);
+
+	/*
+	 * "pos" may represent a transfer request that is still being
+	 * processed by the EDMACC or EDMATC. We will busy wait until
+	 * any one of the situations occurs:
+	 *   1. the DMA hardware is idle
+	 *   2. a new transfer request is setup
+	 *   3. we hit the loop limit
+	 */
+	while (edma_read(echan->ecc, EDMA_CCSTAT) & EDMA_CCSTAT_ACTV) {
+		/* check if a new transfer request is setup */
+		if (edma_get_position(echan->ecc,
+				      echan->slot[0], dst) != pos) {
+			break;
+		}
+
+		if (!--loop_count) {
+			dev_dbg_ratelimited(echan->vchan.chan.device->dev,
+				"%s: timeout waiting for PaRAM update\n",
+				__func__);
+			break;
+		}
+
+		cpu_relax();
+	}
 
 	/*
 	 * Cyclic is simple. Just subtract pset[0].addr from pos.
diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c
index 2209f75..aac85c3 100644
--- a/drivers/dma/fsldma.c
+++ b/drivers/dma/fsldma.c
@@ -522,6 +522,8 @@
 			chan_dbg(chan, "LD %p callback\n", desc);
 			txd->callback(txd->callback_param);
 		}
+
+		dma_descriptor_unmap(txd);
 	}
 
 	/* Run any dependencies */
diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c
index 1d5df2e..21539d5 100644
--- a/drivers/dma/ioat/dma.c
+++ b/drivers/dma/ioat/dma.c
@@ -861,32 +861,42 @@
 			return;
 	}
 
+	spin_lock_bh(&ioat_chan->cleanup_lock);
+
+	/* handle the no-actives case */
+	if (!ioat_ring_active(ioat_chan)) {
+		spin_lock_bh(&ioat_chan->prep_lock);
+		check_active(ioat_chan);
+		spin_unlock_bh(&ioat_chan->prep_lock);
+		spin_unlock_bh(&ioat_chan->cleanup_lock);
+		return;
+	}
+
 	/* if we haven't made progress and we have already
 	 * acknowledged a pending completion once, then be more
 	 * forceful with a restart
 	 */
-	spin_lock_bh(&ioat_chan->cleanup_lock);
 	if (ioat_cleanup_preamble(ioat_chan, &phys_complete))
 		__cleanup(ioat_chan, phys_complete);
 	else if (test_bit(IOAT_COMPLETION_ACK, &ioat_chan->state)) {
+		u32 chanerr;
+
+		chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
+		dev_warn(to_dev(ioat_chan), "Restarting channel...\n");
+		dev_warn(to_dev(ioat_chan), "CHANSTS: %#Lx CHANERR: %#x\n",
+			 status, chanerr);
+		dev_warn(to_dev(ioat_chan), "Active descriptors: %d\n",
+			 ioat_ring_active(ioat_chan));
+
 		spin_lock_bh(&ioat_chan->prep_lock);
 		ioat_restart_channel(ioat_chan);
 		spin_unlock_bh(&ioat_chan->prep_lock);
 		spin_unlock_bh(&ioat_chan->cleanup_lock);
 		return;
-	} else {
+	} else
 		set_bit(IOAT_COMPLETION_ACK, &ioat_chan->state);
-		mod_timer(&ioat_chan->timer, jiffies + COMPLETION_TIMEOUT);
-	}
 
-
-	if (ioat_ring_active(ioat_chan))
-		mod_timer(&ioat_chan->timer, jiffies + COMPLETION_TIMEOUT);
-	else {
-		spin_lock_bh(&ioat_chan->prep_lock);
-		check_active(ioat_chan);
-		spin_unlock_bh(&ioat_chan->prep_lock);
-	}
+	mod_timer(&ioat_chan->timer, jiffies + COMPLETION_TIMEOUT);
 	spin_unlock_bh(&ioat_chan->cleanup_lock);
 }
 
diff --git a/drivers/dma/iop-adma.c b/drivers/dma/iop-adma.c
index e4f4312..f039cfa 100644
--- a/drivers/dma/iop-adma.c
+++ b/drivers/dma/iop-adma.c
@@ -1300,10 +1300,10 @@
 	 * note: writecombine gives slightly better performance, but
 	 * requires that we explicitly flush the writes
 	 */
-	adev->dma_desc_pool_virt = dma_alloc_writecombine(&pdev->dev,
-							  plat_data->pool_size,
-							  &adev->dma_desc_pool,
-							  GFP_KERNEL);
+	adev->dma_desc_pool_virt = dma_alloc_wc(&pdev->dev,
+						plat_data->pool_size,
+						&adev->dma_desc_pool,
+						GFP_KERNEL);
 	if (!adev->dma_desc_pool_virt) {
 		ret = -ENOMEM;
 		goto err_free_adev;
diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c
index 14091f8..3922a5d 100644
--- a/drivers/dma/mv_xor.c
+++ b/drivers/dma/mv_xor.c
@@ -964,8 +964,8 @@
 	 * requires that we explicitly flush the writes
 	 */
 	mv_chan->dma_desc_pool_virt =
-	  dma_alloc_writecombine(&pdev->dev, MV_XOR_POOL_SIZE,
-				 &mv_chan->dma_desc_pool, GFP_KERNEL);
+	  dma_alloc_wc(&pdev->dev, MV_XOR_POOL_SIZE, &mv_chan->dma_desc_pool,
+		       GFP_KERNEL);
 	if (!mv_chan->dma_desc_pool_virt)
 		return ERR_PTR(-ENOMEM);
 
diff --git a/drivers/dma/pxa_dma.c b/drivers/dma/pxa_dma.c
index f2a0310..debca82 100644
--- a/drivers/dma/pxa_dma.c
+++ b/drivers/dma/pxa_dma.c
@@ -583,6 +583,8 @@
 		(PXA_DCMD_LENGTH & sizeof(u32));
 	if (flags & DMA_PREP_INTERRUPT)
 		updater->dcmd |= PXA_DCMD_ENDIRQEN;
+	if (sw_desc->cyclic)
+		sw_desc->hw_desc[sw_desc->nb_desc - 2]->ddadr = sw_desc->first;
 }
 
 static bool is_desc_completed(struct virt_dma_desc *vd)
@@ -673,6 +675,10 @@
 		dev_dbg(&chan->vc.chan.dev->device,
 			"%s(): checking txd %p[%x]: completed=%d\n",
 			__func__, vd, vd->tx.cookie, is_desc_completed(vd));
+		if (to_pxad_sw_desc(vd)->cyclic) {
+			vchan_cyclic_callback(vd);
+			break;
+		}
 		if (is_desc_completed(vd)) {
 			list_del(&vd->node);
 			vchan_cookie_complete(vd);
@@ -1080,7 +1086,7 @@
 		return NULL;
 
 	pxad_get_config(chan, dir, &dcmd, &dsadr, &dtadr);
-	dcmd |= PXA_DCMD_ENDIRQEN | (PXA_DCMD_LENGTH | period_len);
+	dcmd |= PXA_DCMD_ENDIRQEN | (PXA_DCMD_LENGTH & period_len);
 	dev_dbg(&chan->vc.chan.dev->device,
 		"%s(): buf_addr=0x%lx len=%zu period=%zu dir=%d flags=%lx\n",
 		__func__, (unsigned long)buf_addr, len, period_len, dir, flags);
diff --git a/drivers/dma/qcom_bam_dma.c b/drivers/dma/qcom_bam_dma.c
index 5a250cd..d34aef7 100644
--- a/drivers/dma/qcom_bam_dma.c
+++ b/drivers/dma/qcom_bam_dma.c
@@ -502,8 +502,8 @@
 		return 0;
 
 	/* allocate FIFO descriptor space, but only if necessary */
-	bchan->fifo_virt = dma_alloc_writecombine(bdev->dev, BAM_DESC_FIFO_SIZE,
-				&bchan->fifo_phys, GFP_KERNEL);
+	bchan->fifo_virt = dma_alloc_wc(bdev->dev, BAM_DESC_FIFO_SIZE,
+					&bchan->fifo_phys, GFP_KERNEL);
 
 	if (!bchan->fifo_virt) {
 		dev_err(bdev->dev, "Failed to allocate desc fifo\n");
@@ -538,8 +538,8 @@
 	bam_reset_channel(bchan);
 	spin_unlock_irqrestore(&bchan->vc.lock, flags);
 
-	dma_free_writecombine(bdev->dev, BAM_DESC_FIFO_SIZE, bchan->fifo_virt,
-				bchan->fifo_phys);
+	dma_free_wc(bdev->dev, BAM_DESC_FIFO_SIZE, bchan->fifo_virt,
+		    bchan->fifo_phys);
 	bchan->fifo_virt = NULL;
 
 	/* mask irq for pipe/channel */
@@ -1231,9 +1231,9 @@
 		bam_dma_terminate_all(&bdev->channels[i].vc.chan);
 		tasklet_kill(&bdev->channels[i].vc.task);
 
-		dma_free_writecombine(bdev->dev, BAM_DESC_FIFO_SIZE,
-			bdev->channels[i].fifo_virt,
-			bdev->channels[i].fifo_phys);
+		dma_free_wc(bdev->dev, BAM_DESC_FIFO_SIZE,
+			    bdev->channels[i].fifo_virt,
+			    bdev->channels[i].fifo_phys);
 	}
 
 	tasklet_kill(&bdev->task);
diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
index e3a945c..49768c0 100644
--- a/drivers/edac/mce_amd.c
+++ b/drivers/edac/mce_amd.c
@@ -147,6 +147,135 @@
 	"Status Register File",
 };
 
+/* Scalable MCA error strings */
+static const char * const f17h_ls_mce_desc[] = {
+	"Load queue parity",
+	"Store queue parity",
+	"Miss address buffer payload parity",
+	"L1 TLB parity",
+	"",						/* reserved */
+	"DC tag error type 6",
+	"DC tag error type 1",
+	"Internal error type 1",
+	"Internal error type 2",
+	"Sys Read data error thread 0",
+	"Sys read data error thread 1",
+	"DC tag error type 2",
+	"DC data error type 1 (poison comsumption)",
+	"DC data error type 2",
+	"DC data error type 3",
+	"DC tag error type 4",
+	"L2 TLB parity",
+	"PDC parity error",
+	"DC tag error type 3",
+	"DC tag error type 5",
+	"L2 fill data error",
+};
+
+static const char * const f17h_if_mce_desc[] = {
+	"microtag probe port parity error",
+	"IC microtag or full tag multi-hit error",
+	"IC full tag parity",
+	"IC data array parity",
+	"Decoupling queue phys addr parity error",
+	"L0 ITLB parity error",
+	"L1 ITLB parity error",
+	"L2 ITLB parity error",
+	"BPQ snoop parity on Thread 0",
+	"BPQ snoop parity on Thread 1",
+	"L1 BTB multi-match error",
+	"L2 BTB multi-match error",
+};
+
+static const char * const f17h_l2_mce_desc[] = {
+	"L2M tag multi-way-hit error",
+	"L2M tag ECC error",
+	"L2M data ECC error",
+	"HW assert",
+};
+
+static const char * const f17h_de_mce_desc[] = {
+	"uop cache tag parity error",
+	"uop cache data parity error",
+	"Insn buffer parity error",
+	"Insn dispatch queue parity error",
+	"Fetch address FIFO parity",
+	"Patch RAM data parity",
+	"Patch RAM sequencer parity",
+	"uop buffer parity"
+};
+
+static const char * const f17h_ex_mce_desc[] = {
+	"Watchdog timeout error",
+	"Phy register file parity",
+	"Flag register file parity",
+	"Immediate displacement register file parity",
+	"Address generator payload parity",
+	"EX payload parity",
+	"Checkpoint queue parity",
+	"Retire dispatch queue parity",
+};
+
+static const char * const f17h_fp_mce_desc[] = {
+	"Physical register file parity",
+	"Freelist parity error",
+	"Schedule queue parity",
+	"NSQ parity error",
+	"Retire queue parity",
+	"Status register file parity",
+};
+
+static const char * const f17h_l3_mce_desc[] = {
+	"Shadow tag macro ECC error",
+	"Shadow tag macro multi-way-hit error",
+	"L3M tag ECC error",
+	"L3M tag multi-way-hit error",
+	"L3M data ECC error",
+	"XI parity, L3 fill done channel error",
+	"L3 victim queue parity",
+	"L3 HW assert",
+};
+
+static const char * const f17h_cs_mce_desc[] = {
+	"Illegal request from transport layer",
+	"Address violation",
+	"Security violation",
+	"Illegal response from transport layer",
+	"Unexpected response",
+	"Parity error on incoming request or probe response data",
+	"Parity error on incoming read response data",
+	"Atomic request parity",
+	"ECC error on probe filter access",
+};
+
+static const char * const f17h_pie_mce_desc[] = {
+	"HW assert",
+	"Internal PIE register security violation",
+	"Error on GMI link",
+	"Poison data written to internal PIE register",
+};
+
+static const char * const f17h_umc_mce_desc[] = {
+	"DRAM ECC error",
+	"Data poison error on DRAM",
+	"SDP parity error",
+	"Advanced peripheral bus error",
+	"Command/address parity error",
+	"Write data CRC error",
+};
+
+static const char * const f17h_pb_mce_desc[] = {
+	"Parameter Block RAM ECC error",
+};
+
+static const char * const f17h_psp_mce_desc[] = {
+	"PSP RAM ECC or parity error",
+};
+
+static const char * const f17h_smu_mce_desc[] = {
+	"SMU RAM ECC or parity error",
+};
+
 static bool f12h_mc0_mce(u16 ec, u8 xec)
 {
 	bool ret = false;
@@ -691,6 +820,177 @@
 	pr_emerg(HW_ERR "Corrupted MC6 MCE info?\n");
 }
 
+static void decode_f17h_core_errors(const char *ip_name, u8 xec,
+				   unsigned int mca_type)
+{
+	const char * const *error_desc_array;
+	size_t len;
+
+	pr_emerg(HW_ERR "%s Error: ", ip_name);
+
+	switch (mca_type) {
+	case SMCA_LS:
+		error_desc_array = f17h_ls_mce_desc;
+		len = ARRAY_SIZE(f17h_ls_mce_desc) - 1;
+
+		if (xec == 0x4) {
+			pr_cont("Unrecognized LS MCA error code.\n");
+			return;
+		}
+		break;
+
+	case SMCA_IF:
+		error_desc_array = f17h_if_mce_desc;
+		len = ARRAY_SIZE(f17h_if_mce_desc) - 1;
+		break;
+
+	case SMCA_L2_CACHE:
+		error_desc_array = f17h_l2_mce_desc;
+		len = ARRAY_SIZE(f17h_l2_mce_desc) - 1;
+		break;
+
+	case SMCA_DE:
+		error_desc_array = f17h_de_mce_desc;
+		len = ARRAY_SIZE(f17h_de_mce_desc) - 1;
+		break;
+
+	case SMCA_EX:
+		error_desc_array = f17h_ex_mce_desc;
+		len = ARRAY_SIZE(f17h_ex_mce_desc) - 1;
+		break;
+
+	case SMCA_FP:
+		error_desc_array = f17h_fp_mce_desc;
+		len = ARRAY_SIZE(f17h_fp_mce_desc) - 1;
+		break;
+
+	case SMCA_L3_CACHE:
+		error_desc_array = f17h_l3_mce_desc;
+		len = ARRAY_SIZE(f17h_l3_mce_desc) - 1;
+		break;
+
+	default:
+		pr_cont("Corrupted MCA core error info.\n");
+		return;
+	}
+
+	if (xec > len) {
+		pr_cont("Unrecognized %s MCA bank error code.\n",
+			 amd_core_mcablock_names[mca_type]);
+		return;
+	}
+
+	pr_cont("%s.\n", error_desc_array[xec]);
+}
+
+static void decode_df_errors(u8 xec, unsigned int mca_type)
+{
+	const char * const *error_desc_array;
+	size_t len;
+
+	pr_emerg(HW_ERR "Data Fabric Error: ");
+
+	switch (mca_type) {
+	case  SMCA_CS:
+		error_desc_array = f17h_cs_mce_desc;
+		len = ARRAY_SIZE(f17h_cs_mce_desc) - 1;
+		break;
+
+	case SMCA_PIE:
+		error_desc_array = f17h_pie_mce_desc;
+		len = ARRAY_SIZE(f17h_pie_mce_desc) - 1;
+		break;
+
+	default:
+		pr_cont("Corrupted MCA Data Fabric info.\n");
+		return;
+	}
+
+	if (xec > len) {
+		pr_cont("Unrecognized %s MCA bank error code.\n",
+			 amd_df_mcablock_names[mca_type]);
+		return;
+	}
+
+	pr_cont("%s.\n", error_desc_array[xec]);
+}
+
+/* Decode errors according to Scalable MCA specification */
+static void decode_smca_errors(struct mce *m)
+{
+	u32 addr = MSR_AMD64_SMCA_MCx_IPID(m->bank);
+	unsigned int hwid, mca_type, i;
+	u8 xec = XEC(m->status, xec_mask);
+	const char * const *error_desc_array;
+	const char *ip_name;
+	u32 low, high;
+	size_t len;
+
+	if (rdmsr_safe(addr, &low, &high)) {
+		pr_emerg("Invalid IP block specified, error information is unreliable.\n");
+		return;
+	}
+
+	hwid = high & MCI_IPID_HWID;
+	mca_type = (high & MCI_IPID_MCATYPE) >> 16;
+
+	pr_emerg(HW_ERR "MC%d IPID value: 0x%08x%08x\n", m->bank, high, low);
+
+	/*
+	 * Based on hwid and mca_type values, decode errors from respective IPs.
+	 * Note: mca_type values make sense only in the context of an hwid.
+	 */
+	for (i = 0; i < ARRAY_SIZE(amd_hwids); i++)
+		if (amd_hwids[i].hwid == hwid)
+			break;
+
+	switch (i) {
+	case SMCA_F17H_CORE:
+		ip_name = (mca_type == SMCA_L3_CACHE) ?
+			  "L3 Cache" : "F17h Core";
+		return decode_f17h_core_errors(ip_name, xec, mca_type);
+		break;
+
+	case SMCA_DF:
+		return decode_df_errors(xec, mca_type);
+		break;
+
+	case SMCA_UMC:
+		error_desc_array = f17h_umc_mce_desc;
+		len = ARRAY_SIZE(f17h_umc_mce_desc) - 1;
+		break;
+
+	case SMCA_PB:
+		error_desc_array = f17h_pb_mce_desc;
+		len = ARRAY_SIZE(f17h_pb_mce_desc) - 1;
+		break;
+
+	case SMCA_PSP:
+		error_desc_array = f17h_psp_mce_desc;
+		len = ARRAY_SIZE(f17h_psp_mce_desc) - 1;
+		break;
+
+	case SMCA_SMU:
+		error_desc_array = f17h_smu_mce_desc;
+		len = ARRAY_SIZE(f17h_smu_mce_desc) - 1;
+		break;
+
+	default:
+		pr_emerg(HW_ERR "HWID:%d does not match any existing IPs.\n", hwid);
+		return;
+	}
+
+	ip_name = amd_hwids[i].name;
+	pr_emerg(HW_ERR "%s Error: ", ip_name);
+
+	if (xec > len) {
+		pr_cont("Unrecognized %s MCA bank error code.\n", ip_name);
+		return;
+	}
+
+	pr_cont("%s.\n", error_desc_array[xec]);
+}
+
 static inline void amd_decode_err_code(u16 ec)
 {
 	if (INT_ERROR(ec)) {
@@ -752,6 +1052,7 @@
 	struct mce *m = (struct mce *)data;
 	struct cpuinfo_x86 *c = &cpu_data(m->extcpu);
 	int ecc;
+	u32 ebx = cpuid_ebx(0x80000007);
 
 	if (amd_filter_mce(m))
 		return NOTIFY_STOP;
@@ -769,11 +1070,20 @@
 		((m->status & MCI_STATUS_PCC)	? "PCC"	  : "-"),
 		((m->status & MCI_STATUS_ADDRV)	? "AddrV" : "-"));
 
-	if (c->x86 == 0x15 || c->x86 == 0x16)
+	if (c->x86 >= 0x15)
 		pr_cont("|%s|%s",
 			((m->status & MCI_STATUS_DEFERRED) ? "Deferred" : "-"),
 			((m->status & MCI_STATUS_POISON)   ? "Poison"   : "-"));
 
+	if (!!(ebx & BIT(3))) {
+		u32 low, high;
+		u32 addr = MSR_AMD64_SMCA_MCx_CONFIG(m->bank);
+
+		if (!rdmsr_safe(addr, &low, &high) &&
+		    (low & MCI_CONFIG_MCAX))
+			pr_cont("|%s", ((m->status & MCI_STATUS_TCC) ? "TCC" : "-"));
+	}
+
 	/* do the two bits[14:13] together */
 	ecc = (m->status >> 45) & 0x3;
 	if (ecc)
@@ -784,6 +1094,11 @@
 	if (m->status & MCI_STATUS_ADDRV)
 		pr_emerg(HW_ERR "MC%d Error Address: 0x%016llx\n", m->bank, m->addr);
 
+	if (!!(ebx & BIT(3))) {
+		decode_smca_errors(m);
+		goto err_code;
+	}
+
 	if (!fam_ops)
 		goto err_code;
 
@@ -834,6 +1149,7 @@
 static int __init mce_amd_init(void)
 {
 	struct cpuinfo_x86 *c = &boot_cpu_data;
+	u32 ebx;
 
 	if (c->x86_vendor != X86_VENDOR_AMD)
 		return -ENODEV;
@@ -888,10 +1204,18 @@
 		fam_ops->mc2_mce = f16h_mc2_mce;
 		break;
 
+	case 0x17:
+		ebx = cpuid_ebx(0x80000007);
+		xec_mask = 0x3f;
+		if (!(ebx & BIT(3))) {
+			printk(KERN_WARNING "Decoding supported only on Scalable MCA processors.\n");
+			goto err_out;
+		}
+		break;
+
 	default:
 		printk(KERN_WARNING "Huh? What family is it: 0x%x?!\n", c->x86);
-		kfree(fam_ops);
-		fam_ops = NULL;
+		goto err_out;
 	}
 
 	pr_info("MCE: In-kernel MCE decoding enabled.\n");
@@ -899,6 +1223,11 @@
 	mce_register_decode_chain(&amd_mce_dec_nb);
 
 	return 0;
+
+err_out:
+	kfree(fam_ops);
+	fam_ops = NULL;
+	return -EINVAL;
 }
 early_initcall(mce_amd_init);
 
diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c
index e438ee5..93f0d41 100644
--- a/drivers/edac/sb_edac.c
+++ b/drivers/edac/sb_edac.c
@@ -1574,7 +1574,7 @@
 				for (cha = 0; cha < KNL_MAX_CHAS; cha++) {
 					if (knl_get_mc_route(target,
 						mc_route_reg[cha]) == channel
-						&& participants[channel]) {
+						&& !participants[channel]) {
 						participant_count++;
 						participants[channel] = 1;
 						break;
@@ -1839,8 +1839,8 @@
 		edac_dbg(0, "TAD#%d: up to %u.%03u GB (0x%016Lx), socket interleave %d, memory interleave %d, TGT: %d, %d, %d, %d, reg=0x%08x\n",
 			 n_tads, gb, (mb*1000)/1024,
 			 ((u64)tmp_mb) << 20L,
-			 (u32)TAD_SOCK(reg),
-			 (u32)TAD_CH(reg),
+			 (u32)(1 << TAD_SOCK(reg)),
+			 (u32)TAD_CH(reg) + 1,
 			 (u32)TAD_TGT0(reg),
 			 (u32)TAD_TGT1(reg),
 			 (u32)TAD_TGT2(reg),
@@ -2118,7 +2118,7 @@
 	}
 
 	ch_way = TAD_CH(reg) + 1;
-	sck_way = TAD_SOCK(reg) + 1;
+	sck_way = 1 << TAD_SOCK(reg);
 
 	if (ch_way == 3)
 		idx = addr >> 6;
@@ -2175,7 +2175,7 @@
 		 n_tads,
 		 addr,
 		 limit,
-		 (u32)TAD_SOCK(reg),
+		 sck_way,
 		 ch_way,
 		 offset,
 		 idx,
@@ -2190,18 +2190,12 @@
 			offset, addr);
 		return -EINVAL;
 	}
-	addr -= offset;
-	/* Store the low bits [0:6] of the addr */
-	ch_addr = addr & 0x7f;
-	/* Remove socket wayness and remove 6 bits */
-	addr >>= 6;
-	addr = div_u64(addr, sck_xch);
-#if 0
-	/* Divide by channel way */
-	addr = addr / ch_way;
-#endif
-	/* Recover the last 6 bits */
-	ch_addr |= addr << 6;
+
+	ch_addr = addr - offset;
+	ch_addr >>= (6 + shiftup);
+	ch_addr /= ch_way * sck_way;
+	ch_addr <<= (6 + shiftup);
+	ch_addr |= addr & ((1 << (6 + shiftup)) - 1);
 
 	/*
 	 * Step 3) Decode rank
diff --git a/drivers/firmware/efi/efivars.c b/drivers/firmware/efi/efivars.c
index 756eca8..10e6774 100644
--- a/drivers/firmware/efi/efivars.c
+++ b/drivers/firmware/efi/efivars.c
@@ -221,7 +221,7 @@
 	}
 
 	if ((attributes & ~EFI_VARIABLE_MASK) != 0 ||
-	    efivar_validate(name, data, size) == false) {
+	    efivar_validate(vendor, name, data, size) == false) {
 		printk(KERN_ERR "efivars: Malformed variable content\n");
 		return -EINVAL;
 	}
@@ -447,7 +447,8 @@
 	}
 
 	if ((attributes & ~EFI_VARIABLE_MASK) != 0 ||
-	    efivar_validate(name, data, size) == false) {
+	    efivar_validate(new_var->VendorGuid, name, data,
+			    size) == false) {
 		printk(KERN_ERR "efivars: Malformed variable content\n");
 		return -EINVAL;
 	}
@@ -540,38 +541,30 @@
 static int
 efivar_create_sysfs_entry(struct efivar_entry *new_var)
 {
-	int i, short_name_size;
+	int short_name_size;
 	char *short_name;
-	unsigned long variable_name_size;
-	efi_char16_t *variable_name;
+	unsigned long utf8_name_size;
+	efi_char16_t *variable_name = new_var->var.VariableName;
 	int ret;
 
-	variable_name = new_var->var.VariableName;
-	variable_name_size = ucs2_strlen(variable_name) * sizeof(efi_char16_t);
-
 	/*
-	 * Length of the variable bytes in ASCII, plus the '-' separator,
+	 * Length of the variable bytes in UTF8, plus the '-' separator,
 	 * plus the GUID, plus trailing NUL
 	 */
-	short_name_size = variable_name_size / sizeof(efi_char16_t)
-				+ 1 + EFI_VARIABLE_GUID_LEN + 1;
+	utf8_name_size = ucs2_utf8size(variable_name);
+	short_name_size = utf8_name_size + 1 + EFI_VARIABLE_GUID_LEN + 1;
 
-	short_name = kzalloc(short_name_size, GFP_KERNEL);
-
+	short_name = kmalloc(short_name_size, GFP_KERNEL);
 	if (!short_name)
 		return -ENOMEM;
 
-	/* Convert Unicode to normal chars (assume top bits are 0),
-	   ala UTF-8 */
-	for (i=0; i < (int)(variable_name_size / sizeof(efi_char16_t)); i++) {
-		short_name[i] = variable_name[i] & 0xFF;
-	}
+	ucs2_as_utf8(short_name, variable_name, short_name_size);
+
 	/* This is ugly, but necessary to separate one vendor's
 	   private variables from another's.         */
-
-	*(short_name + strlen(short_name)) = '-';
+	short_name[utf8_name_size] = '-';
 	efi_guid_to_str(&new_var->var.VendorGuid,
-			 short_name + strlen(short_name));
+			 short_name + utf8_name_size + 1);
 
 	new_var->kobj.kset = efivars_kset;
 
diff --git a/drivers/firmware/efi/vars.c b/drivers/firmware/efi/vars.c
index 70a0fb1..7f2ea21 100644
--- a/drivers/firmware/efi/vars.c
+++ b/drivers/firmware/efi/vars.c
@@ -165,67 +165,133 @@
 }
 
 struct variable_validate {
+	efi_guid_t vendor;
 	char *name;
 	bool (*validate)(efi_char16_t *var_name, int match, u8 *data,
 			 unsigned long len);
 };
 
+/*
+ * This is the list of variables we need to validate, as well as the
+ * whitelist for what we think is safe not to default to immutable.
+ *
+ * If it has a validate() method that's not NULL, it'll go into the
+ * validation routine.  If not, it is assumed valid, but still used for
+ * whitelisting.
+ *
+ * Note that it's sorted by {vendor,name}, but globbed names must come after
+ * any other name with the same prefix.
+ */
 static const struct variable_validate variable_validate[] = {
-	{ "BootNext", validate_uint16 },
-	{ "BootOrder", validate_boot_order },
-	{ "DriverOrder", validate_boot_order },
-	{ "Boot*", validate_load_option },
-	{ "Driver*", validate_load_option },
-	{ "ConIn", validate_device_path },
-	{ "ConInDev", validate_device_path },
-	{ "ConOut", validate_device_path },
-	{ "ConOutDev", validate_device_path },
-	{ "ErrOut", validate_device_path },
-	{ "ErrOutDev", validate_device_path },
-	{ "Timeout", validate_uint16 },
-	{ "Lang", validate_ascii_string },
-	{ "PlatformLang", validate_ascii_string },
-	{ "", NULL },
+	{ EFI_GLOBAL_VARIABLE_GUID, "BootNext", validate_uint16 },
+	{ EFI_GLOBAL_VARIABLE_GUID, "BootOrder", validate_boot_order },
+	{ EFI_GLOBAL_VARIABLE_GUID, "Boot*", validate_load_option },
+	{ EFI_GLOBAL_VARIABLE_GUID, "DriverOrder", validate_boot_order },
+	{ EFI_GLOBAL_VARIABLE_GUID, "Driver*", validate_load_option },
+	{ EFI_GLOBAL_VARIABLE_GUID, "ConIn", validate_device_path },
+	{ EFI_GLOBAL_VARIABLE_GUID, "ConInDev", validate_device_path },
+	{ EFI_GLOBAL_VARIABLE_GUID, "ConOut", validate_device_path },
+	{ EFI_GLOBAL_VARIABLE_GUID, "ConOutDev", validate_device_path },
+	{ EFI_GLOBAL_VARIABLE_GUID, "ErrOut", validate_device_path },
+	{ EFI_GLOBAL_VARIABLE_GUID, "ErrOutDev", validate_device_path },
+	{ EFI_GLOBAL_VARIABLE_GUID, "Lang", validate_ascii_string },
+	{ EFI_GLOBAL_VARIABLE_GUID, "OsIndications", NULL },
+	{ EFI_GLOBAL_VARIABLE_GUID, "PlatformLang", validate_ascii_string },
+	{ EFI_GLOBAL_VARIABLE_GUID, "Timeout", validate_uint16 },
+	{ LINUX_EFI_CRASH_GUID, "*", NULL },
+	{ NULL_GUID, "", NULL },
 };
 
+static bool
+variable_matches(const char *var_name, size_t len, const char *match_name,
+		 int *match)
+{
+	for (*match = 0; ; (*match)++) {
+		char c = match_name[*match];
+		char u = var_name[*match];
+
+		/* Wildcard in the matching name means we've matched */
+		if (c == '*')
+			return true;
+
+		/* Case sensitive match */
+		if (!c && *match == len)
+			return true;
+
+		if (c != u)
+			return false;
+
+		if (!c)
+			return true;
+	}
+	return true;
+}
+
 bool
-efivar_validate(efi_char16_t *var_name, u8 *data, unsigned long len)
+efivar_validate(efi_guid_t vendor, efi_char16_t *var_name, u8 *data,
+		unsigned long data_size)
 {
 	int i;
-	u16 *unicode_name = var_name;
+	unsigned long utf8_size;
+	u8 *utf8_name;
 
-	for (i = 0; variable_validate[i].validate != NULL; i++) {
+	utf8_size = ucs2_utf8size(var_name);
+	utf8_name = kmalloc(utf8_size + 1, GFP_KERNEL);
+	if (!utf8_name)
+		return false;
+
+	ucs2_as_utf8(utf8_name, var_name, utf8_size);
+	utf8_name[utf8_size] = '\0';
+
+	for (i = 0; variable_validate[i].name[0] != '\0'; i++) {
 		const char *name = variable_validate[i].name;
-		int match;
+		int match = 0;
 
-		for (match = 0; ; match++) {
-			char c = name[match];
-			u16 u = unicode_name[match];
+		if (efi_guidcmp(vendor, variable_validate[i].vendor))
+			continue;
 
-			/* All special variables are plain ascii */
-			if (u > 127)
-				return true;
-
-			/* Wildcard in the matching name means we've matched */
-			if (c == '*')
-				return variable_validate[i].validate(var_name,
-							     match, data, len);
-
-			/* Case sensitive match */
-			if (c != u)
+		if (variable_matches(utf8_name, utf8_size+1, name, &match)) {
+			if (variable_validate[i].validate == NULL)
 				break;
-
-			/* Reached the end of the string while matching */
-			if (!c)
-				return variable_validate[i].validate(var_name,
-							     match, data, len);
+			kfree(utf8_name);
+			return variable_validate[i].validate(var_name, match,
+							     data, data_size);
 		}
 	}
-
+	kfree(utf8_name);
 	return true;
 }
 EXPORT_SYMBOL_GPL(efivar_validate);
 
+bool
+efivar_variable_is_removable(efi_guid_t vendor, const char *var_name,
+			     size_t len)
+{
+	int i;
+	bool found = false;
+	int match = 0;
+
+	/*
+	 * Check if our variable is in the validated variables list
+	 */
+	for (i = 0; variable_validate[i].name[0] != '\0'; i++) {
+		if (efi_guidcmp(variable_validate[i].vendor, vendor))
+			continue;
+
+		if (variable_matches(var_name, len,
+				     variable_validate[i].name, &match)) {
+			found = true;
+			break;
+		}
+	}
+
+	/*
+	 * If it's in our list, it is removable.
+	 */
+	return found;
+}
+EXPORT_SYMBOL_GPL(efivar_variable_is_removable);
+
 static efi_status_t
 check_var_size(u32 attributes, unsigned long size)
 {
@@ -852,7 +918,7 @@
 
 	*set = false;
 
-	if (efivar_validate(name, data, *size) == false)
+	if (efivar_validate(*vendor, name, data, *size) == false)
 		return -EINVAL;
 
 	/*
diff --git a/drivers/gpio/gpio-altera.c b/drivers/gpio/gpio-altera.c
index 2aeaebd..3f87a03 100644
--- a/drivers/gpio/gpio-altera.c
+++ b/drivers/gpio/gpio-altera.c
@@ -312,8 +312,8 @@
 		handle_simple_irq, IRQ_TYPE_NONE);
 
 	if (ret) {
-		dev_info(&pdev->dev, "could not add irqchip\n");
-		return ret;
+		dev_err(&pdev->dev, "could not add irqchip\n");
+		goto teardown;
 	}
 
 	gpiochip_set_chained_irqchip(&altera_gc->mmchip.gc,
@@ -326,6 +326,7 @@
 skip_irq:
 	return 0;
 teardown:
+	of_mm_gpiochip_remove(&altera_gc->mmchip);
 	pr_err("%s: registration failed with status %d\n",
 		node->full_name, ret);
 
diff --git a/drivers/gpio/gpio-davinci.c b/drivers/gpio/gpio-davinci.c
index ec58f42..cd007a6 100644
--- a/drivers/gpio/gpio-davinci.c
+++ b/drivers/gpio/gpio-davinci.c
@@ -195,7 +195,7 @@
 static int davinci_gpio_probe(struct platform_device *pdev)
 {
 	int i, base;
-	unsigned ngpio;
+	unsigned ngpio, nbank;
 	struct davinci_gpio_controller *chips;
 	struct davinci_gpio_platform_data *pdata;
 	struct davinci_gpio_regs __iomem *regs;
@@ -224,8 +224,9 @@
 	if (WARN_ON(ARCH_NR_GPIOS < ngpio))
 		ngpio = ARCH_NR_GPIOS;
 
+	nbank = DIV_ROUND_UP(ngpio, 32);
 	chips = devm_kzalloc(dev,
-			     ngpio * sizeof(struct davinci_gpio_controller),
+			     nbank * sizeof(struct davinci_gpio_controller),
 			     GFP_KERNEL);
 	if (!chips)
 		return -ENOMEM;
@@ -511,7 +512,7 @@
 			return irq;
 		}
 
-		irq_domain = irq_domain_add_legacy(NULL, ngpio, irq, 0,
+		irq_domain = irq_domain_add_legacy(dev->of_node, ngpio, irq, 0,
 							&davinci_gpio_irq_ops,
 							chips);
 		if (!irq_domain) {
diff --git a/drivers/gpio/gpio-rcar.c b/drivers/gpio/gpio-rcar.c
index cf41440..d9ab0cd 100644
--- a/drivers/gpio/gpio-rcar.c
+++ b/drivers/gpio/gpio-rcar.c
@@ -196,6 +196,44 @@
 	return 0;
 }
 
+static void gpio_rcar_irq_bus_lock(struct irq_data *d)
+{
+	struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
+	struct gpio_rcar_priv *p = gpiochip_get_data(gc);
+
+	pm_runtime_get_sync(&p->pdev->dev);
+}
+
+static void gpio_rcar_irq_bus_sync_unlock(struct irq_data *d)
+{
+	struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
+	struct gpio_rcar_priv *p = gpiochip_get_data(gc);
+
+	pm_runtime_put(&p->pdev->dev);
+}
+
+
+static int gpio_rcar_irq_request_resources(struct irq_data *d)
+{
+	struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
+	struct gpio_rcar_priv *p = gpiochip_get_data(gc);
+	int error;
+
+	error = pm_runtime_get_sync(&p->pdev->dev);
+	if (error < 0)
+		return error;
+
+	return 0;
+}
+
+static void gpio_rcar_irq_release_resources(struct irq_data *d)
+{
+	struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
+	struct gpio_rcar_priv *p = gpiochip_get_data(gc);
+
+	pm_runtime_put(&p->pdev->dev);
+}
+
 static irqreturn_t gpio_rcar_irq_handler(int irq, void *dev_id)
 {
 	struct gpio_rcar_priv *p = dev_id;
@@ -450,6 +488,10 @@
 	irq_chip->irq_unmask = gpio_rcar_irq_enable;
 	irq_chip->irq_set_type = gpio_rcar_irq_set_type;
 	irq_chip->irq_set_wake = gpio_rcar_irq_set_wake;
+	irq_chip->irq_bus_lock = gpio_rcar_irq_bus_lock;
+	irq_chip->irq_bus_sync_unlock = gpio_rcar_irq_bus_sync_unlock;
+	irq_chip->irq_request_resources = gpio_rcar_irq_request_resources;
+	irq_chip->irq_release_resources = gpio_rcar_irq_release_resources;
 	irq_chip->flags	= IRQCHIP_SET_TYPE_MASKED | IRQCHIP_MASK_ON_SUSPEND;
 
 	ret = gpiochip_add_data(gpio_chip, p);
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index 66f729e..20c9539 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -25,7 +25,7 @@
 	amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o
 
 # add asic specific block
-amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o gmc_v7_0.o cik_ih.o kv_smc.o kv_dpm.o \
+amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \
 	ci_smc.o ci_dpm.o dce_v8_0.o gfx_v7_0.o cik_sdma.o uvd_v4_2.o vce_v2_0.o \
 	amdgpu_amdkfd_gfx_v7.o
 
@@ -34,6 +34,7 @@
 
 # add GMC block
 amdgpu-y += \
+	gmc_v7_0.o \
 	gmc_v8_0.o
 
 # add IH block
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 313b0cc..5e7770f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -87,6 +87,8 @@
 extern int amdgpu_sched_hw_submission;
 extern int amdgpu_enable_semaphores;
 extern int amdgpu_powerplay;
+extern unsigned amdgpu_pcie_gen_cap;
+extern unsigned amdgpu_pcie_lane_cap;
 
 #define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS	        3000
 #define AMDGPU_MAX_USEC_TIMEOUT			100000	/* 100 ms */
@@ -132,47 +134,6 @@
 #define AMDGPU_RESET_VCE			(1 << 13)
 #define AMDGPU_RESET_VCE1			(1 << 14)
 
-/* CG block flags */
-#define AMDGPU_CG_BLOCK_GFX			(1 << 0)
-#define AMDGPU_CG_BLOCK_MC			(1 << 1)
-#define AMDGPU_CG_BLOCK_SDMA			(1 << 2)
-#define AMDGPU_CG_BLOCK_UVD			(1 << 3)
-#define AMDGPU_CG_BLOCK_VCE			(1 << 4)
-#define AMDGPU_CG_BLOCK_HDP			(1 << 5)
-#define AMDGPU_CG_BLOCK_BIF			(1 << 6)
-
-/* CG flags */
-#define AMDGPU_CG_SUPPORT_GFX_MGCG		(1 << 0)
-#define AMDGPU_CG_SUPPORT_GFX_MGLS		(1 << 1)
-#define AMDGPU_CG_SUPPORT_GFX_CGCG		(1 << 2)
-#define AMDGPU_CG_SUPPORT_GFX_CGLS		(1 << 3)
-#define AMDGPU_CG_SUPPORT_GFX_CGTS		(1 << 4)
-#define AMDGPU_CG_SUPPORT_GFX_CGTS_LS		(1 << 5)
-#define AMDGPU_CG_SUPPORT_GFX_CP_LS		(1 << 6)
-#define AMDGPU_CG_SUPPORT_GFX_RLC_LS		(1 << 7)
-#define AMDGPU_CG_SUPPORT_MC_LS			(1 << 8)
-#define AMDGPU_CG_SUPPORT_MC_MGCG		(1 << 9)
-#define AMDGPU_CG_SUPPORT_SDMA_LS		(1 << 10)
-#define AMDGPU_CG_SUPPORT_SDMA_MGCG		(1 << 11)
-#define AMDGPU_CG_SUPPORT_BIF_LS		(1 << 12)
-#define AMDGPU_CG_SUPPORT_UVD_MGCG		(1 << 13)
-#define AMDGPU_CG_SUPPORT_VCE_MGCG		(1 << 14)
-#define AMDGPU_CG_SUPPORT_HDP_LS		(1 << 15)
-#define AMDGPU_CG_SUPPORT_HDP_MGCG		(1 << 16)
-
-/* PG flags */
-#define AMDGPU_PG_SUPPORT_GFX_PG		(1 << 0)
-#define AMDGPU_PG_SUPPORT_GFX_SMG		(1 << 1)
-#define AMDGPU_PG_SUPPORT_GFX_DMG		(1 << 2)
-#define AMDGPU_PG_SUPPORT_UVD			(1 << 3)
-#define AMDGPU_PG_SUPPORT_VCE			(1 << 4)
-#define AMDGPU_PG_SUPPORT_CP			(1 << 5)
-#define AMDGPU_PG_SUPPORT_GDS			(1 << 6)
-#define AMDGPU_PG_SUPPORT_RLC_SMU_HS		(1 << 7)
-#define AMDGPU_PG_SUPPORT_SDMA			(1 << 8)
-#define AMDGPU_PG_SUPPORT_ACP			(1 << 9)
-#define AMDGPU_PG_SUPPORT_SAMU			(1 << 10)
-
 /* GFX current status */
 #define AMDGPU_GFX_NORMAL_MODE			0x00000000L
 #define AMDGPU_GFX_SAFE_MODE			0x00000001L
@@ -606,8 +567,6 @@
 	uint32_t		align;
 };
 
-struct amdgpu_sa_bo;
-
 /* sub-allocation buffer */
 struct amdgpu_sa_bo {
 	struct list_head		olist;
@@ -2278,60 +2237,60 @@
 #define amdgpu_dpm_enable_bapm(adev, e) (adev)->pm.funcs->enable_bapm((adev), (e))
 
 #define amdgpu_dpm_get_temperature(adev) \
-	(adev)->pp_enabled ?						\
+	((adev)->pp_enabled ?						\
 	      (adev)->powerplay.pp_funcs->get_temperature((adev)->powerplay.pp_handle) : \
-	      (adev)->pm.funcs->get_temperature((adev))
+	      (adev)->pm.funcs->get_temperature((adev)))
 
 #define amdgpu_dpm_set_fan_control_mode(adev, m) \
-	(adev)->pp_enabled ?						\
+	((adev)->pp_enabled ?						\
 	      (adev)->powerplay.pp_funcs->set_fan_control_mode((adev)->powerplay.pp_handle, (m)) : \
-	      (adev)->pm.funcs->set_fan_control_mode((adev), (m))
+	      (adev)->pm.funcs->set_fan_control_mode((adev), (m)))
 
 #define amdgpu_dpm_get_fan_control_mode(adev) \
-	(adev)->pp_enabled ?						\
+	((adev)->pp_enabled ?						\
 	      (adev)->powerplay.pp_funcs->get_fan_control_mode((adev)->powerplay.pp_handle) : \
-	      (adev)->pm.funcs->get_fan_control_mode((adev))
+	      (adev)->pm.funcs->get_fan_control_mode((adev)))
 
 #define amdgpu_dpm_set_fan_speed_percent(adev, s) \
-	(adev)->pp_enabled ?						\
+	((adev)->pp_enabled ?						\
 	      (adev)->powerplay.pp_funcs->set_fan_speed_percent((adev)->powerplay.pp_handle, (s)) : \
-	      (adev)->pm.funcs->set_fan_speed_percent((adev), (s))
+	      (adev)->pm.funcs->set_fan_speed_percent((adev), (s)))
 
 #define amdgpu_dpm_get_fan_speed_percent(adev, s) \
-	(adev)->pp_enabled ?						\
+	((adev)->pp_enabled ?						\
 	      (adev)->powerplay.pp_funcs->get_fan_speed_percent((adev)->powerplay.pp_handle, (s)) : \
-	      (adev)->pm.funcs->get_fan_speed_percent((adev), (s))
+	      (adev)->pm.funcs->get_fan_speed_percent((adev), (s)))
 
 #define amdgpu_dpm_get_sclk(adev, l) \
-	(adev)->pp_enabled ?						\
+	((adev)->pp_enabled ?						\
 	      (adev)->powerplay.pp_funcs->get_sclk((adev)->powerplay.pp_handle, (l)) : \
-		(adev)->pm.funcs->get_sclk((adev), (l))
+		(adev)->pm.funcs->get_sclk((adev), (l)))
 
 #define amdgpu_dpm_get_mclk(adev, l)  \
-	(adev)->pp_enabled ?						\
+	((adev)->pp_enabled ?						\
 	      (adev)->powerplay.pp_funcs->get_mclk((adev)->powerplay.pp_handle, (l)) : \
-	      (adev)->pm.funcs->get_mclk((adev), (l))
+	      (adev)->pm.funcs->get_mclk((adev), (l)))
 
 
 #define amdgpu_dpm_force_performance_level(adev, l) \
-	(adev)->pp_enabled ?						\
+	((adev)->pp_enabled ?						\
 	      (adev)->powerplay.pp_funcs->force_performance_level((adev)->powerplay.pp_handle, (l)) : \
-	      (adev)->pm.funcs->force_performance_level((adev), (l))
+	      (adev)->pm.funcs->force_performance_level((adev), (l)))
 
 #define amdgpu_dpm_powergate_uvd(adev, g) \
-	(adev)->pp_enabled ?						\
+	((adev)->pp_enabled ?						\
 	      (adev)->powerplay.pp_funcs->powergate_uvd((adev)->powerplay.pp_handle, (g)) : \
-	      (adev)->pm.funcs->powergate_uvd((adev), (g))
+	      (adev)->pm.funcs->powergate_uvd((adev), (g)))
 
 #define amdgpu_dpm_powergate_vce(adev, g) \
-	(adev)->pp_enabled ?						\
+	((adev)->pp_enabled ?						\
 	      (adev)->powerplay.pp_funcs->powergate_vce((adev)->powerplay.pp_handle, (g)) : \
-	      (adev)->pm.funcs->powergate_vce((adev), (g))
+	      (adev)->pm.funcs->powergate_vce((adev), (g)))
 
 #define amdgpu_dpm_debugfs_print_current_performance_level(adev, m) \
-	(adev)->pp_enabled ?						\
+	((adev)->pp_enabled ?						\
 	      (adev)->powerplay.pp_funcs->print_current_performance_level((adev)->powerplay.pp_handle, (m)) : \
-	      (adev)->pm.funcs->debugfs_print_current_performance_level((adev), (m))
+	      (adev)->pm.funcs->debugfs_print_current_performance_level((adev), (m)))
 
 #define amdgpu_dpm_get_current_power_state(adev) \
 	(adev)->powerplay.pp_funcs->get_current_power_state((adev)->powerplay.pp_handle)
@@ -2360,6 +2319,8 @@
 int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
 				     uint32_t flags);
 bool amdgpu_ttm_tt_has_userptr(struct ttm_tt *ttm);
+bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
+				  unsigned long end);
 bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm);
 uint32_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
 				 struct ttm_mem_reg *mem);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
index 0e13763..362bedc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -154,7 +154,7 @@
 	.get_fw_version = get_fw_version
 };
 
-struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions()
+struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void)
 {
 	return (struct kfd2kgd_calls *)&kfd2kgd;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
index 79fa5c7..04b744d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -115,7 +115,7 @@
 	.get_fw_version = get_fw_version
 };
 
-struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions()
+struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void)
 {
 	return (struct kfd2kgd_calls *)&kfd2kgd;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
index a081dda..7a4b101 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
@@ -795,6 +795,12 @@
 	case CGS_SYSTEM_INFO_PCIE_MLW:
 		sys_info->value = adev->pm.pcie_mlw_mask;
 		break;
+	case CGS_SYSTEM_INFO_CG_FLAGS:
+		sys_info->value = adev->cg_flags;
+		break;
+	case CGS_SYSTEM_INFO_PG_FLAGS:
+		sys_info->value = adev->pg_flags;
+		break;
 	default:
 		return -ENODEV;
 	}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
index 89c3dd6..119cdc2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
@@ -77,7 +77,7 @@
 			} else if (amdgpu_atombios_dp_needs_link_train(amdgpu_connector)) {
 				/* Don't try to start link training before we
 				 * have the dpcd */
-				if (!amdgpu_atombios_dp_get_dpcd(amdgpu_connector))
+				if (amdgpu_atombios_dp_get_dpcd(amdgpu_connector))
 					return;
 
 				/* set it to OFF so that drm_helper_connector_dpms()
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 6f89f8e..b882e81 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -478,9 +478,9 @@
 	struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
 	unsigned i;
 
-	amdgpu_vm_move_pt_bos_in_lru(parser->adev, &fpriv->vm);
-
 	if (!error) {
+		amdgpu_vm_move_pt_bos_in_lru(parser->adev, &fpriv->vm);
+
 		/* Sort the buffer list from the smallest to largest buffer,
 		 * which affects the order of buffers in the LRU list.
 		 * This assures that the smallest buffers are added first
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 6553146..51bfc11 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1795,15 +1795,20 @@
 	}
 
 	/* post card */
-	amdgpu_atom_asic_init(adev->mode_info.atom_context);
+	if (!amdgpu_card_posted(adev))
+		amdgpu_atom_asic_init(adev->mode_info.atom_context);
 
 	r = amdgpu_resume(adev);
+	if (r)
+		DRM_ERROR("amdgpu_resume failed (%d).\n", r);
 
 	amdgpu_fence_driver_resume(adev);
 
-	r = amdgpu_ib_ring_tests(adev);
-	if (r)
-		DRM_ERROR("ib ring test failed (%d).\n", r);
+	if (resume) {
+		r = amdgpu_ib_ring_tests(adev);
+		if (r)
+			DRM_ERROR("ib ring test failed (%d).\n", r);
+	}
 
 	r = amdgpu_late_init(adev);
 	if (r)
@@ -1933,80 +1938,97 @@
 	return r;
 }
 
+#define AMDGPU_DEFAULT_PCIE_GEN_MASK 0x30007  /* gen: chipset 1/2, asic 1/2/3 */
+#define AMDGPU_DEFAULT_PCIE_MLW_MASK 0x2f0000 /* 1/2/4/8/16 lanes */
+
 void amdgpu_get_pcie_info(struct amdgpu_device *adev)
 {
 	u32 mask;
 	int ret;
 
-	if (pci_is_root_bus(adev->pdev->bus))
+	if (amdgpu_pcie_gen_cap)
+		adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
+
+	if (amdgpu_pcie_lane_cap)
+		adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
+
+	/* covers APUs as well */
+	if (pci_is_root_bus(adev->pdev->bus)) {
+		if (adev->pm.pcie_gen_mask == 0)
+			adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
+		if (adev->pm.pcie_mlw_mask == 0)
+			adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
 		return;
-
-	if (amdgpu_pcie_gen2 == 0)
-		return;
-
-	if (adev->flags & AMD_IS_APU)
-		return;
-
-	ret = drm_pcie_get_speed_cap_mask(adev->ddev, &mask);
-	if (!ret) {
-		adev->pm.pcie_gen_mask = (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
-					  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
-					  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
-
-		if (mask & DRM_PCIE_SPEED_25)
-			adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
-		if (mask & DRM_PCIE_SPEED_50)
-			adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2;
-		if (mask & DRM_PCIE_SPEED_80)
-			adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3;
 	}
-	ret = drm_pcie_get_max_link_width(adev->ddev, &mask);
-	if (!ret) {
-		switch (mask) {
-		case 32:
-			adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
-						  CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
-						  CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
-						  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
-						  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
-						  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
-						  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
-			break;
-		case 16:
-			adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
-						  CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
-						  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
-						  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
-						  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
-						  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
-			break;
-		case 12:
-			adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
-						  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
-						  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
-						  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
-						  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
-			break;
-		case 8:
-			adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
-						  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
-						  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
-						  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
-			break;
-		case 4:
-			adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
-						  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
-						  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
-			break;
-		case 2:
-			adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
-						  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
-			break;
-		case 1:
-			adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
-			break;
-		default:
-			break;
+
+	if (adev->pm.pcie_gen_mask == 0) {
+		ret = drm_pcie_get_speed_cap_mask(adev->ddev, &mask);
+		if (!ret) {
+			adev->pm.pcie_gen_mask = (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
+						  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
+						  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
+
+			if (mask & DRM_PCIE_SPEED_25)
+				adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
+			if (mask & DRM_PCIE_SPEED_50)
+				adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2;
+			if (mask & DRM_PCIE_SPEED_80)
+				adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3;
+		} else {
+			adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
+		}
+	}
+	if (adev->pm.pcie_mlw_mask == 0) {
+		ret = drm_pcie_get_max_link_width(adev->ddev, &mask);
+		if (!ret) {
+			switch (mask) {
+			case 32:
+				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
+							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
+							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
+							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
+							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
+							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
+							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
+				break;
+			case 16:
+				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
+							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
+							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
+							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
+							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
+							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
+				break;
+			case 12:
+				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
+							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
+							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
+							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
+							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
+				break;
+			case 8:
+				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
+							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
+							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
+							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
+				break;
+			case 4:
+				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
+							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
+							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
+				break;
+			case 2:
+				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
+							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
+				break;
+			case 1:
+				adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
+				break;
+			default:
+				break;
+			}
+		} else {
+			adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
 		}
 	}
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index acd066d0..1846d65 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -72,8 +72,8 @@
 
 	struct drm_crtc *crtc = &amdgpuCrtc->base;
 	unsigned long flags;
-	unsigned i;
-	int vpos, hpos, stat, min_udelay;
+	unsigned i, repcnt = 4;
+	int vpos, hpos, stat, min_udelay = 0;
 	struct drm_vblank_crtc *vblank = &crtc->dev->vblank[work->crtc_id];
 
 	amdgpu_flip_wait_fence(adev, &work->excl);
@@ -96,7 +96,7 @@
 	 * In practice this won't execute very often unless on very fast
 	 * machines because the time window for this to happen is very small.
 	 */
-	for (;;) {
+	while (amdgpuCrtc->enabled && --repcnt) {
 		/* GET_DISTANCE_TO_VBLANKSTART returns distance to real vblank
 		 * start in hpos, and to the "fudged earlier" vblank start in
 		 * vpos.
@@ -112,12 +112,24 @@
 			break;
 
 		/* Sleep at least until estimated real start of hw vblank */
-		spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
 		min_udelay = (-hpos + 1) * max(vblank->linedur_ns / 1000, 5);
+		if (min_udelay > vblank->framedur_ns / 2000) {
+			/* Don't wait ridiculously long - something is wrong */
+			repcnt = 0;
+			break;
+		}
+		spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
 		usleep_range(min_udelay, 2 * min_udelay);
 		spin_lock_irqsave(&crtc->dev->event_lock, flags);
 	};
 
+	if (!repcnt)
+		DRM_DEBUG_DRIVER("Delay problem on crtc %d: min_udelay %d, "
+				 "framedur %d, linedur %d, stat %d, vpos %d, "
+				 "hpos %d\n", work->crtc_id, min_udelay,
+				 vblank->framedur_ns / 1000,
+				 vblank->linedur_ns / 1000, stat, vpos, hpos);
+
 	/* do the flip (mmio) */
 	adev->mode_info.funcs->page_flip(adev, work->crtc_id, work->base);
 	/* set the flip status */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index b5dbbb5..9ef1db8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -83,6 +83,8 @@
 int amdgpu_sched_hw_submission = 2;
 int amdgpu_enable_semaphores = 0;
 int amdgpu_powerplay = -1;
+unsigned amdgpu_pcie_gen_cap = 0;
+unsigned amdgpu_pcie_lane_cap = 0;
 
 MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes");
 module_param_named(vramlimit, amdgpu_vram_limit, int, 0600);
@@ -170,6 +172,12 @@
 module_param_named(powerplay, amdgpu_powerplay, int, 0444);
 #endif
 
+MODULE_PARM_DESC(pcie_gen_cap, "PCIE Gen Caps (0: autodetect (default))");
+module_param_named(pcie_gen_cap, amdgpu_pcie_gen_cap, uint, 0444);
+
+MODULE_PARM_DESC(pcie_lane_cap, "PCIE Lane Caps (0: autodetect (default))");
+module_param_named(pcie_lane_cap, amdgpu_pcie_lane_cap, uint, 0444);
+
 static struct pci_device_id pciidlist[] = {
 #ifdef CONFIG_DRM_AMDGPU_CIK
 	/* Kaveri */
@@ -256,11 +264,11 @@
 	{0x1002, 0x985F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMD_IS_MOBILITY|AMD_IS_APU},
 #endif
 	/* topaz */
-	{0x1002, 0x6900, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TOPAZ|AMD_EXP_HW_SUPPORT},
-	{0x1002, 0x6901, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TOPAZ|AMD_EXP_HW_SUPPORT},
-	{0x1002, 0x6902, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TOPAZ|AMD_EXP_HW_SUPPORT},
-	{0x1002, 0x6903, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TOPAZ|AMD_EXP_HW_SUPPORT},
-	{0x1002, 0x6907, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TOPAZ|AMD_EXP_HW_SUPPORT},
+	{0x1002, 0x6900, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TOPAZ},
+	{0x1002, 0x6901, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TOPAZ},
+	{0x1002, 0x6902, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TOPAZ},
+	{0x1002, 0x6903, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TOPAZ},
+	{0x1002, 0x6907, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TOPAZ},
 	/* tonga */
 	{0x1002, 0x6920, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TONGA},
 	{0x1002, 0x6921, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TONGA},
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
index cfb6caa..9191467 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
@@ -333,6 +333,10 @@
 	if (!adev->mode_info.mode_config_initialized)
 		return 0;
 
+	/* don't init fbdev if there are no connectors */
+	if (list_empty(&adev->ddev->mode_config.connector_list))
+		return 0;
+
 	/* select 8 bpp console on low vram cards */
 	if (adev->mc.real_vram_size <= (32*1024*1024))
 		bpp_sel = 8;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 7380f78..d20c2a8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -596,7 +596,8 @@
 		break;
 	}
 	ttm_eu_backoff_reservation(&ticket, &list);
-	if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE))
+	if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) &&
+	    !amdgpu_vm_debug)
 		amdgpu_gem_va_update_vm(adev, bo_va, args->operation);
 
 	drm_gem_object_unreference_unlocked(gobj);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
index b1969f2..d4e2780 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
@@ -142,7 +142,8 @@
 
 		list_for_each_entry(bo, &node->bos, mn_list) {
 
-			if (!bo->tbo.ttm || bo->tbo.ttm->state != tt_bound)
+			if (!amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, start,
+							  end))
 				continue;
 
 			r = amdgpu_bo_reserve(bo, true);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index c3ce103..b8fbbd7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -33,6 +33,7 @@
 #include <linux/slab.h>
 #include <drm/drmP.h>
 #include <drm/amdgpu_drm.h>
+#include <drm/drm_cache.h>
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
 
@@ -261,6 +262,13 @@
 				       AMDGPU_GEM_DOMAIN_OA);
 
 	bo->flags = flags;
+
+	/* For architectures that don't support WC memory,
+	 * mask out the WC flag from the BO
+	 */
+	if (!drm_arch_can_wc_memory())
+		bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC;
+
 	amdgpu_fill_placement_to_bo(bo, placement);
 	/* Kernel allocation are uninterruptible */
 	r = ttm_bo_init(&adev->mman.bdev, &bo->tbo, size, type,
@@ -399,7 +407,8 @@
 		}
 		if (fpfn > bo->placements[i].fpfn)
 			bo->placements[i].fpfn = fpfn;
-		if (lpfn && lpfn < bo->placements[i].lpfn)
+		if (!bo->placements[i].lpfn ||
+		    (lpfn && lpfn < bo->placements[i].lpfn))
 			bo->placements[i].lpfn = lpfn;
 		bo->placements[i].flags |= TTM_PL_FLAG_NO_EVICT;
 	}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
index 7d8d84e..95a4a25 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
@@ -113,6 +113,10 @@
 	struct drm_device *ddev = dev_get_drvdata(dev);
 	struct amdgpu_device *adev = ddev->dev_private;
 
+	if  ((adev->flags & AMD_IS_PX) &&
+	     (ddev->switch_power_state != DRM_SWITCH_POWER_ON))
+		return snprintf(buf, PAGE_SIZE, "off\n");
+
 	if (adev->pp_enabled) {
 		enum amd_dpm_forced_level level;
 
@@ -140,6 +144,11 @@
 	enum amdgpu_dpm_forced_level level;
 	int ret = 0;
 
+	/* Can't force performance level when the card is off */
+	if  ((adev->flags & AMD_IS_PX) &&
+	     (ddev->switch_power_state != DRM_SWITCH_POWER_ON))
+		return -EINVAL;
+
 	if (strncmp("low", buf, strlen("low")) == 0) {
 		level = AMDGPU_DPM_FORCED_LEVEL_LOW;
 	} else if (strncmp("high", buf, strlen("high")) == 0) {
@@ -157,6 +166,7 @@
 		mutex_lock(&adev->pm.mutex);
 		if (adev->pm.dpm.thermal_active) {
 			count = -EINVAL;
+			mutex_unlock(&adev->pm.mutex);
 			goto fail;
 		}
 		ret = amdgpu_dpm_force_performance_level(adev, level);
@@ -167,8 +177,6 @@
 		mutex_unlock(&adev->pm.mutex);
 	}
 fail:
-	mutex_unlock(&adev->pm.mutex);
-
 	return count;
 }
 
@@ -182,8 +190,14 @@
 				      char *buf)
 {
 	struct amdgpu_device *adev = dev_get_drvdata(dev);
+	struct drm_device *ddev = adev->ddev;
 	int temp;
 
+	/* Can't get temperature when the card is off */
+	if  ((adev->flags & AMD_IS_PX) &&
+	     (ddev->switch_power_state != DRM_SWITCH_POWER_ON))
+		return -EINVAL;
+
 	if (!adev->pp_enabled && !adev->pm.funcs->get_temperature)
 		temp = 0;
 	else
@@ -634,11 +648,6 @@
 
 	/* update display watermarks based on new power state */
 	amdgpu_display_bandwidth_update(adev);
-	/* update displays */
-	amdgpu_dpm_display_configuration_changed(adev);
-
-	adev->pm.dpm.current_active_crtcs = adev->pm.dpm.new_active_crtcs;
-	adev->pm.dpm.current_active_crtc_count = adev->pm.dpm.new_active_crtc_count;
 
 	/* wait for the rings to drain */
 	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
@@ -655,6 +664,12 @@
 
 	amdgpu_dpm_post_set_power_state(adev);
 
+	/* update displays */
+	amdgpu_dpm_display_configuration_changed(adev);
+
+	adev->pm.dpm.current_active_crtcs = adev->pm.dpm.new_active_crtcs;
+	adev->pm.dpm.current_active_crtc_count = adev->pm.dpm.new_active_crtc_count;
+
 	if (adev->pm.funcs->force_performance_level) {
 		if (adev->pm.dpm.thermal_active) {
 			enum amdgpu_dpm_forced_level level = adev->pm.dpm.forced_level;
@@ -847,12 +862,16 @@
 	struct drm_info_node *node = (struct drm_info_node *) m->private;
 	struct drm_device *dev = node->minor->dev;
 	struct amdgpu_device *adev = dev->dev_private;
+	struct drm_device *ddev = adev->ddev;
 
 	if (!adev->pm.dpm_enabled) {
 		seq_printf(m, "dpm not enabled\n");
 		return 0;
 	}
-	if (adev->pp_enabled) {
+	if  ((adev->flags & AMD_IS_PX) &&
+	     (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) {
+		seq_printf(m, "PX asic powered off\n");
+	} else if (adev->pp_enabled) {
 		amdgpu_dpm_debugfs_print_current_performance_level(adev, m);
 	} else {
 		mutex_lock(&adev->pm.mutex);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c
index 5ee9a06..3cb6d6c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c
@@ -99,13 +99,24 @@
 
 #ifdef CONFIG_DRM_AMD_POWERPLAY
 	switch (adev->asic_type) {
-		case CHIP_TONGA:
-		case CHIP_FIJI:
-			adev->pp_enabled = (amdgpu_powerplay > 0) ? true : false;
-			break;
-		default:
-			adev->pp_enabled = (amdgpu_powerplay > 0) ? true : false;
-			break;
+	case CHIP_TONGA:
+	case CHIP_FIJI:
+		adev->pp_enabled = (amdgpu_powerplay == 0) ? false : true;
+		break;
+	case CHIP_CARRIZO:
+	case CHIP_STONEY:
+		adev->pp_enabled = (amdgpu_powerplay > 0) ? true : false;
+		break;
+	/* These chips don't have powerplay implemenations */
+	case CHIP_BONAIRE:
+	case CHIP_HAWAII:
+	case CHIP_KABINI:
+	case CHIP_MULLINS:
+	case CHIP_KAVERI:
+	case CHIP_TOPAZ:
+	default:
+		adev->pp_enabled = false;
+		break;
 	}
 #else
 	adev->pp_enabled = false;
@@ -132,8 +143,10 @@
 					adev->powerplay.pp_handle);
 
 #ifdef CONFIG_DRM_AMD_POWERPLAY
-	if (adev->pp_enabled)
+	if (adev->pp_enabled) {
 		amdgpu_pm_sysfs_init(adev);
+		amdgpu_dpm_dispatch_task(adev, AMD_PP_EVENT_COMPLETE_INIT, NULL, NULL);
+	}
 #endif
 	return ret;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 78e9b0f..d1f234d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -487,7 +487,7 @@
 	seq_printf(m, "rptr: 0x%08x [%5d]\n",
 		   rptr, rptr);
 
-	rptr_next = ~0;
+	rptr_next = le32_to_cpu(*ring->next_rptr_cpu_addr);
 
 	seq_printf(m, "driver's copy of the wptr: 0x%08x [%5d]\n",
 		   ring->wptr, ring->wptr);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
index 8b88edb..ca72a2e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
@@ -354,12 +354,15 @@
 
 		for (i = 0, count = 0; i < AMDGPU_MAX_RINGS; ++i)
 			if (fences[i])
-				fences[count++] = fences[i];
+				fences[count++] = fence_get(fences[i]);
 
 		if (count) {
 			spin_unlock(&sa_manager->wq.lock);
 			t = fence_wait_any_timeout(fences, count, false,
 						   MAX_SCHEDULE_TIMEOUT);
+			for (i = 0; i < count; ++i)
+				fence_put(fences[i]);
+
 			r = (t > 0) ? 0 : t;
 			spin_lock(&sa_manager->wq.lock);
 		} else {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 8a1752f..1cbb16e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -712,7 +712,7 @@
 						       0, PAGE_SIZE,
 						       PCI_DMA_BIDIRECTIONAL);
 		if (pci_dma_mapping_error(adev->pdev, gtt->ttm.dma_address[i])) {
-			while (--i) {
+			while (i--) {
 				pci_unmap_page(adev->pdev, gtt->ttm.dma_address[i],
 					       PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
 				gtt->ttm.dma_address[i] = 0;
@@ -783,6 +783,25 @@
 	return !!gtt->userptr;
 }
 
+bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
+				  unsigned long end)
+{
+	struct amdgpu_ttm_tt *gtt = (void *)ttm;
+	unsigned long size;
+
+	if (gtt == NULL)
+		return false;
+
+	if (gtt->ttm.ttm.state != tt_bound || !gtt->userptr)
+		return false;
+
+	size = (unsigned long)gtt->ttm.ttm.num_pages * PAGE_SIZE;
+	if (gtt->userptr > end || gtt->userptr + size <= start)
+		return false;
+
+	return true;
+}
+
 bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm)
 {
 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
@@ -808,7 +827,7 @@
 			flags |= AMDGPU_PTE_SNOOPED;
 	}
 
-	if (adev->asic_type >= CHIP_TOPAZ)
+	if (adev->asic_type >= CHIP_TONGA)
 		flags |= AMDGPU_PTE_EXECUTABLE;
 
 	flags |= AMDGPU_PTE_READABLE;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index aefc668..9599f75 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1282,7 +1282,7 @@
 {
 	const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE,
 		AMDGPU_VM_PTE_COUNT * 8);
-	unsigned pd_size, pd_entries, pts_size;
+	unsigned pd_size, pd_entries;
 	int i, r;
 
 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
@@ -1300,8 +1300,7 @@
 	pd_entries = amdgpu_vm_num_pdes(adev);
 
 	/* allocate page table array */
-	pts_size = pd_entries * sizeof(struct amdgpu_vm_pt);
-	vm->page_tables = kzalloc(pts_size, GFP_KERNEL);
+	vm->page_tables = drm_calloc_large(pd_entries, sizeof(struct amdgpu_vm_pt));
 	if (vm->page_tables == NULL) {
 		DRM_ERROR("Cannot allocate memory for page table array\n");
 		return -ENOMEM;
@@ -1361,7 +1360,7 @@
 
 	for (i = 0; i < amdgpu_vm_num_pdes(adev); i++)
 		amdgpu_bo_unref(&vm->page_tables[i].entry.robj);
-	kfree(vm->page_tables);
+	drm_free_large(vm->page_tables);
 
 	amdgpu_bo_unref(&vm->page_directory);
 	fence_put(vm->page_directory_fence);
diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_dp.c b/drivers/gpu/drm/amd/amdgpu/atombios_dp.c
index 21aacc1..bf731e9 100644
--- a/drivers/gpu/drm/amd/amdgpu/atombios_dp.c
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_dp.c
@@ -265,15 +265,27 @@
 	unsigned max_lane_num = drm_dp_max_lane_count(dpcd);
 	unsigned lane_num, i, max_pix_clock;
 
-	for (lane_num = 1; lane_num <= max_lane_num; lane_num <<= 1) {
-		for (i = 0; i < ARRAY_SIZE(link_rates) && link_rates[i] <= max_link_rate; i++) {
-			max_pix_clock = (lane_num * link_rates[i] * 8) / bpp;
+	if (amdgpu_connector_encoder_get_dp_bridge_encoder_id(connector) ==
+	    ENCODER_OBJECT_ID_NUTMEG) {
+		for (lane_num = 1; lane_num <= max_lane_num; lane_num <<= 1) {
+			max_pix_clock = (lane_num * 270000 * 8) / bpp;
 			if (max_pix_clock >= pix_clock) {
 				*dp_lanes = lane_num;
-				*dp_rate = link_rates[i];
+				*dp_rate = 270000;
 				return 0;
 			}
 		}
+	} else {
+		for (lane_num = 1; lane_num <= max_lane_num; lane_num <<= 1) {
+			for (i = 0; i < ARRAY_SIZE(link_rates) && link_rates[i] <= max_link_rate; i++) {
+				max_pix_clock = (lane_num * link_rates[i] * 8) / bpp;
+				if (max_pix_clock >= pix_clock) {
+					*dp_lanes = lane_num;
+					*dp_rate = link_rates[i];
+					return 0;
+				}
+			}
+		}
 	}
 
 	return -EINVAL;
diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
index 8b4731d..474ca02 100644
--- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
@@ -31,6 +31,7 @@
 #include "ci_dpm.h"
 #include "gfx_v7_0.h"
 #include "atom.h"
+#include "amd_pcie.h"
 #include <linux/seq_file.h>
 
 #include "smu/smu_7_0_1_d.h"
@@ -5835,18 +5836,16 @@
 	u8 frev, crev;
 	struct ci_power_info *pi;
 	int ret;
-	u32 mask;
 
 	pi = kzalloc(sizeof(struct ci_power_info), GFP_KERNEL);
 	if (pi == NULL)
 		return -ENOMEM;
 	adev->pm.dpm.priv = pi;
 
-	ret = drm_pcie_get_speed_cap_mask(adev->ddev, &mask);
-	if (ret)
-		pi->sys_pcie_mask = 0;
-	else
-		pi->sys_pcie_mask = mask;
+	pi->sys_pcie_mask =
+		(adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_MASK) >>
+		CAIL_PCIE_LINK_SPEED_SUPPORT_SHIFT;
+
 	pi->force_pcie_gen = AMDGPU_PCIE_GEN_INVALID;
 
 	pi->pcie_gen_performance.max = AMDGPU_PCIE_GEN1;
diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c
index fd9c958..155965e 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik.c
@@ -1762,6 +1762,9 @@
 	if (amdgpu_aspm == 0)
 		return;
 
+	if (pci_is_root_bus(adev->pdev->bus))
+		return;
+
 	/* XXX double check APUs */
 	if (adev->flags & AMD_IS_APU)
 		return;
@@ -2332,72 +2335,72 @@
 	switch (adev->asic_type) {
 	case CHIP_BONAIRE:
 		adev->cg_flags =
-			AMDGPU_CG_SUPPORT_GFX_MGCG |
-			AMDGPU_CG_SUPPORT_GFX_MGLS |
-			/*AMDGPU_CG_SUPPORT_GFX_CGCG |*/
-			AMDGPU_CG_SUPPORT_GFX_CGLS |
-			AMDGPU_CG_SUPPORT_GFX_CGTS |
-			AMDGPU_CG_SUPPORT_GFX_CGTS_LS |
-			AMDGPU_CG_SUPPORT_GFX_CP_LS |
-			AMDGPU_CG_SUPPORT_MC_LS |
-			AMDGPU_CG_SUPPORT_MC_MGCG |
-			AMDGPU_CG_SUPPORT_SDMA_MGCG |
-			AMDGPU_CG_SUPPORT_SDMA_LS |
-			AMDGPU_CG_SUPPORT_BIF_LS |
-			AMDGPU_CG_SUPPORT_VCE_MGCG |
-			AMDGPU_CG_SUPPORT_UVD_MGCG |
-			AMDGPU_CG_SUPPORT_HDP_LS |
-			AMDGPU_CG_SUPPORT_HDP_MGCG;
+			AMD_CG_SUPPORT_GFX_MGCG |
+			AMD_CG_SUPPORT_GFX_MGLS |
+			/*AMD_CG_SUPPORT_GFX_CGCG |*/
+			AMD_CG_SUPPORT_GFX_CGLS |
+			AMD_CG_SUPPORT_GFX_CGTS |
+			AMD_CG_SUPPORT_GFX_CGTS_LS |
+			AMD_CG_SUPPORT_GFX_CP_LS |
+			AMD_CG_SUPPORT_MC_LS |
+			AMD_CG_SUPPORT_MC_MGCG |
+			AMD_CG_SUPPORT_SDMA_MGCG |
+			AMD_CG_SUPPORT_SDMA_LS |
+			AMD_CG_SUPPORT_BIF_LS |
+			AMD_CG_SUPPORT_VCE_MGCG |
+			AMD_CG_SUPPORT_UVD_MGCG |
+			AMD_CG_SUPPORT_HDP_LS |
+			AMD_CG_SUPPORT_HDP_MGCG;
 		adev->pg_flags = 0;
 		adev->external_rev_id = adev->rev_id + 0x14;
 		break;
 	case CHIP_HAWAII:
 		adev->cg_flags =
-			AMDGPU_CG_SUPPORT_GFX_MGCG |
-			AMDGPU_CG_SUPPORT_GFX_MGLS |
-			/*AMDGPU_CG_SUPPORT_GFX_CGCG |*/
-			AMDGPU_CG_SUPPORT_GFX_CGLS |
-			AMDGPU_CG_SUPPORT_GFX_CGTS |
-			AMDGPU_CG_SUPPORT_GFX_CP_LS |
-			AMDGPU_CG_SUPPORT_MC_LS |
-			AMDGPU_CG_SUPPORT_MC_MGCG |
-			AMDGPU_CG_SUPPORT_SDMA_MGCG |
-			AMDGPU_CG_SUPPORT_SDMA_LS |
-			AMDGPU_CG_SUPPORT_BIF_LS |
-			AMDGPU_CG_SUPPORT_VCE_MGCG |
-			AMDGPU_CG_SUPPORT_UVD_MGCG |
-			AMDGPU_CG_SUPPORT_HDP_LS |
-			AMDGPU_CG_SUPPORT_HDP_MGCG;
+			AMD_CG_SUPPORT_GFX_MGCG |
+			AMD_CG_SUPPORT_GFX_MGLS |
+			/*AMD_CG_SUPPORT_GFX_CGCG |*/
+			AMD_CG_SUPPORT_GFX_CGLS |
+			AMD_CG_SUPPORT_GFX_CGTS |
+			AMD_CG_SUPPORT_GFX_CP_LS |
+			AMD_CG_SUPPORT_MC_LS |
+			AMD_CG_SUPPORT_MC_MGCG |
+			AMD_CG_SUPPORT_SDMA_MGCG |
+			AMD_CG_SUPPORT_SDMA_LS |
+			AMD_CG_SUPPORT_BIF_LS |
+			AMD_CG_SUPPORT_VCE_MGCG |
+			AMD_CG_SUPPORT_UVD_MGCG |
+			AMD_CG_SUPPORT_HDP_LS |
+			AMD_CG_SUPPORT_HDP_MGCG;
 		adev->pg_flags = 0;
 		adev->external_rev_id = 0x28;
 		break;
 	case CHIP_KAVERI:
 		adev->cg_flags =
-			AMDGPU_CG_SUPPORT_GFX_MGCG |
-			AMDGPU_CG_SUPPORT_GFX_MGLS |
-			/*AMDGPU_CG_SUPPORT_GFX_CGCG |*/
-			AMDGPU_CG_SUPPORT_GFX_CGLS |
-			AMDGPU_CG_SUPPORT_GFX_CGTS |
-			AMDGPU_CG_SUPPORT_GFX_CGTS_LS |
-			AMDGPU_CG_SUPPORT_GFX_CP_LS |
-			AMDGPU_CG_SUPPORT_SDMA_MGCG |
-			AMDGPU_CG_SUPPORT_SDMA_LS |
-			AMDGPU_CG_SUPPORT_BIF_LS |
-			AMDGPU_CG_SUPPORT_VCE_MGCG |
-			AMDGPU_CG_SUPPORT_UVD_MGCG |
-			AMDGPU_CG_SUPPORT_HDP_LS |
-			AMDGPU_CG_SUPPORT_HDP_MGCG;
+			AMD_CG_SUPPORT_GFX_MGCG |
+			AMD_CG_SUPPORT_GFX_MGLS |
+			/*AMD_CG_SUPPORT_GFX_CGCG |*/
+			AMD_CG_SUPPORT_GFX_CGLS |
+			AMD_CG_SUPPORT_GFX_CGTS |
+			AMD_CG_SUPPORT_GFX_CGTS_LS |
+			AMD_CG_SUPPORT_GFX_CP_LS |
+			AMD_CG_SUPPORT_SDMA_MGCG |
+			AMD_CG_SUPPORT_SDMA_LS |
+			AMD_CG_SUPPORT_BIF_LS |
+			AMD_CG_SUPPORT_VCE_MGCG |
+			AMD_CG_SUPPORT_UVD_MGCG |
+			AMD_CG_SUPPORT_HDP_LS |
+			AMD_CG_SUPPORT_HDP_MGCG;
 		adev->pg_flags =
-			/*AMDGPU_PG_SUPPORT_GFX_PG |
-			  AMDGPU_PG_SUPPORT_GFX_SMG |
-			  AMDGPU_PG_SUPPORT_GFX_DMG |*/
-			AMDGPU_PG_SUPPORT_UVD |
-			/*AMDGPU_PG_SUPPORT_VCE |
-			  AMDGPU_PG_SUPPORT_CP |
-			  AMDGPU_PG_SUPPORT_GDS |
-			  AMDGPU_PG_SUPPORT_RLC_SMU_HS |
-			  AMDGPU_PG_SUPPORT_ACP |
-			  AMDGPU_PG_SUPPORT_SAMU |*/
+			/*AMD_PG_SUPPORT_GFX_PG |
+			  AMD_PG_SUPPORT_GFX_SMG |
+			  AMD_PG_SUPPORT_GFX_DMG |*/
+			AMD_PG_SUPPORT_UVD |
+			/*AMD_PG_SUPPORT_VCE |
+			  AMD_PG_SUPPORT_CP |
+			  AMD_PG_SUPPORT_GDS |
+			  AMD_PG_SUPPORT_RLC_SMU_HS |
+			  AMD_PG_SUPPORT_ACP |
+			  AMD_PG_SUPPORT_SAMU |*/
 			0;
 		if (adev->pdev->device == 0x1312 ||
 			adev->pdev->device == 0x1316 ||
@@ -2409,29 +2412,29 @@
 	case CHIP_KABINI:
 	case CHIP_MULLINS:
 		adev->cg_flags =
-			AMDGPU_CG_SUPPORT_GFX_MGCG |
-			AMDGPU_CG_SUPPORT_GFX_MGLS |
-			/*AMDGPU_CG_SUPPORT_GFX_CGCG |*/
-			AMDGPU_CG_SUPPORT_GFX_CGLS |
-			AMDGPU_CG_SUPPORT_GFX_CGTS |
-			AMDGPU_CG_SUPPORT_GFX_CGTS_LS |
-			AMDGPU_CG_SUPPORT_GFX_CP_LS |
-			AMDGPU_CG_SUPPORT_SDMA_MGCG |
-			AMDGPU_CG_SUPPORT_SDMA_LS |
-			AMDGPU_CG_SUPPORT_BIF_LS |
-			AMDGPU_CG_SUPPORT_VCE_MGCG |
-			AMDGPU_CG_SUPPORT_UVD_MGCG |
-			AMDGPU_CG_SUPPORT_HDP_LS |
-			AMDGPU_CG_SUPPORT_HDP_MGCG;
+			AMD_CG_SUPPORT_GFX_MGCG |
+			AMD_CG_SUPPORT_GFX_MGLS |
+			/*AMD_CG_SUPPORT_GFX_CGCG |*/
+			AMD_CG_SUPPORT_GFX_CGLS |
+			AMD_CG_SUPPORT_GFX_CGTS |
+			AMD_CG_SUPPORT_GFX_CGTS_LS |
+			AMD_CG_SUPPORT_GFX_CP_LS |
+			AMD_CG_SUPPORT_SDMA_MGCG |
+			AMD_CG_SUPPORT_SDMA_LS |
+			AMD_CG_SUPPORT_BIF_LS |
+			AMD_CG_SUPPORT_VCE_MGCG |
+			AMD_CG_SUPPORT_UVD_MGCG |
+			AMD_CG_SUPPORT_HDP_LS |
+			AMD_CG_SUPPORT_HDP_MGCG;
 		adev->pg_flags =
-			/*AMDGPU_PG_SUPPORT_GFX_PG |
-			  AMDGPU_PG_SUPPORT_GFX_SMG | */
-			AMDGPU_PG_SUPPORT_UVD |
-			/*AMDGPU_PG_SUPPORT_VCE |
-			  AMDGPU_PG_SUPPORT_CP |
-			  AMDGPU_PG_SUPPORT_GDS |
-			  AMDGPU_PG_SUPPORT_RLC_SMU_HS |
-			  AMDGPU_PG_SUPPORT_SAMU |*/
+			/*AMD_PG_SUPPORT_GFX_PG |
+			  AMD_PG_SUPPORT_GFX_SMG | */
+			AMD_PG_SUPPORT_UVD |
+			/*AMD_PG_SUPPORT_VCE |
+			  AMD_PG_SUPPORT_CP |
+			  AMD_PG_SUPPORT_GDS |
+			  AMD_PG_SUPPORT_RLC_SMU_HS |
+			  AMD_PG_SUPPORT_SAMU |*/
 			0;
 		if (adev->asic_type == CHIP_KABINI) {
 			if (adev->rev_id == 0)
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
index 5f712ce..c55ecf0 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
@@ -885,7 +885,7 @@
 {
 	u32 orig, data;
 
-	if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_SDMA_MGCG)) {
+	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_MGCG)) {
 		WREG32(mmSDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
 		WREG32(mmSDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
 	} else {
@@ -906,7 +906,7 @@
 {
 	u32 orig, data;
 
-	if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_SDMA_LS)) {
+	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_LS)) {
 		orig = data = RREG32(mmSDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
 		data |= 0x100;
 		if (orig != data)
diff --git a/drivers/gpu/drm/amd/amdgpu/cz_dpm.c b/drivers/gpu/drm/amd/amdgpu/cz_dpm.c
index 4dd17f2..e7ef226 100644
--- a/drivers/gpu/drm/amd/amdgpu/cz_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/cz_dpm.c
@@ -445,13 +445,13 @@
 	pi->gfx_pg_threshold = 500;
 	pi->caps_fps = true;
 	/* uvd */
-	pi->caps_uvd_pg = (adev->pg_flags & AMDGPU_PG_SUPPORT_UVD) ? true : false;
+	pi->caps_uvd_pg = (adev->pg_flags & AMD_PG_SUPPORT_UVD) ? true : false;
 	pi->caps_uvd_dpm = true;
 	/* vce */
-	pi->caps_vce_pg = (adev->pg_flags & AMDGPU_PG_SUPPORT_VCE) ? true : false;
+	pi->caps_vce_pg = (adev->pg_flags & AMD_PG_SUPPORT_VCE) ? true : false;
 	pi->caps_vce_dpm = true;
 	/* acp */
-	pi->caps_acp_pg = (adev->pg_flags & AMDGPU_PG_SUPPORT_ACP) ? true : false;
+	pi->caps_acp_pg = (adev->pg_flags & AMD_PG_SUPPORT_ACP) ? true : false;
 	pi->caps_acp_dpm = true;
 
 	pi->caps_stable_power_state = false;
@@ -2202,8 +2202,7 @@
 							    AMD_PG_STATE_GATE);
 
 				cz_enable_vce_dpm(adev, false);
-				/* TODO: to figure out why vce can't be poweroff. */
-				/* cz_send_msg_to_smc(adev, PPSMC_MSG_VCEPowerOFF); */
+				cz_send_msg_to_smc(adev, PPSMC_MSG_VCEPowerOFF);
 				pi->vce_power_gated = true;
 			} else {
 				cz_send_msg_to_smc(adev, PPSMC_MSG_VCEPowerON);
@@ -2226,10 +2225,8 @@
 		}
 	} else { /*pi->caps_vce_pg*/
 		cz_update_vce_dpm(adev);
-		cz_enable_vce_dpm(adev, true);
+		cz_enable_vce_dpm(adev, !gate);
 	}
-
-	return;
 }
 
 const struct amd_ip_funcs cz_dpm_ip_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 72793f9..06602df 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -3628,6 +3628,19 @@
 					unsigned vm_id, uint64_t pd_addr)
 {
 	int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
+	uint32_t seq = ring->fence_drv.sync_seq;
+	uint64_t addr = ring->fence_drv.gpu_addr;
+
+	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
+	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
+				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
+				 WAIT_REG_MEM_ENGINE(usepfp)));   /* pfp or me */
+	amdgpu_ring_write(ring, addr & 0xfffffffc);
+	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
+	amdgpu_ring_write(ring, seq);
+	amdgpu_ring_write(ring, 0xffffffff);
+	amdgpu_ring_write(ring, 4); /* poll interval */
+
 	if (usepfp) {
 		/* synce CE with ME to prevent CE fetch CEIB before context switch done */
 		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
@@ -4109,7 +4122,7 @@
 
 	orig = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
 
-	if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_GFX_CGCG)) {
+	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
 		gfx_v7_0_enable_gui_idle_interrupt(adev, true);
 
 		tmp = gfx_v7_0_halt_rlc(adev);
@@ -4147,9 +4160,9 @@
 {
 	u32 data, orig, tmp = 0;
 
-	if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_GFX_MGCG)) {
-		if (adev->cg_flags & AMDGPU_CG_SUPPORT_GFX_MGLS) {
-			if (adev->cg_flags & AMDGPU_CG_SUPPORT_GFX_CP_LS) {
+	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
+		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
+			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
 				orig = data = RREG32(mmCP_MEM_SLP_CNTL);
 				data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
 				if (orig != data)
@@ -4176,14 +4189,14 @@
 
 		gfx_v7_0_update_rlc(adev, tmp);
 
-		if (adev->cg_flags & AMDGPU_CG_SUPPORT_GFX_CGTS) {
+		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
 			orig = data = RREG32(mmCGTS_SM_CTRL_REG);
 			data &= ~CGTS_SM_CTRL_REG__SM_MODE_MASK;
 			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
 			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
 			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
-			if ((adev->cg_flags & AMDGPU_CG_SUPPORT_GFX_MGLS) &&
-			    (adev->cg_flags & AMDGPU_CG_SUPPORT_GFX_CGTS_LS))
+			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
+			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
 				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
 			data &= ~CGTS_SM_CTRL_REG__ON_MONITOR_ADD_MASK;
 			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
@@ -4249,7 +4262,7 @@
 	u32 data, orig;
 
 	orig = data = RREG32(mmRLC_PG_CNTL);
-	if (enable && (adev->pg_flags & AMDGPU_PG_SUPPORT_RLC_SMU_HS))
+	if (enable && (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS))
 		data |= RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PU_ENABLE_MASK;
 	else
 		data &= ~RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PU_ENABLE_MASK;
@@ -4263,7 +4276,7 @@
 	u32 data, orig;
 
 	orig = data = RREG32(mmRLC_PG_CNTL);
-	if (enable && (adev->pg_flags & AMDGPU_PG_SUPPORT_RLC_SMU_HS))
+	if (enable && (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS))
 		data |= RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PD_ENABLE_MASK;
 	else
 		data &= ~RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PD_ENABLE_MASK;
@@ -4276,7 +4289,7 @@
 	u32 data, orig;
 
 	orig = data = RREG32(mmRLC_PG_CNTL);
-	if (enable && (adev->pg_flags & AMDGPU_PG_SUPPORT_CP))
+	if (enable && (adev->pg_flags & AMD_PG_SUPPORT_CP))
 		data &= ~0x8000;
 	else
 		data |= 0x8000;
@@ -4289,7 +4302,7 @@
 	u32 data, orig;
 
 	orig = data = RREG32(mmRLC_PG_CNTL);
-	if (enable && (adev->pg_flags & AMDGPU_PG_SUPPORT_GDS))
+	if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GDS))
 		data &= ~0x2000;
 	else
 		data |= 0x2000;
@@ -4370,7 +4383,7 @@
 {
 	u32 data, orig;
 
-	if (enable && (adev->pg_flags & AMDGPU_PG_SUPPORT_GFX_PG)) {
+	if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) {
 		orig = data = RREG32(mmRLC_PG_CNTL);
 		data |= RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
 		if (orig != data)
@@ -4442,7 +4455,7 @@
 	u32 data, orig;
 
 	orig = data = RREG32(mmRLC_PG_CNTL);
-	if (enable && (adev->pg_flags & AMDGPU_PG_SUPPORT_GFX_SMG))
+	if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG))
 		data |= RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
 	else
 		data &= ~RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
@@ -4456,7 +4469,7 @@
 	u32 data, orig;
 
 	orig = data = RREG32(mmRLC_PG_CNTL);
-	if (enable && (adev->pg_flags & AMDGPU_PG_SUPPORT_GFX_DMG))
+	if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG))
 		data |= RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
 	else
 		data &= ~RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
@@ -4623,15 +4636,15 @@
 
 static void gfx_v7_0_init_pg(struct amdgpu_device *adev)
 {
-	if (adev->pg_flags & (AMDGPU_PG_SUPPORT_GFX_PG |
-			      AMDGPU_PG_SUPPORT_GFX_SMG |
-			      AMDGPU_PG_SUPPORT_GFX_DMG |
-			      AMDGPU_PG_SUPPORT_CP |
-			      AMDGPU_PG_SUPPORT_GDS |
-			      AMDGPU_PG_SUPPORT_RLC_SMU_HS)) {
+	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
+			      AMD_PG_SUPPORT_GFX_SMG |
+			      AMD_PG_SUPPORT_GFX_DMG |
+			      AMD_PG_SUPPORT_CP |
+			      AMD_PG_SUPPORT_GDS |
+			      AMD_PG_SUPPORT_RLC_SMU_HS)) {
 		gfx_v7_0_enable_sclk_slowdown_on_pu(adev, true);
 		gfx_v7_0_enable_sclk_slowdown_on_pd(adev, true);
-		if (adev->pg_flags & AMDGPU_PG_SUPPORT_GFX_PG) {
+		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) {
 			gfx_v7_0_init_gfx_cgpg(adev);
 			gfx_v7_0_enable_cp_pg(adev, true);
 			gfx_v7_0_enable_gds_pg(adev, true);
@@ -4643,14 +4656,14 @@
 
 static void gfx_v7_0_fini_pg(struct amdgpu_device *adev)
 {
-	if (adev->pg_flags & (AMDGPU_PG_SUPPORT_GFX_PG |
-			      AMDGPU_PG_SUPPORT_GFX_SMG |
-			      AMDGPU_PG_SUPPORT_GFX_DMG |
-			      AMDGPU_PG_SUPPORT_CP |
-			      AMDGPU_PG_SUPPORT_GDS |
-			      AMDGPU_PG_SUPPORT_RLC_SMU_HS)) {
+	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
+			      AMD_PG_SUPPORT_GFX_SMG |
+			      AMD_PG_SUPPORT_GFX_DMG |
+			      AMD_PG_SUPPORT_CP |
+			      AMD_PG_SUPPORT_GDS |
+			      AMD_PG_SUPPORT_RLC_SMU_HS)) {
 		gfx_v7_0_update_gfx_pg(adev, false);
-		if (adev->pg_flags & AMDGPU_PG_SUPPORT_GFX_PG) {
+		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) {
 			gfx_v7_0_enable_cp_pg(adev, false);
 			gfx_v7_0_enable_gds_pg(adev, false);
 		}
@@ -4738,6 +4751,22 @@
 	return 0;
 }
 
+static int gfx_v7_0_late_init(void *handle)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	int r;
+
+	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
+	if (r)
+		return r;
+
+	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
+	if (r)
+		return r;
+
+	return 0;
+}
+
 static int gfx_v7_0_sw_init(void *handle)
 {
 	struct amdgpu_ring *ring;
@@ -4890,6 +4919,8 @@
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
+	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
 	gfx_v7_0_cp_enable(adev, false);
 	gfx_v7_0_rlc_stop(adev);
 	gfx_v7_0_fini_pg(adev);
@@ -5509,14 +5540,14 @@
 	if (state == AMD_PG_STATE_GATE)
 		gate = true;
 
-	if (adev->pg_flags & (AMDGPU_PG_SUPPORT_GFX_PG |
-			      AMDGPU_PG_SUPPORT_GFX_SMG |
-			      AMDGPU_PG_SUPPORT_GFX_DMG |
-			      AMDGPU_PG_SUPPORT_CP |
-			      AMDGPU_PG_SUPPORT_GDS |
-			      AMDGPU_PG_SUPPORT_RLC_SMU_HS)) {
+	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
+			      AMD_PG_SUPPORT_GFX_SMG |
+			      AMD_PG_SUPPORT_GFX_DMG |
+			      AMD_PG_SUPPORT_CP |
+			      AMD_PG_SUPPORT_GDS |
+			      AMD_PG_SUPPORT_RLC_SMU_HS)) {
 		gfx_v7_0_update_gfx_pg(adev, gate);
-		if (adev->pg_flags & AMDGPU_PG_SUPPORT_GFX_PG) {
+		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) {
 			gfx_v7_0_enable_cp_pg(adev, gate);
 			gfx_v7_0_enable_gds_pg(adev, gate);
 		}
@@ -5527,7 +5558,7 @@
 
 const struct amd_ip_funcs gfx_v7_0_ip_funcs = {
 	.early_init = gfx_v7_0_early_init,
-	.late_init = NULL,
+	.late_init = gfx_v7_0_late_init,
 	.sw_init = gfx_v7_0_sw_init,
 	.sw_fini = gfx_v7_0_sw_fini,
 	.hw_init = gfx_v7_0_hw_init,
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 13235d8..7086ac1 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -111,7 +111,6 @@
 MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
 MODULE_FIRMWARE("amdgpu/topaz_me.bin");
 MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
-MODULE_FIRMWARE("amdgpu/topaz_mec2.bin");
 MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
 
 MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
@@ -828,7 +827,8 @@
 	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
 	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
 
-	if (adev->asic_type != CHIP_STONEY) {
+	if ((adev->asic_type != CHIP_STONEY) &&
+	    (adev->asic_type != CHIP_TOPAZ)) {
 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
 		err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
 		if (!err) {
@@ -3851,10 +3851,16 @@
 			if (r)
 				return -EINVAL;
 
-			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
-							AMDGPU_UCODE_ID_CP_MEC1);
-			if (r)
-				return -EINVAL;
+			if (adev->asic_type == CHIP_TOPAZ) {
+				r = gfx_v8_0_cp_compute_load_microcode(adev);
+				if (r)
+					return r;
+			} else {
+				r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
+										 AMDGPU_UCODE_ID_CP_MEC1);
+				if (r)
+					return -EINVAL;
+			}
 		}
 	}
 
@@ -3901,6 +3907,8 @@
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
+	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
 	gfx_v8_0_cp_enable(adev, false);
 	gfx_v8_0_rlc_stop(adev);
 	gfx_v8_0_cp_compute_fini(adev);
@@ -4186,7 +4194,18 @@
 		gfx_v8_0_cp_gfx_enable(adev, false);
 
 		/* Disable MEC parsing/prefetching */
-		/* XXX todo */
+		gfx_v8_0_cp_compute_enable(adev, false);
+
+		if (grbm_soft_reset || srbm_soft_reset) {
+			tmp = RREG32(mmGMCON_DEBUG);
+			tmp = REG_SET_FIELD(tmp,
+					    GMCON_DEBUG, GFX_STALL, 1);
+			tmp = REG_SET_FIELD(tmp,
+					    GMCON_DEBUG, GFX_CLEAR, 1);
+			WREG32(mmGMCON_DEBUG, tmp);
+
+			udelay(50);
+		}
 
 		if (grbm_soft_reset) {
 			tmp = RREG32(mmGRBM_SOFT_RESET);
@@ -4215,6 +4234,16 @@
 			WREG32(mmSRBM_SOFT_RESET, tmp);
 			tmp = RREG32(mmSRBM_SOFT_RESET);
 		}
+
+		if (grbm_soft_reset || srbm_soft_reset) {
+			tmp = RREG32(mmGMCON_DEBUG);
+			tmp = REG_SET_FIELD(tmp,
+					    GMCON_DEBUG, GFX_STALL, 0);
+			tmp = REG_SET_FIELD(tmp,
+					    GMCON_DEBUG, GFX_CLEAR, 0);
+			WREG32(mmGMCON_DEBUG, tmp);
+		}
+
 		/* Wait a little for things to settle down */
 		udelay(50);
 		gfx_v8_0_print_status((void *)adev);
@@ -4308,6 +4337,14 @@
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	int r;
 
+	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
+	if (r)
+		return r;
+
+	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
+	if (r)
+		return r;
+
 	/* requires IBs so do in late init after IB pool is initialized */
 	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
 	if (r)
@@ -4772,7 +4809,8 @@
 
 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
 	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
-		 WAIT_REG_MEM_FUNCTION(3))); /* equal */
+				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
+				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
 	amdgpu_ring_write(ring, addr & 0xfffffffc);
 	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
 	amdgpu_ring_write(ring, seq);
@@ -4958,7 +4996,7 @@
 	case AMDGPU_IRQ_STATE_ENABLE:
 		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
-					    PRIV_REG_INT_ENABLE, 0);
+					    PRIV_REG_INT_ENABLE, 1);
 		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
 		break;
 	default:
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index 3f95606..b806079 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -42,9 +42,39 @@
 
 MODULE_FIRMWARE("radeon/bonaire_mc.bin");
 MODULE_FIRMWARE("radeon/hawaii_mc.bin");
+MODULE_FIRMWARE("amdgpu/topaz_mc.bin");
+
+static const u32 golden_settings_iceland_a11[] =
+{
+	mmVM_PRT_APERTURE0_LOW_ADDR, 0x0fffffff, 0x0fffffff,
+	mmVM_PRT_APERTURE1_LOW_ADDR, 0x0fffffff, 0x0fffffff,
+	mmVM_PRT_APERTURE2_LOW_ADDR, 0x0fffffff, 0x0fffffff,
+	mmVM_PRT_APERTURE3_LOW_ADDR, 0x0fffffff, 0x0fffffff
+};
+
+static const u32 iceland_mgcg_cgcg_init[] =
+{
+	mmMC_MEM_POWER_LS, 0xffffffff, 0x00000104
+};
+
+static void gmc_v7_0_init_golden_registers(struct amdgpu_device *adev)
+{
+	switch (adev->asic_type) {
+	case CHIP_TOPAZ:
+		amdgpu_program_register_sequence(adev,
+						 iceland_mgcg_cgcg_init,
+						 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
+		amdgpu_program_register_sequence(adev,
+						 golden_settings_iceland_a11,
+						 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
+		break;
+	default:
+		break;
+	}
+}
 
 /**
- * gmc8_mc_wait_for_idle - wait for MC idle callback.
+ * gmc7_mc_wait_for_idle - wait for MC idle callback.
  *
  * @adev: amdgpu_device pointer
  *
@@ -132,13 +162,20 @@
 	case CHIP_HAWAII:
 		chip_name = "hawaii";
 		break;
+	case CHIP_TOPAZ:
+		chip_name = "topaz";
+		break;
 	case CHIP_KAVERI:
 	case CHIP_KABINI:
 		return 0;
 	default: BUG();
 	}
 
-	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
+	if (adev->asic_type == CHIP_TOPAZ)
+		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mc.bin", chip_name);
+	else
+		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
+
 	err = request_firmware(&adev->mc.fw, fw_name, adev->dev);
 	if (err)
 		goto out;
@@ -755,7 +792,7 @@
 
 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
 		orig = data = RREG32(mc_cg_registers[i]);
-		if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_MC_LS))
+		if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS))
 			data |= mc_cg_ls_en[i];
 		else
 			data &= ~mc_cg_ls_en[i];
@@ -772,7 +809,7 @@
 
 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
 		orig = data = RREG32(mc_cg_registers[i]);
-		if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_MC_MGCG))
+		if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG))
 			data |= mc_cg_en[i];
 		else
 			data &= ~mc_cg_en[i];
@@ -788,7 +825,7 @@
 
 	orig = data = RREG32_PCIE(ixPCIE_CNTL2);
 
-	if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_BIF_LS)) {
+	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_LS)) {
 		data = REG_SET_FIELD(data, PCIE_CNTL2, SLV_MEM_LS_EN, 1);
 		data = REG_SET_FIELD(data, PCIE_CNTL2, MST_MEM_LS_EN, 1);
 		data = REG_SET_FIELD(data, PCIE_CNTL2, REPLAY_MEM_LS_EN, 1);
@@ -811,7 +848,7 @@
 
 	orig = data = RREG32(mmHDP_HOST_PATH_CNTL);
 
-	if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_HDP_MGCG))
+	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_HDP_MGCG))
 		data = REG_SET_FIELD(data, HDP_HOST_PATH_CNTL, CLOCK_GATING_DIS, 0);
 	else
 		data = REG_SET_FIELD(data, HDP_HOST_PATH_CNTL, CLOCK_GATING_DIS, 1);
@@ -827,7 +864,7 @@
 
 	orig = data = RREG32(mmHDP_MEM_POWER_LS);
 
-	if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_HDP_LS))
+	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS))
 		data = REG_SET_FIELD(data, HDP_MEM_POWER_LS, LS_ENABLE, 1);
 	else
 		data = REG_SET_FIELD(data, HDP_MEM_POWER_LS, LS_ENABLE, 0);
@@ -984,6 +1021,8 @@
 	int r;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+	gmc_v7_0_init_golden_registers(adev);
+
 	gmc_v7_0_mc_program(adev);
 
 	if (!(adev->flags & AMD_IS_APU)) {
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index c0c9a01..3efd455 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -42,9 +42,7 @@
 static void gmc_v8_0_set_gart_funcs(struct amdgpu_device *adev);
 static void gmc_v8_0_set_irq_funcs(struct amdgpu_device *adev);
 
-MODULE_FIRMWARE("amdgpu/topaz_mc.bin");
 MODULE_FIRMWARE("amdgpu/tonga_mc.bin");
-MODULE_FIRMWARE("amdgpu/fiji_mc.bin");
 
 static const u32 golden_settings_tonga_a11[] =
 {
@@ -75,19 +73,6 @@
 	mmMC_MEM_POWER_LS, 0xffffffff, 0x00000104
 };
 
-static const u32 golden_settings_iceland_a11[] =
-{
-	mmVM_PRT_APERTURE0_LOW_ADDR, 0x0fffffff, 0x0fffffff,
-	mmVM_PRT_APERTURE1_LOW_ADDR, 0x0fffffff, 0x0fffffff,
-	mmVM_PRT_APERTURE2_LOW_ADDR, 0x0fffffff, 0x0fffffff,
-	mmVM_PRT_APERTURE3_LOW_ADDR, 0x0fffffff, 0x0fffffff
-};
-
-static const u32 iceland_mgcg_cgcg_init[] =
-{
-	mmMC_MEM_POWER_LS, 0xffffffff, 0x00000104
-};
-
 static const u32 cz_mgcg_cgcg_init[] =
 {
 	mmMC_MEM_POWER_LS, 0xffffffff, 0x00000104
@@ -102,14 +87,6 @@
 static void gmc_v8_0_init_golden_registers(struct amdgpu_device *adev)
 {
 	switch (adev->asic_type) {
-	case CHIP_TOPAZ:
-		amdgpu_program_register_sequence(adev,
-						 iceland_mgcg_cgcg_init,
-						 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
-		amdgpu_program_register_sequence(adev,
-						 golden_settings_iceland_a11,
-						 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
-		break;
 	case CHIP_FIJI:
 		amdgpu_program_register_sequence(adev,
 						 fiji_mgcg_cgcg_init,
@@ -229,15 +206,10 @@
 	DRM_DEBUG("\n");
 
 	switch (adev->asic_type) {
-	case CHIP_TOPAZ:
-		chip_name = "topaz";
-		break;
 	case CHIP_TONGA:
 		chip_name = "tonga";
 		break;
 	case CHIP_FIJI:
-		chip_name = "fiji";
-		break;
 	case CHIP_CARRIZO:
 	case CHIP_STONEY:
 		return 0;
@@ -1007,7 +979,7 @@
 
 	gmc_v8_0_mc_program(adev);
 
-	if (!(adev->flags & AMD_IS_APU)) {
+	if (adev->asic_type == CHIP_TONGA) {
 		r = gmc_v8_0_mc_load_microcode(adev);
 		if (r) {
 			DRM_ERROR("Failed to load MC firmware!\n");
diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_smc.c b/drivers/gpu/drm/amd/amdgpu/iceland_smc.c
index 966d4b2..090486c 100644
--- a/drivers/gpu/drm/amd/amdgpu/iceland_smc.c
+++ b/drivers/gpu/drm/amd/amdgpu/iceland_smc.c
@@ -432,7 +432,7 @@
 		case AMDGPU_UCODE_ID_CP_ME:
 			return UCODE_ID_CP_ME_MASK;
 		case AMDGPU_UCODE_ID_CP_MEC1:
-			return UCODE_ID_CP_MEC_MASK | UCODE_ID_CP_MEC_JT1_MASK | UCODE_ID_CP_MEC_JT2_MASK;
+			return UCODE_ID_CP_MEC_MASK | UCODE_ID_CP_MEC_JT1_MASK;
 		case AMDGPU_UCODE_ID_CP_MEC2:
 			return UCODE_ID_CP_MEC_MASK;
 		case AMDGPU_UCODE_ID_RLC_G:
@@ -522,12 +522,6 @@
 		return -EINVAL;
 	}
 
-	if (iceland_smu_populate_single_firmware_entry(adev, UCODE_ID_CP_MEC_JT2,
-			&toc->entry[toc->num_entries++])) {
-		DRM_ERROR("Failed to get firmware entry for MEC_JT2\n");
-		return -EINVAL;
-	}
-
 	if (iceland_smu_populate_single_firmware_entry(adev, UCODE_ID_SDMA0,
 			&toc->entry[toc->num_entries++])) {
 		DRM_ERROR("Failed to get firmware entry for SDMA0\n");
@@ -550,8 +544,8 @@
 			UCODE_ID_CP_ME_MASK |
 			UCODE_ID_CP_PFP_MASK |
 			UCODE_ID_CP_MEC_MASK |
-			UCODE_ID_CP_MEC_JT1_MASK |
-			UCODE_ID_CP_MEC_JT2_MASK;
+			UCODE_ID_CP_MEC_JT1_MASK;
+
 
 	if (iceland_send_msg_to_smc_with_parameter_without_waiting(adev, PPSMC_MSG_LoadUcodes, fw_to_load)) {
 		DRM_ERROR("Fail to request SMU load ucode\n");
diff --git a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
index 7e9154c..654d767 100644
--- a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
@@ -2859,11 +2859,11 @@
 	pi->voltage_drop_t = 0;
 	pi->caps_sclk_throttle_low_notification = false;
 	pi->caps_fps = false; /* true? */
-	pi->caps_uvd_pg = (adev->pg_flags & AMDGPU_PG_SUPPORT_UVD) ? true : false;
+	pi->caps_uvd_pg = (adev->pg_flags & AMD_PG_SUPPORT_UVD) ? true : false;
 	pi->caps_uvd_dpm = true;
-	pi->caps_vce_pg = (adev->pg_flags & AMDGPU_PG_SUPPORT_VCE) ? true : false;
-	pi->caps_samu_pg = (adev->pg_flags & AMDGPU_PG_SUPPORT_SAMU) ? true : false;
-	pi->caps_acp_pg = (adev->pg_flags & AMDGPU_PG_SUPPORT_ACP) ? true : false;
+	pi->caps_vce_pg = (adev->pg_flags & AMD_PG_SUPPORT_VCE) ? true : false;
+	pi->caps_samu_pg = (adev->pg_flags & AMD_PG_SUPPORT_SAMU) ? true : false;
+	pi->caps_acp_pg = (adev->pg_flags & AMD_PG_SUPPORT_ACP) ? true : false;
 	pi->caps_stable_p_state = false;
 
 	ret = kv_parse_sys_info_table(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_dpm.c b/drivers/gpu/drm/amd/amdgpu/tonga_dpm.c
index f4a1346..0497784 100644
--- a/drivers/gpu/drm/amd/amdgpu/tonga_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/tonga_dpm.c
@@ -122,25 +122,12 @@
 
 static int tonga_dpm_suspend(void *handle)
 {
-	return 0;
+	return tonga_dpm_hw_fini(handle);
 }
 
 static int tonga_dpm_resume(void *handle)
 {
-	int ret;
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
-	mutex_lock(&adev->pm.mutex);
-
-	ret = tonga_smu_start(adev);
-	if (ret) {
-		DRM_ERROR("SMU start failed\n");
-		goto fail;
-	}
-
-fail:
-	mutex_unlock(&adev->pm.mutex);
-	return ret;
+	return tonga_dpm_hw_init(handle);
 }
 
 static int tonga_dpm_set_clockgating_state(void *handle,
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
index 5e9f73a..fbd3767 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
@@ -611,7 +611,7 @@
 {
 	u32 orig, data;
 
-	if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_UVD_MGCG)) {
+	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_UVD_MGCG)) {
 		data = RREG32_UVD_CTX(ixUVD_CGC_MEM_CTRL);
 		data = 0xfff;
 		WREG32_UVD_CTX(ixUVD_CGC_MEM_CTRL, data);
@@ -830,6 +830,9 @@
 	bool gate = false;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+	if (!(adev->cg_flags & AMD_CG_SUPPORT_UVD_MGCG))
+		return 0;
+
 	if (state == AMD_CG_STATE_GATE)
 		gate = true;
 
@@ -848,7 +851,10 @@
 	 * revisit this when there is a cleaner line between
 	 * the smc and the hw blocks
 	 */
-	 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+	if (!(adev->pg_flags & AMD_PG_SUPPORT_UVD))
+		return 0;
 
 	if (state == AMD_PG_STATE_GATE) {
 		uvd_v4_2_stop(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
index 38864f56..57f1c5b 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
@@ -774,6 +774,11 @@
 static int uvd_v5_0_set_clockgating_state(void *handle,
 					  enum amd_clockgating_state state)
 {
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+	if (!(adev->cg_flags & AMD_CG_SUPPORT_UVD_MGCG))
+		return 0;
+
 	return 0;
 }
 
@@ -789,6 +794,9 @@
 	 */
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+	if (!(adev->pg_flags & AMD_PG_SUPPORT_UVD))
+		return 0;
+
 	if (state == AMD_PG_STATE_GATE) {
 		uvd_v5_0_stop(adev);
 		return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
index 3d591392..0b365b7 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
@@ -532,7 +532,7 @@
 	uvd_v6_0_mc_resume(adev);
 
 	/* Set dynamic clock gating in S/W control mode */
-	if (adev->cg_flags & AMDGPU_CG_SUPPORT_UVD_MGCG) {
+	if (adev->cg_flags & AMD_CG_SUPPORT_UVD_MGCG) {
 		if (adev->flags & AMD_IS_APU)
 			cz_set_uvd_clock_gating_branches(adev, false);
 		else
@@ -1000,7 +1000,7 @@
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
 
-	if (!(adev->cg_flags & AMDGPU_CG_SUPPORT_UVD_MGCG))
+	if (!(adev->cg_flags & AMD_CG_SUPPORT_UVD_MGCG))
 		return 0;
 
 	if (enable) {
@@ -1030,6 +1030,9 @@
 	 */
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+	if (!(adev->pg_flags & AMD_PG_SUPPORT_UVD))
+		return 0;
+
 	if (state == AMD_PG_STATE_GATE) {
 		uvd_v6_0_stop(adev);
 		return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
index 52ac7a8..a822eda 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
@@ -373,7 +373,7 @@
 {
 	bool sw_cg = false;
 
-	if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_VCE_MGCG)) {
+	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)) {
 		if (sw_cg)
 			vce_v2_0_set_sw_cg(adev, true);
 		else
@@ -608,6 +608,9 @@
 	 */
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+	if (!(adev->pg_flags & AMD_PG_SUPPORT_VCE))
+		return 0;
+
 	if (state == AMD_PG_STATE_GATE)
 		/* XXX do we need a vce_v2_0_stop()? */
 		return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
index e99af81..d662fa9 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
@@ -277,7 +277,7 @@
 		WREG32_P(mmVCE_STATUS, 0, ~1);
 
 		/* Set Clock-Gating off */
-		if (adev->cg_flags & AMDGPU_CG_SUPPORT_VCE_MGCG)
+		if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)
 			vce_v3_0_set_vce_sw_clock_gating(adev, false);
 
 		if (r) {
@@ -676,7 +676,7 @@
 	bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
 	int i;
 
-	if (!(adev->cg_flags & AMDGPU_CG_SUPPORT_VCE_MGCG))
+	if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
 		return 0;
 
 	mutex_lock(&adev->grbm_idx_mutex);
@@ -728,6 +728,9 @@
 	 */
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+	if (!(adev->pg_flags & AMD_PG_SUPPORT_VCE))
+		return 0;
+
 	if (state == AMD_PG_STATE_GATE)
 		/* XXX do we need a vce_v3_0_stop()? */
 		return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
index 652e766..0d14d10 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -61,6 +61,7 @@
 #include "vi.h"
 #include "vi_dpm.h"
 #include "gmc_v8_0.h"
+#include "gmc_v7_0.h"
 #include "gfx_v8_0.h"
 #include "sdma_v2_4.h"
 #include "sdma_v3_0.h"
@@ -1109,10 +1110,10 @@
 	},
 	{
 		.type = AMD_IP_BLOCK_TYPE_GMC,
-		.major = 8,
-		.minor = 0,
+		.major = 7,
+		.minor = 4,
 		.rev = 0,
-		.funcs = &gmc_v8_0_ip_funcs,
+		.funcs = &gmc_v7_0_ip_funcs,
 	},
 	{
 		.type = AMD_IP_BLOCK_TYPE_IH,
@@ -1442,8 +1443,7 @@
 		break;
 	case CHIP_FIJI:
 		adev->has_uvd = true;
-		adev->cg_flags = AMDGPU_CG_SUPPORT_UVD_MGCG |
-				AMDGPU_CG_SUPPORT_VCE_MGCG;
+		adev->cg_flags = 0;
 		adev->pg_flags = 0;
 		adev->external_rev_id = adev->rev_id + 0x3c;
 		break;
@@ -1457,8 +1457,7 @@
 	case CHIP_STONEY:
 		adev->has_uvd = true;
 		adev->cg_flags = 0;
-		/* Disable UVD pg */
-		adev->pg_flags = /* AMDGPU_PG_SUPPORT_UVD | */AMDGPU_PG_SUPPORT_VCE;
+		adev->pg_flags = 0;
 		adev->external_rev_id = adev->rev_id + 0x1;
 		break;
 	default:
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 9be0070..a902ae0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -194,7 +194,7 @@
 
 	kfree(p);
 
-	kfree((void *)work);
+	kfree(work);
 }
 
 static void kfd_process_destroy_delayed(struct rcu_head *rcu)
diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h
index 1195d06f..dbf7e64 100644
--- a/drivers/gpu/drm/amd/include/amd_shared.h
+++ b/drivers/gpu/drm/amd/include/amd_shared.h
@@ -85,6 +85,38 @@
 	AMD_PG_STATE_UNGATE,
 };
 
+/* CG flags */
+#define AMD_CG_SUPPORT_GFX_MGCG			(1 << 0)
+#define AMD_CG_SUPPORT_GFX_MGLS			(1 << 1)
+#define AMD_CG_SUPPORT_GFX_CGCG			(1 << 2)
+#define AMD_CG_SUPPORT_GFX_CGLS			(1 << 3)
+#define AMD_CG_SUPPORT_GFX_CGTS			(1 << 4)
+#define AMD_CG_SUPPORT_GFX_CGTS_LS		(1 << 5)
+#define AMD_CG_SUPPORT_GFX_CP_LS		(1 << 6)
+#define AMD_CG_SUPPORT_GFX_RLC_LS		(1 << 7)
+#define AMD_CG_SUPPORT_MC_LS			(1 << 8)
+#define AMD_CG_SUPPORT_MC_MGCG			(1 << 9)
+#define AMD_CG_SUPPORT_SDMA_LS			(1 << 10)
+#define AMD_CG_SUPPORT_SDMA_MGCG		(1 << 11)
+#define AMD_CG_SUPPORT_BIF_LS			(1 << 12)
+#define AMD_CG_SUPPORT_UVD_MGCG			(1 << 13)
+#define AMD_CG_SUPPORT_VCE_MGCG			(1 << 14)
+#define AMD_CG_SUPPORT_HDP_LS			(1 << 15)
+#define AMD_CG_SUPPORT_HDP_MGCG			(1 << 16)
+
+/* PG flags */
+#define AMD_PG_SUPPORT_GFX_PG			(1 << 0)
+#define AMD_PG_SUPPORT_GFX_SMG			(1 << 1)
+#define AMD_PG_SUPPORT_GFX_DMG			(1 << 2)
+#define AMD_PG_SUPPORT_UVD			(1 << 3)
+#define AMD_PG_SUPPORT_VCE			(1 << 4)
+#define AMD_PG_SUPPORT_CP			(1 << 5)
+#define AMD_PG_SUPPORT_GDS			(1 << 6)
+#define AMD_PG_SUPPORT_RLC_SMU_HS		(1 << 7)
+#define AMD_PG_SUPPORT_SDMA			(1 << 8)
+#define AMD_PG_SUPPORT_ACP			(1 << 9)
+#define AMD_PG_SUPPORT_SAMU			(1 << 10)
+
 enum amd_pm_state_type {
 	/* not used for dpm */
 	POWER_STATE_TYPE_DEFAULT,
diff --git a/drivers/gpu/drm/amd/include/cgs_common.h b/drivers/gpu/drm/amd/include/cgs_common.h
index 713aec9..aec38fc 100644
--- a/drivers/gpu/drm/amd/include/cgs_common.h
+++ b/drivers/gpu/drm/amd/include/cgs_common.h
@@ -109,6 +109,8 @@
 	CGS_SYSTEM_INFO_ADAPTER_BDF_ID = 1,
 	CGS_SYSTEM_INFO_PCIE_GEN_INFO,
 	CGS_SYSTEM_INFO_PCIE_MLW,
+	CGS_SYSTEM_INFO_CG_FLAGS,
+	CGS_SYSTEM_INFO_PG_FLAGS,
 	CGS_SYSTEM_INFO_ID_MAXIMUM,
 };
 
diff --git a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
index 8f5d5ed..589599f 100644
--- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
+++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
@@ -64,6 +64,11 @@
 	if (ret == 0)
 		ret = hwmgr->hwmgr_func->backend_init(hwmgr);
 
+	if (ret)
+		printk("amdgpu: powerplay initialization failed\n");
+	else
+		printk("amdgpu: powerplay initialized\n");
+
 	return ret;
 }
 
@@ -397,8 +402,11 @@
 
 		data.requested_ui_label = power_state_convert(ps);
 		ret = pem_handle_event(pp_handle->eventmgr, event_id, &data);
+		break;
 	}
-	break;
+	case AMD_PP_EVENT_COMPLETE_INIT:
+		ret = pem_handle_event(pp_handle->eventmgr, event_id, &data);
+		break;
 	default:
 		break;
 	}
diff --git a/drivers/gpu/drm/amd/powerplay/eventmgr/eventactionchains.c b/drivers/gpu/drm/amd/powerplay/eventmgr/eventactionchains.c
index 83be3cf..6b52c78 100644
--- a/drivers/gpu/drm/amd/powerplay/eventmgr/eventactionchains.c
+++ b/drivers/gpu/drm/amd/powerplay/eventmgr/eventactionchains.c
@@ -165,6 +165,7 @@
 };
 
 static const pem_event_action *complete_init_event[] = {
+	unblock_adjust_power_state_tasks,
 	adjust_power_state_tasks,
 	enable_gfx_clock_gating_tasks,
 	enable_gfx_voltage_island_power_gating_tasks,
diff --git a/drivers/gpu/drm/amd/powerplay/eventmgr/eventmgr.c b/drivers/gpu/drm/amd/powerplay/eventmgr/eventmgr.c
index 52a3efc..46410e3 100644
--- a/drivers/gpu/drm/amd/powerplay/eventmgr/eventmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/eventmgr/eventmgr.c
@@ -31,7 +31,7 @@
 static int pem_init(struct pp_eventmgr *eventmgr)
 {
 	int result = 0;
-	struct pem_event_data event_data;
+	struct pem_event_data event_data = { {0} };
 
 	/* Initialize PowerPlay feature info */
 	pem_init_feature_info(eventmgr);
@@ -52,7 +52,7 @@
 
 static void pem_fini(struct pp_eventmgr *eventmgr)
 {
-	struct pem_event_data event_data;
+	struct pem_event_data event_data = { {0} };
 
 	pem_uninit_featureInfo(eventmgr);
 	pem_unregister_interrupts(eventmgr);
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/cz_clockpowergating.c b/drivers/gpu/drm/amd/powerplay/hwmgr/cz_clockpowergating.c
index ad77008..ff08ce4 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/cz_clockpowergating.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/cz_clockpowergating.c
@@ -226,7 +226,7 @@
 		}
 	} else {
 		cz_dpm_update_vce_dpm(hwmgr);
-		cz_enable_disable_vce_dpm(hwmgr, true);
+		cz_enable_disable_vce_dpm(hwmgr, !bgate);
 		return 0;
 	}
 
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/cz_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/cz_hwmgr.c
index 0874ab4..cf01177 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/cz_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/cz_hwmgr.c
@@ -174,6 +174,8 @@
 {
 	struct cz_hwmgr *cz_hwmgr = (struct cz_hwmgr *)(hwmgr->backend);
 	uint32_t i;
+	struct cgs_system_info sys_info = {0};
+	int result;
 
 	cz_hwmgr->gfx_ramp_step = 256*25/100;
 
@@ -247,6 +249,22 @@
 	phm_cap_set(hwmgr->platform_descriptor.platformCaps,
 				   PHM_PlatformCaps_DisableVoltageIsland);
 
+	phm_cap_unset(hwmgr->platform_descriptor.platformCaps,
+		      PHM_PlatformCaps_UVDPowerGating);
+	phm_cap_unset(hwmgr->platform_descriptor.platformCaps,
+		      PHM_PlatformCaps_VCEPowerGating);
+	sys_info.size = sizeof(struct cgs_system_info);
+	sys_info.info_id = CGS_SYSTEM_INFO_PG_FLAGS;
+	result = cgs_query_system_info(hwmgr->device, &sys_info);
+	if (!result) {
+		if (sys_info.value & AMD_PG_SUPPORT_UVD)
+			phm_cap_set(hwmgr->platform_descriptor.platformCaps,
+				      PHM_PlatformCaps_UVDPowerGating);
+		if (sys_info.value & AMD_PG_SUPPORT_VCE)
+			phm_cap_set(hwmgr->platform_descriptor.platformCaps,
+				      PHM_PlatformCaps_VCEPowerGating);
+	}
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c
index 44a9250..980d3bf 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c
@@ -4451,6 +4451,7 @@
 	pp_atomctrl_gpio_pin_assignment gpio_pin_assignment;
 	struct phm_ppt_v1_information *pptable_info = (struct phm_ppt_v1_information *)(hwmgr->pptable);
 	phw_tonga_ulv_parm *ulv;
+	struct cgs_system_info sys_info = {0};
 
 	PP_ASSERT_WITH_CODE((NULL != hwmgr),
 		"Invalid Parameter!", return -1;);
@@ -4615,9 +4616,23 @@
 
 	data->vddc_phase_shed_control = 0;
 
-	if (0 == result) {
-		struct cgs_system_info sys_info = {0};
+	phm_cap_unset(hwmgr->platform_descriptor.platformCaps,
+		      PHM_PlatformCaps_UVDPowerGating);
+	phm_cap_unset(hwmgr->platform_descriptor.platformCaps,
+		      PHM_PlatformCaps_VCEPowerGating);
+	sys_info.size = sizeof(struct cgs_system_info);
+	sys_info.info_id = CGS_SYSTEM_INFO_PG_FLAGS;
+	result = cgs_query_system_info(hwmgr->device, &sys_info);
+	if (!result) {
+		if (sys_info.value & AMD_PG_SUPPORT_UVD)
+			phm_cap_set(hwmgr->platform_descriptor.platformCaps,
+				      PHM_PlatformCaps_UVDPowerGating);
+		if (sys_info.value & AMD_PG_SUPPORT_VCE)
+			phm_cap_set(hwmgr->platform_descriptor.platformCaps,
+				      PHM_PlatformCaps_VCEPowerGating);
+	}
 
+	if (0 == result) {
 		data->is_tlu_enabled = 0;
 		hwmgr->platform_descriptor.hardwareActivityPerformanceLevels =
 			TONGA_MAX_HARDWARE_POWERLEVELS;
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c
index 873a8d2..ec222c6 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c
@@ -272,6 +272,9 @@
 				UCODE_ID_CP_MEC_JT1_MASK |
 				UCODE_ID_CP_MEC_JT2_MASK;
 
+	if (smumgr->chip_id == CHIP_STONEY)
+		fw_to_check &= ~(UCODE_ID_SDMA1_MASK | UCODE_ID_CP_MEC_JT2_MASK);
+
 	cz_request_smu_load_fw(smumgr);
 	cz_check_fw_load_finish(smumgr, fw_to_check);
 
@@ -282,7 +285,7 @@
 	return ret;
 }
 
-static uint8_t cz_translate_firmware_enum_to_arg(
+static uint8_t cz_translate_firmware_enum_to_arg(struct pp_smumgr *smumgr,
 			enum cz_scratch_entry firmware_enum)
 {
 	uint8_t ret = 0;
@@ -292,7 +295,10 @@
 		ret = UCODE_ID_SDMA0;
 		break;
 	case CZ_SCRATCH_ENTRY_UCODE_ID_SDMA1:
-		ret = UCODE_ID_SDMA1;
+		if (smumgr->chip_id == CHIP_STONEY)
+			ret = UCODE_ID_SDMA0;
+		else
+			ret = UCODE_ID_SDMA1;
 		break;
 	case CZ_SCRATCH_ENTRY_UCODE_ID_CP_CE:
 		ret = UCODE_ID_CP_CE;
@@ -307,7 +313,10 @@
 		ret = UCODE_ID_CP_MEC_JT1;
 		break;
 	case CZ_SCRATCH_ENTRY_UCODE_ID_CP_MEC_JT2:
-		ret = UCODE_ID_CP_MEC_JT2;
+		if (smumgr->chip_id == CHIP_STONEY)
+			ret = UCODE_ID_CP_MEC_JT1;
+		else
+			ret = UCODE_ID_CP_MEC_JT2;
 		break;
 	case CZ_SCRATCH_ENTRY_UCODE_ID_GMCON_RENG:
 		ret = UCODE_ID_GMCON_RENG;
@@ -396,7 +405,7 @@
 	struct SMU_Task *task = &toc->tasks[cz_smu->toc_entry_used_count++];
 
 	task->type = type;
-	task->arg = cz_translate_firmware_enum_to_arg(fw_enum);
+	task->arg = cz_translate_firmware_enum_to_arg(smumgr, fw_enum);
 	task->next = is_last ? END_OF_TASK_LIST : cz_smu->toc_entry_used_count;
 
 	for (i = 0; i < cz_smu->scratch_buffer_length; i++)
@@ -433,7 +442,7 @@
 	struct SMU_Task *task = &toc->tasks[cz_smu->toc_entry_used_count++];
 
 	task->type = TASK_TYPE_UCODE_LOAD;
-	task->arg = cz_translate_firmware_enum_to_arg(fw_enum);
+	task->arg = cz_translate_firmware_enum_to_arg(smumgr, fw_enum);
 	task->next = is_last ? END_OF_TASK_LIST : cz_smu->toc_entry_used_count;
 
 	for (i = 0; i < cz_smu->driver_buffer_length; i++)
@@ -509,8 +518,14 @@
 				CZ_SCRATCH_ENTRY_UCODE_ID_CP_ME, false);
 	cz_smu_populate_single_ucode_load_task(smumgr,
 				CZ_SCRATCH_ENTRY_UCODE_ID_CP_MEC_JT1, false);
-	cz_smu_populate_single_ucode_load_task(smumgr,
+
+	if (smumgr->chip_id == CHIP_STONEY)
+		cz_smu_populate_single_ucode_load_task(smumgr,
+				CZ_SCRATCH_ENTRY_UCODE_ID_CP_MEC_JT1, false);
+	else
+		cz_smu_populate_single_ucode_load_task(smumgr,
 				CZ_SCRATCH_ENTRY_UCODE_ID_CP_MEC_JT2, false);
+
 	cz_smu_populate_single_ucode_load_task(smumgr,
 				CZ_SCRATCH_ENTRY_UCODE_ID_RLC_G, false);
 
@@ -551,7 +566,11 @@
 
 	cz_smu_populate_single_ucode_load_task(smumgr,
 				CZ_SCRATCH_ENTRY_UCODE_ID_SDMA0, false);
-	cz_smu_populate_single_ucode_load_task(smumgr,
+	if (smumgr->chip_id == CHIP_STONEY)
+		cz_smu_populate_single_ucode_load_task(smumgr,
+				CZ_SCRATCH_ENTRY_UCODE_ID_SDMA0, false);
+	else
+		cz_smu_populate_single_ucode_load_task(smumgr,
 				CZ_SCRATCH_ENTRY_UCODE_ID_SDMA1, false);
 	cz_smu_populate_single_ucode_load_task(smumgr,
 				CZ_SCRATCH_ENTRY_UCODE_ID_CP_CE, false);
@@ -561,7 +580,11 @@
 				CZ_SCRATCH_ENTRY_UCODE_ID_CP_ME, false);
 	cz_smu_populate_single_ucode_load_task(smumgr,
 				CZ_SCRATCH_ENTRY_UCODE_ID_CP_MEC_JT1, false);
-	cz_smu_populate_single_ucode_load_task(smumgr,
+	if (smumgr->chip_id == CHIP_STONEY)
+		cz_smu_populate_single_ucode_load_task(smumgr,
+				CZ_SCRATCH_ENTRY_UCODE_ID_CP_MEC_JT1, false);
+	else
+		cz_smu_populate_single_ucode_load_task(smumgr,
 				CZ_SCRATCH_ENTRY_UCODE_ID_CP_MEC_JT2, false);
 	cz_smu_populate_single_ucode_load_task(smumgr,
 				CZ_SCRATCH_ENTRY_UCODE_ID_RLC_G, true);
@@ -618,7 +641,7 @@
 
 	for (i = 0; i < sizeof(firmware_list)/sizeof(*firmware_list); i++) {
 
-		firmware_type = cz_translate_firmware_enum_to_arg(
+		firmware_type = cz_translate_firmware_enum_to_arg(smumgr,
 					firmware_list[i]);
 
 		ucode_id = cz_convert_fw_type_to_cgs(firmware_type);
diff --git a/drivers/gpu/drm/ast/ast_main.c b/drivers/gpu/drm/ast/ast_main.c
index 9759009..b1480ac 100644
--- a/drivers/gpu/drm/ast/ast_main.c
+++ b/drivers/gpu/drm/ast/ast_main.c
@@ -227,7 +227,7 @@
 	} while (ast_read32(ast, 0x10000) != 0x01);
 	data = ast_read32(ast, 0x10004);
 
-	if (data & 0x400)
+	if (data & 0x40)
 		ast->dram_bus_width = 16;
 	else
 		ast->dram_bus_width = 32;
diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c
index 3f74193..9a7b446 100644
--- a/drivers/gpu/drm/drm_atomic.c
+++ b/drivers/gpu/drm/drm_atomic.c
@@ -65,8 +65,6 @@
 	 */
 	state->allow_modeset = true;
 
-	state->num_connector = ACCESS_ONCE(dev->mode_config.num_connector);
-
 	state->crtcs = kcalloc(dev->mode_config.num_crtc,
 			       sizeof(*state->crtcs), GFP_KERNEL);
 	if (!state->crtcs)
@@ -83,16 +81,6 @@
 				      sizeof(*state->plane_states), GFP_KERNEL);
 	if (!state->plane_states)
 		goto fail;
-	state->connectors = kcalloc(state->num_connector,
-				    sizeof(*state->connectors),
-				    GFP_KERNEL);
-	if (!state->connectors)
-		goto fail;
-	state->connector_states = kcalloc(state->num_connector,
-					  sizeof(*state->connector_states),
-					  GFP_KERNEL);
-	if (!state->connector_states)
-		goto fail;
 
 	state->dev = dev;
 
@@ -823,19 +811,27 @@
 
 	index = drm_connector_index(connector);
 
-	/*
-	 * Construction of atomic state updates can race with a connector
-	 * hot-add which might overflow. In this case flip the table and just
-	 * restart the entire ioctl - no one is fast enough to livelock a cpu
-	 * with physical hotplug events anyway.
-	 *
-	 * Note that we only grab the indexes once we have the right lock to
-	 * prevent hotplug/unplugging of connectors. So removal is no problem,
-	 * at most the array is a bit too large.
-	 */
 	if (index >= state->num_connector) {
-		DRM_DEBUG_ATOMIC("Hot-added connector would overflow state array, restarting\n");
-		return ERR_PTR(-EAGAIN);
+		struct drm_connector **c;
+		struct drm_connector_state **cs;
+		int alloc = max(index + 1, config->num_connector);
+
+		c = krealloc(state->connectors, alloc * sizeof(*state->connectors), GFP_KERNEL);
+		if (!c)
+			return ERR_PTR(-ENOMEM);
+
+		state->connectors = c;
+		memset(&state->connectors[state->num_connector], 0,
+		       sizeof(*state->connectors) * (alloc - state->num_connector));
+
+		cs = krealloc(state->connector_states, alloc * sizeof(*state->connector_states), GFP_KERNEL);
+		if (!cs)
+			return ERR_PTR(-ENOMEM);
+
+		state->connector_states = cs;
+		memset(&state->connector_states[state->num_connector], 0,
+		       sizeof(*state->connector_states) * (alloc - state->num_connector));
+		state->num_connector = alloc;
 	}
 
 	if (state->connector_states[index])
diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c
index 57cccd6..4f2d3e1 100644
--- a/drivers/gpu/drm/drm_atomic_helper.c
+++ b/drivers/gpu/drm/drm_atomic_helper.c
@@ -946,9 +946,23 @@
 	}
 }
 
-static bool framebuffer_changed(struct drm_device *dev,
-				struct drm_atomic_state *old_state,
-				struct drm_crtc *crtc)
+/**
+ * drm_atomic_helper_framebuffer_changed - check if framebuffer has changed
+ * @dev: DRM device
+ * @old_state: atomic state object with old state structures
+ * @crtc: DRM crtc
+ *
+ * Checks whether the framebuffer used for this CRTC changes as a result of
+ * the atomic update.  This is useful for drivers which cannot use
+ * drm_atomic_helper_wait_for_vblanks() and need to reimplement its
+ * functionality.
+ *
+ * Returns:
+ * true if the framebuffer changed.
+ */
+bool drm_atomic_helper_framebuffer_changed(struct drm_device *dev,
+					   struct drm_atomic_state *old_state,
+					   struct drm_crtc *crtc)
 {
 	struct drm_plane *plane;
 	struct drm_plane_state *old_plane_state;
@@ -965,6 +979,7 @@
 
 	return false;
 }
+EXPORT_SYMBOL(drm_atomic_helper_framebuffer_changed);
 
 /**
  * drm_atomic_helper_wait_for_vblanks - wait for vblank on crtcs
@@ -999,7 +1014,8 @@
 		if (old_state->legacy_cursor_update)
 			continue;
 
-		if (!framebuffer_changed(dev, old_state, crtc))
+		if (!drm_atomic_helper_framebuffer_changed(dev,
+				old_state, crtc))
 			continue;
 
 		ret = drm_crtc_vblank_get(crtc);
@@ -1477,7 +1493,7 @@
 {
 	int i;
 
-	for (i = 0; i < dev->mode_config.num_connector; i++) {
+	for (i = 0; i < state->num_connector; i++) {
 		struct drm_connector *connector = state->connectors[i];
 
 		if (!connector)
diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c
index d40bab2..f619121 100644
--- a/drivers/gpu/drm/drm_crtc.c
+++ b/drivers/gpu/drm/drm_crtc.c
@@ -918,12 +918,19 @@
 	connector->base.properties = &connector->properties;
 	connector->dev = dev;
 	connector->funcs = funcs;
+
+	connector->connector_id = ida_simple_get(&config->connector_ida, 0, 0, GFP_KERNEL);
+	if (connector->connector_id < 0) {
+		ret = connector->connector_id;
+		goto out_put;
+	}
+
 	connector->connector_type = connector_type;
 	connector->connector_type_id =
 		ida_simple_get(connector_ida, 1, 0, GFP_KERNEL);
 	if (connector->connector_type_id < 0) {
 		ret = connector->connector_type_id;
-		goto out_put;
+		goto out_put_id;
 	}
 	connector->name =
 		kasprintf(GFP_KERNEL, "%s-%d",
@@ -931,7 +938,7 @@
 			  connector->connector_type_id);
 	if (!connector->name) {
 		ret = -ENOMEM;
-		goto out_put;
+		goto out_put_type_id;
 	}
 
 	INIT_LIST_HEAD(&connector->probed_modes);
@@ -959,7 +966,12 @@
 	}
 
 	connector->debugfs_entry = NULL;
-
+out_put_type_id:
+	if (ret)
+		ida_remove(connector_ida, connector->connector_type_id);
+out_put_id:
+	if (ret)
+		ida_remove(&config->connector_ida, connector->connector_id);
 out_put:
 	if (ret)
 		drm_mode_object_put(dev, &connector->base);
@@ -996,6 +1008,9 @@
 	ida_remove(&drm_connector_enum_list[connector->connector_type].ida,
 		   connector->connector_type_id);
 
+	ida_remove(&dev->mode_config.connector_ida,
+		   connector->connector_id);
+
 	kfree(connector->display_info.bus_formats);
 	drm_mode_object_put(dev, &connector->base);
 	kfree(connector->name);
@@ -1013,32 +1028,6 @@
 EXPORT_SYMBOL(drm_connector_cleanup);
 
 /**
- * drm_connector_index - find the index of a registered connector
- * @connector: connector to find index for
- *
- * Given a registered connector, return the index of that connector within a DRM
- * device's list of connectors.
- */
-unsigned int drm_connector_index(struct drm_connector *connector)
-{
-	unsigned int index = 0;
-	struct drm_connector *tmp;
-	struct drm_mode_config *config = &connector->dev->mode_config;
-
-	WARN_ON(!drm_modeset_is_locked(&config->connection_mutex));
-
-	drm_for_each_connector(tmp, connector->dev) {
-		if (tmp == connector)
-			return index;
-
-		index++;
-	}
-
-	BUG();
-}
-EXPORT_SYMBOL(drm_connector_index);
-
-/**
  * drm_connector_register - register a connector
  * @connector: the connector to register
  *
@@ -5789,6 +5778,7 @@
 	INIT_LIST_HEAD(&dev->mode_config.plane_list);
 	idr_init(&dev->mode_config.crtc_idr);
 	idr_init(&dev->mode_config.tile_idr);
+	ida_init(&dev->mode_config.connector_ida);
 
 	drm_modeset_lock_all(dev);
 	drm_mode_create_standard_properties(dev);
@@ -5869,6 +5859,7 @@
 		crtc->funcs->destroy(crtc);
 	}
 
+	ida_destroy(&dev->mode_config.connector_ida);
 	idr_destroy(&dev->mode_config.tile_idr);
 	idr_destroy(&dev->mode_config.crtc_idr);
 	drm_modeset_lock_fini(&dev->mode_config.connection_mutex);
diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c
index 6ed90a2..27fbd79 100644
--- a/drivers/gpu/drm/drm_dp_mst_topology.c
+++ b/drivers/gpu/drm/drm_dp_mst_topology.c
@@ -803,6 +803,18 @@
 	return mstb;
 }
 
+static void drm_dp_free_mst_port(struct kref *kref);
+
+static void drm_dp_free_mst_branch_device(struct kref *kref)
+{
+	struct drm_dp_mst_branch *mstb = container_of(kref, struct drm_dp_mst_branch, kref);
+	if (mstb->port_parent) {
+		if (list_empty(&mstb->port_parent->next))
+			kref_put(&mstb->port_parent->kref, drm_dp_free_mst_port);
+	}
+	kfree(mstb);
+}
+
 static void drm_dp_destroy_mst_branch_device(struct kref *kref)
 {
 	struct drm_dp_mst_branch *mstb = container_of(kref, struct drm_dp_mst_branch, kref);
@@ -810,6 +822,15 @@
 	bool wake_tx = false;
 
 	/*
+	 * init kref again to be used by ports to remove mst branch when it is
+	 * not needed anymore
+	 */
+	kref_init(kref);
+
+	if (mstb->port_parent && list_empty(&mstb->port_parent->next))
+		kref_get(&mstb->port_parent->kref);
+
+	/*
 	 * destroy all ports - don't need lock
 	 * as there are no more references to the mst branch
 	 * device at this point.
@@ -835,7 +856,8 @@
 
 	if (wake_tx)
 		wake_up(&mstb->mgr->tx_waitq);
-	kfree(mstb);
+
+	kref_put(kref, drm_dp_free_mst_branch_device);
 }
 
 static void drm_dp_put_mst_branch_device(struct drm_dp_mst_branch *mstb)
@@ -883,6 +905,7 @@
 			 * from an EDID retrieval */
 
 			mutex_lock(&mgr->destroy_connector_lock);
+			kref_get(&port->parent->kref);
 			list_add(&port->next, &mgr->destroy_connector_list);
 			mutex_unlock(&mgr->destroy_connector_lock);
 			schedule_work(&mgr->destroy_connector_work);
@@ -1018,18 +1041,27 @@
 	return send_link;
 }
 
-static void drm_dp_check_port_guid(struct drm_dp_mst_branch *mstb,
-				   struct drm_dp_mst_port *port)
+static void drm_dp_check_mstb_guid(struct drm_dp_mst_branch *mstb, u8 *guid)
 {
 	int ret;
-	if (port->dpcd_rev >= 0x12) {
-		port->guid_valid = drm_dp_validate_guid(mstb->mgr, port->guid);
-		if (!port->guid_valid) {
-			ret = drm_dp_send_dpcd_write(mstb->mgr,
-						     port,
-						     DP_GUID,
-						     16, port->guid);
-			port->guid_valid = true;
+
+	memcpy(mstb->guid, guid, 16);
+
+	if (!drm_dp_validate_guid(mstb->mgr, mstb->guid)) {
+		if (mstb->port_parent) {
+			ret = drm_dp_send_dpcd_write(
+					mstb->mgr,
+					mstb->port_parent,
+					DP_GUID,
+					16,
+					mstb->guid);
+		} else {
+
+			ret = drm_dp_dpcd_write(
+					mstb->mgr->aux,
+					DP_GUID,
+					mstb->guid,
+					16);
 		}
 	}
 }
@@ -1086,7 +1118,6 @@
 	port->dpcd_rev = port_msg->dpcd_revision;
 	port->num_sdp_streams = port_msg->num_sdp_streams;
 	port->num_sdp_stream_sinks = port_msg->num_sdp_stream_sinks;
-	memcpy(port->guid, port_msg->peer_guid, 16);
 
 	/* manage mstb port lists with mgr lock - take a reference
 	   for this list */
@@ -1099,11 +1130,9 @@
 
 	if (old_ddps != port->ddps) {
 		if (port->ddps) {
-			drm_dp_check_port_guid(mstb, port);
 			if (!port->input)
 				drm_dp_send_enum_path_resources(mstb->mgr, mstb, port);
 		} else {
-			port->guid_valid = false;
 			port->available_pbn = 0;
 			}
 	}
@@ -1162,10 +1191,8 @@
 
 	if (old_ddps != port->ddps) {
 		if (port->ddps) {
-			drm_dp_check_port_guid(mstb, port);
 			dowork = true;
 		} else {
-			port->guid_valid = false;
 			port->available_pbn = 0;
 		}
 	}
@@ -1222,13 +1249,14 @@
 	struct drm_dp_mst_branch *found_mstb;
 	struct drm_dp_mst_port *port;
 
+	if (memcmp(mstb->guid, guid, 16) == 0)
+		return mstb;
+
+
 	list_for_each_entry(port, &mstb->ports, next) {
 		if (!port->mstb)
 			continue;
 
-		if (port->guid_valid && memcmp(port->guid, guid, 16) == 0)
-			return port->mstb;
-
 		found_mstb = get_mst_branch_device_by_guid_helper(port->mstb, guid);
 
 		if (found_mstb)
@@ -1247,10 +1275,7 @@
 	/* find the port by iterating down */
 	mutex_lock(&mgr->lock);
 
-	if (mgr->guid_valid && memcmp(mgr->guid, guid, 16) == 0)
-		mstb = mgr->mst_primary;
-	else
-		mstb = get_mst_branch_device_by_guid_helper(mgr->mst_primary, guid);
+	mstb = get_mst_branch_device_by_guid_helper(mgr->mst_primary, guid);
 
 	if (mstb)
 		kref_get(&mstb->kref);
@@ -1555,6 +1580,9 @@
 				       txmsg->reply.u.link_addr.ports[i].num_sdp_streams,
 				       txmsg->reply.u.link_addr.ports[i].num_sdp_stream_sinks);
 			}
+
+			drm_dp_check_mstb_guid(mstb, txmsg->reply.u.link_addr.guid);
+
 			for (i = 0; i < txmsg->reply.u.link_addr.nports; i++) {
 				drm_dp_add_port(mstb, mgr->dev, &txmsg->reply.u.link_addr.ports[i]);
 			}
@@ -1602,6 +1630,37 @@
 	return 0;
 }
 
+static struct drm_dp_mst_port *drm_dp_get_last_connected_port_to_mstb(struct drm_dp_mst_branch *mstb)
+{
+	if (!mstb->port_parent)
+		return NULL;
+
+	if (mstb->port_parent->mstb != mstb)
+		return mstb->port_parent;
+
+	return drm_dp_get_last_connected_port_to_mstb(mstb->port_parent->parent);
+}
+
+static struct drm_dp_mst_branch *drm_dp_get_last_connected_port_and_mstb(struct drm_dp_mst_topology_mgr *mgr,
+									 struct drm_dp_mst_branch *mstb,
+									 int *port_num)
+{
+	struct drm_dp_mst_branch *rmstb = NULL;
+	struct drm_dp_mst_port *found_port;
+	mutex_lock(&mgr->lock);
+	if (mgr->mst_primary) {
+		found_port = drm_dp_get_last_connected_port_to_mstb(mstb);
+
+		if (found_port) {
+			rmstb = found_port->parent;
+			kref_get(&rmstb->kref);
+			*port_num = found_port->port_num;
+		}
+	}
+	mutex_unlock(&mgr->lock);
+	return rmstb;
+}
+
 static int drm_dp_payload_send_msg(struct drm_dp_mst_topology_mgr *mgr,
 				   struct drm_dp_mst_port *port,
 				   int id,
@@ -1609,13 +1668,18 @@
 {
 	struct drm_dp_sideband_msg_tx *txmsg;
 	struct drm_dp_mst_branch *mstb;
-	int len, ret;
+	int len, ret, port_num;
 	u8 sinks[DRM_DP_MAX_SDP_STREAMS];
 	int i;
 
+	port_num = port->port_num;
 	mstb = drm_dp_get_validated_mstb_ref(mgr, port->parent);
-	if (!mstb)
-		return -EINVAL;
+	if (!mstb) {
+		mstb = drm_dp_get_last_connected_port_and_mstb(mgr, port->parent, &port_num);
+
+		if (!mstb)
+			return -EINVAL;
+	}
 
 	txmsg = kzalloc(sizeof(*txmsg), GFP_KERNEL);
 	if (!txmsg) {
@@ -1627,7 +1691,7 @@
 		sinks[i] = i;
 
 	txmsg->dst = mstb;
-	len = build_allocate_payload(txmsg, port->port_num,
+	len = build_allocate_payload(txmsg, port_num,
 				     id,
 				     pbn, port->num_sdp_streams, sinks);
 
@@ -1983,6 +2047,12 @@
 		mgr->mst_primary = mstb;
 		kref_get(&mgr->mst_primary->kref);
 
+		ret = drm_dp_dpcd_writeb(mgr->aux, DP_MSTM_CTRL,
+							 DP_MST_EN | DP_UP_REQ_EN | DP_UPSTREAM_IS_SRC);
+		if (ret < 0) {
+			goto out_unlock;
+		}
+
 		{
 			struct drm_dp_payload reset_pay;
 			reset_pay.start_slot = 0;
@@ -1990,26 +2060,6 @@
 			drm_dp_dpcd_write_payload(mgr, 0, &reset_pay);
 		}
 
-		ret = drm_dp_dpcd_writeb(mgr->aux, DP_MSTM_CTRL,
-					 DP_MST_EN | DP_UP_REQ_EN | DP_UPSTREAM_IS_SRC);
-		if (ret < 0) {
-			goto out_unlock;
-		}
-
-
-		/* sort out guid */
-		ret = drm_dp_dpcd_read(mgr->aux, DP_GUID, mgr->guid, 16);
-		if (ret != 16) {
-			DRM_DEBUG_KMS("failed to read DP GUID %d\n", ret);
-			goto out_unlock;
-		}
-
-		mgr->guid_valid = drm_dp_validate_guid(mgr, mgr->guid);
-		if (!mgr->guid_valid) {
-			ret = drm_dp_dpcd_write(mgr->aux, DP_GUID, mgr->guid, 16);
-			mgr->guid_valid = true;
-		}
-
 		queue_work(system_long_wq, &mgr->work);
 
 		ret = 0;
@@ -2231,6 +2281,7 @@
 			}
 
 			drm_dp_update_port(mstb, &msg.u.conn_stat);
+
 			DRM_DEBUG_KMS("Got CSN: pn: %d ldps:%d ddps: %d mcs: %d ip: %d pdt: %d\n", msg.u.conn_stat.port_number, msg.u.conn_stat.legacy_device_plug_status, msg.u.conn_stat.displayport_device_plug_status, msg.u.conn_stat.message_capability_status, msg.u.conn_stat.input_port, msg.u.conn_stat.peer_device_type);
 			(*mgr->cbs->hotplug)(mgr);
 
@@ -2446,6 +2497,7 @@
 		DRM_DEBUG_KMS("payload: vcpi %d already allocated for pbn %d - requested pbn %d\n", port->vcpi.vcpi, port->vcpi.pbn, pbn);
 		if (pbn == port->vcpi.pbn) {
 			*slots = port->vcpi.num_slots;
+			drm_dp_put_port(port);
 			return true;
 		}
 	}
@@ -2605,32 +2657,31 @@
  */
 int drm_dp_calc_pbn_mode(int clock, int bpp)
 {
-	fixed20_12 pix_bw;
-	fixed20_12 fbpp;
-	fixed20_12 result;
-	fixed20_12 margin, tmp;
-	u32 res;
+	u64 kbps;
+	s64 peak_kbps;
+	u32 numerator;
+	u32 denominator;
 
-	pix_bw.full = dfixed_const(clock);
-	fbpp.full = dfixed_const(bpp);
-	tmp.full = dfixed_const(8);
-	fbpp.full = dfixed_div(fbpp, tmp);
+	kbps = clock * bpp;
 
-	result.full = dfixed_mul(pix_bw, fbpp);
-	margin.full = dfixed_const(54);
-	tmp.full = dfixed_const(64);
-	margin.full = dfixed_div(margin, tmp);
-	result.full = dfixed_div(result, margin);
+	/*
+	 * margin 5300ppm + 300ppm ~ 0.6% as per spec, factor is 1.006
+	 * The unit of 54/64Mbytes/sec is an arbitrary unit chosen based on
+	 * common multiplier to render an integer PBN for all link rate/lane
+	 * counts combinations
+	 * calculate
+	 * peak_kbps *= (1006/1000)
+	 * peak_kbps *= (64/54)
+	 * peak_kbps *= 8    convert to bytes
+	 */
 
-	margin.full = dfixed_const(1006);
-	tmp.full = dfixed_const(1000);
-	margin.full = dfixed_div(margin, tmp);
-	result.full = dfixed_mul(result, margin);
+	numerator = 64 * 1006;
+	denominator = 54 * 8 * 1000 * 1000;
 
-	result.full = dfixed_div(result, tmp);
-	result.full = dfixed_ceil(result);
-	res = dfixed_trunc(result);
-	return res;
+	kbps *= numerator;
+	peak_kbps = drm_fixp_from_fraction(kbps, denominator);
+
+	return drm_fixp2int_ceil(peak_kbps);
 }
 EXPORT_SYMBOL(drm_dp_calc_pbn_mode);
 
@@ -2638,11 +2689,23 @@
 {
 	int ret;
 	ret = drm_dp_calc_pbn_mode(154000, 30);
-	if (ret != 689)
+	if (ret != 689) {
+		DRM_ERROR("PBN calculation test failed - clock %d, bpp %d, expected PBN %d, actual PBN %d.\n",
+				154000, 30, 689, ret);
 		return -EINVAL;
+	}
 	ret = drm_dp_calc_pbn_mode(234000, 30);
-	if (ret != 1047)
+	if (ret != 1047) {
+		DRM_ERROR("PBN calculation test failed - clock %d, bpp %d, expected PBN %d, actual PBN %d.\n",
+				234000, 30, 1047, ret);
 		return -EINVAL;
+	}
+	ret = drm_dp_calc_pbn_mode(297000, 24);
+	if (ret != 1063) {
+		DRM_ERROR("PBN calculation test failed - clock %d, bpp %d, expected PBN %d, actual PBN %d.\n",
+				297000, 24, 1063, ret);
+		return -EINVAL;
+	}
 	return 0;
 }
 
@@ -2783,6 +2846,13 @@
 	mutex_unlock(&mgr->qlock);
 }
 
+static void drm_dp_free_mst_port(struct kref *kref)
+{
+	struct drm_dp_mst_port *port = container_of(kref, struct drm_dp_mst_port, kref);
+	kref_put(&port->parent->kref, drm_dp_free_mst_branch_device);
+	kfree(port);
+}
+
 static void drm_dp_destroy_connector_work(struct work_struct *work)
 {
 	struct drm_dp_mst_topology_mgr *mgr = container_of(work, struct drm_dp_mst_topology_mgr, destroy_connector_work);
@@ -2803,13 +2873,22 @@
 		list_del(&port->next);
 		mutex_unlock(&mgr->destroy_connector_lock);
 
+		kref_init(&port->kref);
+		INIT_LIST_HEAD(&port->next);
+
 		mgr->cbs->destroy_connector(mgr, port->connector);
 
 		drm_dp_port_teardown_pdt(port, port->pdt);
 
-		if (!port->input && port->vcpi.vcpi > 0)
-			drm_dp_mst_put_payload_id(mgr, port->vcpi.vcpi);
-		kfree(port);
+		if (!port->input && port->vcpi.vcpi > 0) {
+			if (mgr->mst_state) {
+				drm_dp_mst_reset_vcpi_slots(mgr, port);
+				drm_dp_update_payload_part1(mgr);
+				drm_dp_mst_put_payload_id(mgr, port->vcpi.vcpi);
+			}
+		}
+
+		kref_put(&port->kref, drm_dp_free_mst_port);
 		send_hotplug = true;
 	}
 	if (send_hotplug)
@@ -2847,6 +2926,9 @@
 	mgr->max_dpcd_transaction_bytes = max_dpcd_transaction_bytes;
 	mgr->max_payloads = max_payloads;
 	mgr->conn_base_id = conn_base_id;
+	if (max_payloads + 1 > sizeof(mgr->payload_mask) * 8 ||
+	    max_payloads + 1 > sizeof(mgr->vcpi_mask) * 8)
+		return -EINVAL;
 	mgr->payloads = kcalloc(max_payloads, sizeof(struct drm_dp_payload), GFP_KERNEL);
 	if (!mgr->payloads)
 		return -ENOMEM;
@@ -2854,7 +2936,9 @@
 	if (!mgr->proposed_vcpis)
 		return -ENOMEM;
 	set_bit(0, &mgr->payload_mask);
-	test_calc_pbn_mode();
+	if (test_calc_pbn_mode() < 0)
+		DRM_ERROR("MST PBN self-test failed\n");
+
 	return 0;
 }
 EXPORT_SYMBOL(drm_dp_mst_topology_mgr_init);
diff --git a/drivers/gpu/drm/drm_gem_cma_helper.c b/drivers/gpu/drm/drm_gem_cma_helper.c
index e5df53b..1f500a1 100644
--- a/drivers/gpu/drm/drm_gem_cma_helper.c
+++ b/drivers/gpu/drm/drm_gem_cma_helper.c
@@ -109,8 +109,8 @@
 	if (IS_ERR(cma_obj))
 		return cma_obj;
 
-	cma_obj->vaddr = dma_alloc_writecombine(drm->dev, size,
-			&cma_obj->paddr, GFP_KERNEL | __GFP_NOWARN);
+	cma_obj->vaddr = dma_alloc_wc(drm->dev, size, &cma_obj->paddr,
+				      GFP_KERNEL | __GFP_NOWARN);
 	if (!cma_obj->vaddr) {
 		dev_err(drm->dev, "failed to allocate buffer with size %zu\n",
 			size);
@@ -192,8 +192,8 @@
 	cma_obj = to_drm_gem_cma_obj(gem_obj);
 
 	if (cma_obj->vaddr) {
-		dma_free_writecombine(gem_obj->dev->dev, cma_obj->base.size,
-				      cma_obj->vaddr, cma_obj->paddr);
+		dma_free_wc(gem_obj->dev->dev, cma_obj->base.size,
+			    cma_obj->vaddr, cma_obj->paddr);
 	} else if (gem_obj->import_attach) {
 		drm_prime_gem_destroy(gem_obj, cma_obj->sgt);
 	}
@@ -324,9 +324,8 @@
 	vma->vm_flags &= ~VM_PFNMAP;
 	vma->vm_pgoff = 0;
 
-	ret = dma_mmap_writecombine(cma_obj->base.dev->dev, vma,
-				    cma_obj->vaddr, cma_obj->paddr,
-				    vma->vm_end - vma->vm_start);
+	ret = dma_mmap_wc(cma_obj->base.dev->dev, vma, cma_obj->vaddr,
+			  cma_obj->paddr, vma->vm_end - vma->vm_start);
 	if (ret)
 		drm_gem_vm_close(vma);
 
diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c
index d12a4ef..1fe1457 100644
--- a/drivers/gpu/drm/drm_irq.c
+++ b/drivers/gpu/drm/drm_irq.c
@@ -224,6 +224,64 @@
 		diff = (flags & DRM_CALLED_FROM_VBLIRQ) != 0;
 	}
 
+	/*
+	 * Within a drm_vblank_pre_modeset - drm_vblank_post_modeset
+	 * interval? If so then vblank irqs keep running and it will likely
+	 * happen that the hardware vblank counter is not trustworthy as it
+	 * might reset at some point in that interval and vblank timestamps
+	 * are not trustworthy either in that interval. Iow. this can result
+	 * in a bogus diff >> 1 which must be avoided as it would cause
+	 * random large forward jumps of the software vblank counter.
+	 */
+	if (diff > 1 && (vblank->inmodeset & 0x2)) {
+		DRM_DEBUG_VBL("clamping vblank bump to 1 on crtc %u: diffr=%u"
+			      " due to pre-modeset.\n", pipe, diff);
+		diff = 1;
+	}
+
+	/*
+	 * FIMXE: Need to replace this hack with proper seqlocks.
+	 *
+	 * Restrict the bump of the software vblank counter to a safe maximum
+	 * value of +1 whenever there is the possibility that concurrent readers
+	 * of vblank timestamps could be active at the moment, as the current
+	 * implementation of the timestamp caching and updating is not safe
+	 * against concurrent readers for calls to store_vblank() with a bump
+	 * of anything but +1. A bump != 1 would very likely return corrupted
+	 * timestamps to userspace, because the same slot in the cache could
+	 * be concurrently written by store_vblank() and read by one of those
+	 * readers without the read-retry logic detecting the collision.
+	 *
+	 * Concurrent readers can exist when we are called from the
+	 * drm_vblank_off() or drm_vblank_on() functions and other non-vblank-
+	 * irq callers. However, all those calls to us are happening with the
+	 * vbl_lock locked to prevent drm_vblank_get(), so the vblank refcount
+	 * can't increase while we are executing. Therefore a zero refcount at
+	 * this point is safe for arbitrary counter bumps if we are called
+	 * outside vblank irq, a non-zero count is not 100% safe. Unfortunately
+	 * we must also accept a refcount of 1, as whenever we are called from
+	 * drm_vblank_get() -> drm_vblank_enable() the refcount will be 1 and
+	 * we must let that one pass through in order to not lose vblank counts
+	 * during vblank irq off - which would completely defeat the whole
+	 * point of this routine.
+	 *
+	 * Whenever we are called from vblank irq, we have to assume concurrent
+	 * readers exist or can show up any time during our execution, even if
+	 * the refcount is currently zero, as vblank irqs are usually only
+	 * enabled due to the presence of readers, and because when we are called
+	 * from vblank irq we can't hold the vbl_lock to protect us from sudden
+	 * bumps in vblank refcount. Therefore also restrict bumps to +1 when
+	 * called from vblank irq.
+	 */
+	if ((diff > 1) && (atomic_read(&vblank->refcount) > 1 ||
+	    (flags & DRM_CALLED_FROM_VBLIRQ))) {
+		DRM_DEBUG_VBL("clamping vblank bump to 1 on crtc %u: diffr=%u "
+			      "refcount %u, vblirq %u\n", pipe, diff,
+			      atomic_read(&vblank->refcount),
+			      (flags & DRM_CALLED_FROM_VBLIRQ) != 0);
+		diff = 1;
+	}
+
 	DRM_DEBUG_VBL("updating vblank count on crtc %u:"
 		      " current=%u, diff=%u, hw=%u hw_last=%u\n",
 		      pipe, vblank->count, diff, cur_vblank, vblank->last);
@@ -1316,7 +1374,13 @@
 	spin_lock_irqsave(&dev->event_lock, irqflags);
 
 	spin_lock(&dev->vbl_lock);
-	vblank_disable_and_save(dev, pipe);
+	DRM_DEBUG_VBL("crtc %d, vblank enabled %d, inmodeset %d\n",
+		      pipe, vblank->enabled, vblank->inmodeset);
+
+	/* Avoid redundant vblank disables without previous drm_vblank_on(). */
+	if (drm_core_check_feature(dev, DRIVER_ATOMIC) || !vblank->inmodeset)
+		vblank_disable_and_save(dev, pipe);
+
 	wake_up(&vblank->queue);
 
 	/*
@@ -1418,6 +1482,9 @@
 		return;
 
 	spin_lock_irqsave(&dev->vbl_lock, irqflags);
+	DRM_DEBUG_VBL("crtc %d, vblank enabled %d, inmodeset %d\n",
+		      pipe, vblank->enabled, vblank->inmodeset);
+
 	/* Drop our private "prevent drm_vblank_get" refcount */
 	if (vblank->inmodeset) {
 		atomic_dec(&vblank->refcount);
@@ -1430,8 +1497,7 @@
 	 * re-enable interrupts if there are users left, or the
 	 * user wishes vblank interrupts to be enabled all the time.
 	 */
-	if (atomic_read(&vblank->refcount) != 0 ||
-	    (!dev->vblank_disable_immediate && drm_vblank_offdelay == 0))
+	if (atomic_read(&vblank->refcount) != 0 || drm_vblank_offdelay == 0)
 		WARN_ON(drm_vblank_enable(dev, pipe));
 	spin_unlock_irqrestore(&dev->vbl_lock, irqflags);
 }
@@ -1526,6 +1592,7 @@
 	if (vblank->inmodeset) {
 		spin_lock_irqsave(&dev->vbl_lock, irqflags);
 		dev->vblank_disable_allowed = true;
+		drm_reset_vblank_timestamp(dev, pipe);
 		spin_unlock_irqrestore(&dev->vbl_lock, irqflags);
 
 		if (vblank->inmodeset & 0x2)
diff --git a/drivers/gpu/drm/etnaviv/common.xml.h b/drivers/gpu/drm/etnaviv/common.xml.h
index 9e585d5..e881482 100644
--- a/drivers/gpu/drm/etnaviv/common.xml.h
+++ b/drivers/gpu/drm/etnaviv/common.xml.h
@@ -8,8 +8,8 @@
 git clone git://0x04.net/rules-ng-ng
 
 The rules-ng-ng source files this header was generated from are:
-- state_vg.xml (   5973 bytes, from 2015-03-25 11:26:01)
-- common.xml   (  18437 bytes, from 2015-03-25 11:27:41)
+- state_hi.xml (  24309 bytes, from 2015-12-12 09:02:53)
+- common.xml   (  18379 bytes, from 2015-12-12 09:02:53)
 
 Copyright (C) 2015
 */
@@ -30,15 +30,19 @@
 #define ENDIAN_MODE_NO_SWAP					0x00000000
 #define ENDIAN_MODE_SWAP_16					0x00000001
 #define ENDIAN_MODE_SWAP_32					0x00000002
+#define chipModel_GC200						0x00000200
 #define chipModel_GC300						0x00000300
 #define chipModel_GC320						0x00000320
+#define chipModel_GC328						0x00000328
 #define chipModel_GC350						0x00000350
 #define chipModel_GC355						0x00000355
 #define chipModel_GC400						0x00000400
 #define chipModel_GC410						0x00000410
 #define chipModel_GC420						0x00000420
+#define chipModel_GC428						0x00000428
 #define chipModel_GC450						0x00000450
 #define chipModel_GC500						0x00000500
+#define chipModel_GC520						0x00000520
 #define chipModel_GC530						0x00000530
 #define chipModel_GC600						0x00000600
 #define chipModel_GC700						0x00000700
@@ -46,9 +50,16 @@
 #define chipModel_GC860						0x00000860
 #define chipModel_GC880						0x00000880
 #define chipModel_GC1000					0x00001000
+#define chipModel_GC1500					0x00001500
 #define chipModel_GC2000					0x00002000
 #define chipModel_GC2100					0x00002100
+#define chipModel_GC2200					0x00002200
+#define chipModel_GC2500					0x00002500
+#define chipModel_GC3000					0x00003000
 #define chipModel_GC4000					0x00004000
+#define chipModel_GC5000					0x00005000
+#define chipModel_GC5200					0x00005200
+#define chipModel_GC6400					0x00006400
 #define RGBA_BITS_R						0x00000001
 #define RGBA_BITS_G						0x00000002
 #define RGBA_BITS_B						0x00000004
@@ -160,7 +171,7 @@
 #define chipMinorFeatures2_UNK8					0x00000100
 #define chipMinorFeatures2_UNK9					0x00000200
 #define chipMinorFeatures2_UNK10				0x00000400
-#define chipMinorFeatures2_SAMPLERBASE_16			0x00000800
+#define chipMinorFeatures2_HALTI1				0x00000800
 #define chipMinorFeatures2_UNK12				0x00001000
 #define chipMinorFeatures2_UNK13				0x00002000
 #define chipMinorFeatures2_UNK14				0x00004000
@@ -189,7 +200,7 @@
 #define chipMinorFeatures3_UNK5					0x00000020
 #define chipMinorFeatures3_UNK6					0x00000040
 #define chipMinorFeatures3_UNK7					0x00000080
-#define chipMinorFeatures3_UNK8					0x00000100
+#define chipMinorFeatures3_FAST_MSAA				0x00000100
 #define chipMinorFeatures3_UNK9					0x00000200
 #define chipMinorFeatures3_BUG_FIXES10				0x00000400
 #define chipMinorFeatures3_UNK11				0x00000800
@@ -199,7 +210,7 @@
 #define chipMinorFeatures3_UNK15				0x00008000
 #define chipMinorFeatures3_UNK16				0x00010000
 #define chipMinorFeatures3_UNK17				0x00020000
-#define chipMinorFeatures3_UNK18				0x00040000
+#define chipMinorFeatures3_ACE					0x00040000
 #define chipMinorFeatures3_UNK19				0x00080000
 #define chipMinorFeatures3_UNK20				0x00100000
 #define chipMinorFeatures3_UNK21				0x00200000
@@ -207,7 +218,7 @@
 #define chipMinorFeatures3_UNK23				0x00800000
 #define chipMinorFeatures3_UNK24				0x01000000
 #define chipMinorFeatures3_UNK25				0x02000000
-#define chipMinorFeatures3_UNK26				0x04000000
+#define chipMinorFeatures3_NEW_HZ				0x04000000
 #define chipMinorFeatures3_UNK27				0x08000000
 #define chipMinorFeatures3_UNK28				0x10000000
 #define chipMinorFeatures3_UNK29				0x20000000
@@ -229,9 +240,9 @@
 #define chipMinorFeatures4_UNK13				0x00002000
 #define chipMinorFeatures4_UNK14				0x00004000
 #define chipMinorFeatures4_UNK15				0x00008000
-#define chipMinorFeatures4_UNK16				0x00010000
+#define chipMinorFeatures4_HALTI2				0x00010000
 #define chipMinorFeatures4_UNK17				0x00020000
-#define chipMinorFeatures4_UNK18				0x00040000
+#define chipMinorFeatures4_SMALL_MSAA				0x00040000
 #define chipMinorFeatures4_UNK19				0x00080000
 #define chipMinorFeatures4_UNK20				0x00100000
 #define chipMinorFeatures4_UNK21				0x00200000
@@ -245,5 +256,37 @@
 #define chipMinorFeatures4_UNK29				0x20000000
 #define chipMinorFeatures4_UNK30				0x40000000
 #define chipMinorFeatures4_UNK31				0x80000000
+#define chipMinorFeatures5_UNK0					0x00000001
+#define chipMinorFeatures5_UNK1					0x00000002
+#define chipMinorFeatures5_UNK2					0x00000004
+#define chipMinorFeatures5_UNK3					0x00000008
+#define chipMinorFeatures5_UNK4					0x00000010
+#define chipMinorFeatures5_UNK5					0x00000020
+#define chipMinorFeatures5_UNK6					0x00000040
+#define chipMinorFeatures5_UNK7					0x00000080
+#define chipMinorFeatures5_UNK8					0x00000100
+#define chipMinorFeatures5_HALTI3				0x00000200
+#define chipMinorFeatures5_UNK10				0x00000400
+#define chipMinorFeatures5_UNK11				0x00000800
+#define chipMinorFeatures5_UNK12				0x00001000
+#define chipMinorFeatures5_UNK13				0x00002000
+#define chipMinorFeatures5_UNK14				0x00004000
+#define chipMinorFeatures5_UNK15				0x00008000
+#define chipMinorFeatures5_UNK16				0x00010000
+#define chipMinorFeatures5_UNK17				0x00020000
+#define chipMinorFeatures5_UNK18				0x00040000
+#define chipMinorFeatures5_UNK19				0x00080000
+#define chipMinorFeatures5_UNK20				0x00100000
+#define chipMinorFeatures5_UNK21				0x00200000
+#define chipMinorFeatures5_UNK22				0x00400000
+#define chipMinorFeatures5_UNK23				0x00800000
+#define chipMinorFeatures5_UNK24				0x01000000
+#define chipMinorFeatures5_UNK25				0x02000000
+#define chipMinorFeatures5_UNK26				0x04000000
+#define chipMinorFeatures5_UNK27				0x08000000
+#define chipMinorFeatures5_UNK28				0x10000000
+#define chipMinorFeatures5_UNK29				0x20000000
+#define chipMinorFeatures5_UNK30				0x40000000
+#define chipMinorFeatures5_UNK31				0x80000000
 
 #endif /* COMMON_XML */
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.c b/drivers/gpu/drm/etnaviv/etnaviv_drv.c
index 5c89ebb..e885898 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_drv.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.c
@@ -668,7 +668,6 @@
 	.probe      = etnaviv_pdev_probe,
 	.remove     = etnaviv_pdev_remove,
 	.driver     = {
-		.owner  = THIS_MODULE,
 		.name   = "etnaviv",
 		.of_match_table = dt_match,
 	},
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.h b/drivers/gpu/drm/etnaviv/etnaviv_drv.h
index d6bd438..1cd6046 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_drv.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.h
@@ -85,7 +85,7 @@
 	struct dma_buf_attachment *attach, struct sg_table *sg);
 int etnaviv_gem_prime_pin(struct drm_gem_object *obj);
 void etnaviv_gem_prime_unpin(struct drm_gem_object *obj);
-void *etnaviv_gem_vaddr(struct drm_gem_object *obj);
+void *etnaviv_gem_vmap(struct drm_gem_object *obj);
 int etnaviv_gem_cpu_prep(struct drm_gem_object *obj, u32 op,
 		struct timespec *timeout);
 int etnaviv_gem_cpu_fini(struct drm_gem_object *obj);
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_dump.c b/drivers/gpu/drm/etnaviv/etnaviv_dump.c
index bf8fa85..4a29eea 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_dump.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_dump.c
@@ -201,7 +201,9 @@
 
 		obj = vram->object;
 
+		mutex_lock(&obj->lock);
 		pages = etnaviv_gem_get_pages(obj);
+		mutex_unlock(&obj->lock);
 		if (pages) {
 			int j;
 
@@ -213,8 +215,8 @@
 
 		iter.hdr->iova = cpu_to_le64(vram->iova);
 
-		vaddr = etnaviv_gem_vaddr(&obj->base);
-		if (vaddr && !IS_ERR(vaddr))
+		vaddr = etnaviv_gem_vmap(&obj->base);
+		if (vaddr)
 			memcpy(iter.data, vaddr, obj->base.size);
 
 		etnaviv_core_dump_header(&iter, ETDUMP_BUF_BO, iter.data +
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.c b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
index 9f77c3b..4b519e4 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
@@ -353,25 +353,39 @@
 	drm_gem_object_unreference_unlocked(obj);
 }
 
-void *etnaviv_gem_vaddr(struct drm_gem_object *obj)
+void *etnaviv_gem_vmap(struct drm_gem_object *obj)
 {
 	struct etnaviv_gem_object *etnaviv_obj = to_etnaviv_bo(obj);
 
+	if (etnaviv_obj->vaddr)
+		return etnaviv_obj->vaddr;
+
 	mutex_lock(&etnaviv_obj->lock);
-	if (!etnaviv_obj->vaddr) {
-		struct page **pages = etnaviv_gem_get_pages(etnaviv_obj);
-
-		if (IS_ERR(pages))
-			return ERR_CAST(pages);
-
-		etnaviv_obj->vaddr = vmap(pages, obj->size >> PAGE_SHIFT,
-				VM_MAP, pgprot_writecombine(PAGE_KERNEL));
-	}
+	/*
+	 * Need to check again, as we might have raced with another thread
+	 * while waiting for the mutex.
+	 */
+	if (!etnaviv_obj->vaddr)
+		etnaviv_obj->vaddr = etnaviv_obj->ops->vmap(etnaviv_obj);
 	mutex_unlock(&etnaviv_obj->lock);
 
 	return etnaviv_obj->vaddr;
 }
 
+static void *etnaviv_gem_vmap_impl(struct etnaviv_gem_object *obj)
+{
+	struct page **pages;
+
+	lockdep_assert_held(&obj->lock);
+
+	pages = etnaviv_gem_get_pages(obj);
+	if (IS_ERR(pages))
+		return NULL;
+
+	return vmap(pages, obj->base.size >> PAGE_SHIFT,
+			VM_MAP, pgprot_writecombine(PAGE_KERNEL));
+}
+
 static inline enum dma_data_direction etnaviv_op_to_dma_dir(u32 op)
 {
 	if (op & ETNA_PREP_READ)
@@ -522,6 +536,7 @@
 static const struct etnaviv_gem_ops etnaviv_gem_shmem_ops = {
 	.get_pages = etnaviv_gem_shmem_get_pages,
 	.release = etnaviv_gem_shmem_release,
+	.vmap = etnaviv_gem_vmap_impl,
 };
 
 void etnaviv_gem_free_object(struct drm_gem_object *obj)
@@ -866,6 +881,7 @@
 static const struct etnaviv_gem_ops etnaviv_gem_userptr_ops = {
 	.get_pages = etnaviv_gem_userptr_get_pages,
 	.release = etnaviv_gem_userptr_release,
+	.vmap = etnaviv_gem_vmap_impl,
 };
 
 int etnaviv_gem_new_userptr(struct drm_device *dev, struct drm_file *file,
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.h b/drivers/gpu/drm/etnaviv/etnaviv_gem.h
index a300b4b..ab5df81 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.h
@@ -78,6 +78,7 @@
 struct etnaviv_gem_ops {
 	int (*get_pages)(struct etnaviv_gem_object *);
 	void (*release)(struct etnaviv_gem_object *);
+	void *(*vmap)(struct etnaviv_gem_object *);
 };
 
 static inline bool is_active(struct etnaviv_gem_object *etnaviv_obj)
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c
index e94db4f..4e67395 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c
@@ -31,7 +31,7 @@
 
 void *etnaviv_gem_prime_vmap(struct drm_gem_object *obj)
 {
-	return etnaviv_gem_vaddr(obj);
+	return etnaviv_gem_vmap(obj);
 }
 
 void etnaviv_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr)
@@ -77,9 +77,17 @@
 	drm_prime_gem_destroy(&etnaviv_obj->base, etnaviv_obj->sgt);
 }
 
+static void *etnaviv_gem_prime_vmap_impl(struct etnaviv_gem_object *etnaviv_obj)
+{
+	lockdep_assert_held(&etnaviv_obj->lock);
+
+	return dma_buf_vmap(etnaviv_obj->base.import_attach->dmabuf);
+}
+
 static const struct etnaviv_gem_ops etnaviv_gem_prime_ops = {
 	/* .get_pages should never be called */
 	.release = etnaviv_gem_prime_release,
+	.vmap = etnaviv_gem_prime_vmap_impl,
 };
 
 struct drm_gem_object *etnaviv_gem_prime_import_sg_table(struct drm_device *dev,
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
index 056a72e..3c1ce44 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
@@ -72,6 +72,14 @@
 		*value = gpu->identity.minor_features3;
 		break;
 
+	case ETNAVIV_PARAM_GPU_FEATURES_5:
+		*value = gpu->identity.minor_features4;
+		break;
+
+	case ETNAVIV_PARAM_GPU_FEATURES_6:
+		*value = gpu->identity.minor_features5;
+		break;
+
 	case ETNAVIV_PARAM_GPU_STREAM_COUNT:
 		*value = gpu->identity.stream_count;
 		break;
@@ -112,6 +120,10 @@
 		*value = gpu->identity.num_constants;
 		break;
 
+	case ETNAVIV_PARAM_GPU_NUM_VARYINGS:
+		*value = gpu->identity.varyings_count;
+		break;
+
 	default:
 		DBG("%s: invalid param: %u", dev_name(gpu->dev), param);
 		return -EINVAL;
@@ -120,46 +132,56 @@
 	return 0;
 }
 
+
+#define etnaviv_is_model_rev(gpu, mod, rev) \
+	((gpu)->identity.model == chipModel_##mod && \
+	 (gpu)->identity.revision == rev)
+#define etnaviv_field(val, field) \
+	(((val) & field##__MASK) >> field##__SHIFT)
+
 static void etnaviv_hw_specs(struct etnaviv_gpu *gpu)
 {
 	if (gpu->identity.minor_features0 &
 	    chipMinorFeatures0_MORE_MINOR_FEATURES) {
-		u32 specs[2];
+		u32 specs[4];
+		unsigned int streams;
 
 		specs[0] = gpu_read(gpu, VIVS_HI_CHIP_SPECS);
 		specs[1] = gpu_read(gpu, VIVS_HI_CHIP_SPECS_2);
+		specs[2] = gpu_read(gpu, VIVS_HI_CHIP_SPECS_3);
+		specs[3] = gpu_read(gpu, VIVS_HI_CHIP_SPECS_4);
 
-		gpu->identity.stream_count =
-			(specs[0] & VIVS_HI_CHIP_SPECS_STREAM_COUNT__MASK)
-				>> VIVS_HI_CHIP_SPECS_STREAM_COUNT__SHIFT;
-		gpu->identity.register_max =
-			(specs[0] & VIVS_HI_CHIP_SPECS_REGISTER_MAX__MASK)
-				>> VIVS_HI_CHIP_SPECS_REGISTER_MAX__SHIFT;
-		gpu->identity.thread_count =
-			(specs[0] & VIVS_HI_CHIP_SPECS_THREAD_COUNT__MASK)
-				>> VIVS_HI_CHIP_SPECS_THREAD_COUNT__SHIFT;
-		gpu->identity.vertex_cache_size =
-			(specs[0] & VIVS_HI_CHIP_SPECS_VERTEX_CACHE_SIZE__MASK)
-				>> VIVS_HI_CHIP_SPECS_VERTEX_CACHE_SIZE__SHIFT;
-		gpu->identity.shader_core_count =
-			(specs[0] & VIVS_HI_CHIP_SPECS_SHADER_CORE_COUNT__MASK)
-				>> VIVS_HI_CHIP_SPECS_SHADER_CORE_COUNT__SHIFT;
-		gpu->identity.pixel_pipes =
-			(specs[0] & VIVS_HI_CHIP_SPECS_PIXEL_PIPES__MASK)
-				>> VIVS_HI_CHIP_SPECS_PIXEL_PIPES__SHIFT;
+		gpu->identity.stream_count = etnaviv_field(specs[0],
+					VIVS_HI_CHIP_SPECS_STREAM_COUNT);
+		gpu->identity.register_max = etnaviv_field(specs[0],
+					VIVS_HI_CHIP_SPECS_REGISTER_MAX);
+		gpu->identity.thread_count = etnaviv_field(specs[0],
+					VIVS_HI_CHIP_SPECS_THREAD_COUNT);
+		gpu->identity.vertex_cache_size = etnaviv_field(specs[0],
+					VIVS_HI_CHIP_SPECS_VERTEX_CACHE_SIZE);
+		gpu->identity.shader_core_count = etnaviv_field(specs[0],
+					VIVS_HI_CHIP_SPECS_SHADER_CORE_COUNT);
+		gpu->identity.pixel_pipes = etnaviv_field(specs[0],
+					VIVS_HI_CHIP_SPECS_PIXEL_PIPES);
 		gpu->identity.vertex_output_buffer_size =
-			(specs[0] & VIVS_HI_CHIP_SPECS_VERTEX_OUTPUT_BUFFER_SIZE__MASK)
-				>> VIVS_HI_CHIP_SPECS_VERTEX_OUTPUT_BUFFER_SIZE__SHIFT;
+			etnaviv_field(specs[0],
+				VIVS_HI_CHIP_SPECS_VERTEX_OUTPUT_BUFFER_SIZE);
 
-		gpu->identity.buffer_size =
-			(specs[1] & VIVS_HI_CHIP_SPECS_2_BUFFER_SIZE__MASK)
-				>> VIVS_HI_CHIP_SPECS_2_BUFFER_SIZE__SHIFT;
-		gpu->identity.instruction_count =
-			(specs[1] & VIVS_HI_CHIP_SPECS_2_INSTRUCTION_COUNT__MASK)
-				>> VIVS_HI_CHIP_SPECS_2_INSTRUCTION_COUNT__SHIFT;
-		gpu->identity.num_constants =
-			(specs[1] & VIVS_HI_CHIP_SPECS_2_NUM_CONSTANTS__MASK)
-				>> VIVS_HI_CHIP_SPECS_2_NUM_CONSTANTS__SHIFT;
+		gpu->identity.buffer_size = etnaviv_field(specs[1],
+					VIVS_HI_CHIP_SPECS_2_BUFFER_SIZE);
+		gpu->identity.instruction_count = etnaviv_field(specs[1],
+					VIVS_HI_CHIP_SPECS_2_INSTRUCTION_COUNT);
+		gpu->identity.num_constants = etnaviv_field(specs[1],
+					VIVS_HI_CHIP_SPECS_2_NUM_CONSTANTS);
+
+		gpu->identity.varyings_count = etnaviv_field(specs[2],
+					VIVS_HI_CHIP_SPECS_3_VARYINGS_COUNT);
+
+		/* This overrides the value from older register if non-zero */
+		streams = etnaviv_field(specs[3],
+					VIVS_HI_CHIP_SPECS_4_STREAM_COUNT);
+		if (streams)
+			gpu->identity.stream_count = streams;
 	}
 
 	/* Fill in the stream count if not specified */
@@ -173,7 +195,7 @@
 	/* Convert the register max value */
 	if (gpu->identity.register_max)
 		gpu->identity.register_max = 1 << gpu->identity.register_max;
-	else if (gpu->identity.model == 0x0400)
+	else if (gpu->identity.model == chipModel_GC400)
 		gpu->identity.register_max = 32;
 	else
 		gpu->identity.register_max = 64;
@@ -181,10 +203,10 @@
 	/* Convert thread count */
 	if (gpu->identity.thread_count)
 		gpu->identity.thread_count = 1 << gpu->identity.thread_count;
-	else if (gpu->identity.model == 0x0400)
+	else if (gpu->identity.model == chipModel_GC400)
 		gpu->identity.thread_count = 64;
-	else if (gpu->identity.model == 0x0500 ||
-		 gpu->identity.model == 0x0530)
+	else if (gpu->identity.model == chipModel_GC500 ||
+		 gpu->identity.model == chipModel_GC530)
 		gpu->identity.thread_count = 128;
 	else
 		gpu->identity.thread_count = 256;
@@ -206,7 +228,7 @@
 	if (gpu->identity.vertex_output_buffer_size) {
 		gpu->identity.vertex_output_buffer_size =
 			1 << gpu->identity.vertex_output_buffer_size;
-	} else if (gpu->identity.model == 0x0400) {
+	} else if (gpu->identity.model == chipModel_GC400) {
 		if (gpu->identity.revision < 0x4000)
 			gpu->identity.vertex_output_buffer_size = 512;
 		else if (gpu->identity.revision < 0x4200)
@@ -219,9 +241,8 @@
 
 	switch (gpu->identity.instruction_count) {
 	case 0:
-		if ((gpu->identity.model == 0x2000 &&
-		     gpu->identity.revision == 0x5108) ||
-		    gpu->identity.model == 0x880)
+		if (etnaviv_is_model_rev(gpu, GC2000, 0x5108) ||
+		    gpu->identity.model == chipModel_GC880)
 			gpu->identity.instruction_count = 512;
 		else
 			gpu->identity.instruction_count = 256;
@@ -242,6 +263,30 @@
 
 	if (gpu->identity.num_constants == 0)
 		gpu->identity.num_constants = 168;
+
+	if (gpu->identity.varyings_count == 0) {
+		if (gpu->identity.minor_features1 & chipMinorFeatures1_HALTI0)
+			gpu->identity.varyings_count = 12;
+		else
+			gpu->identity.varyings_count = 8;
+	}
+
+	/*
+	 * For some cores, two varyings are consumed for position, so the
+	 * maximum varying count needs to be reduced by one.
+	 */
+	if (etnaviv_is_model_rev(gpu, GC5000, 0x5434) ||
+	    etnaviv_is_model_rev(gpu, GC4000, 0x5222) ||
+	    etnaviv_is_model_rev(gpu, GC4000, 0x5245) ||
+	    etnaviv_is_model_rev(gpu, GC4000, 0x5208) ||
+	    etnaviv_is_model_rev(gpu, GC3000, 0x5435) ||
+	    etnaviv_is_model_rev(gpu, GC2200, 0x5244) ||
+	    etnaviv_is_model_rev(gpu, GC2100, 0x5108) ||
+	    etnaviv_is_model_rev(gpu, GC2000, 0x5108) ||
+	    etnaviv_is_model_rev(gpu, GC1500, 0x5246) ||
+	    etnaviv_is_model_rev(gpu, GC880, 0x5107) ||
+	    etnaviv_is_model_rev(gpu, GC880, 0x5106))
+		gpu->identity.varyings_count -= 1;
 }
 
 static void etnaviv_hw_identify(struct etnaviv_gpu *gpu)
@@ -251,12 +296,10 @@
 	chipIdentity = gpu_read(gpu, VIVS_HI_CHIP_IDENTITY);
 
 	/* Special case for older graphic cores. */
-	if (((chipIdentity & VIVS_HI_CHIP_IDENTITY_FAMILY__MASK)
-	     >> VIVS_HI_CHIP_IDENTITY_FAMILY__SHIFT) ==  0x01) {
-		gpu->identity.model    = 0x500; /* gc500 */
-		gpu->identity.revision =
-			(chipIdentity & VIVS_HI_CHIP_IDENTITY_REVISION__MASK)
-			>> VIVS_HI_CHIP_IDENTITY_REVISION__SHIFT;
+	if (etnaviv_field(chipIdentity, VIVS_HI_CHIP_IDENTITY_FAMILY) == 0x01) {
+		gpu->identity.model    = chipModel_GC500;
+		gpu->identity.revision = etnaviv_field(chipIdentity,
+					 VIVS_HI_CHIP_IDENTITY_REVISION);
 	} else {
 
 		gpu->identity.model = gpu_read(gpu, VIVS_HI_CHIP_MODEL);
@@ -269,13 +312,12 @@
 		 * same.  Only for GC400 family.
 		 */
 		if ((gpu->identity.model & 0xff00) == 0x0400 &&
-		    gpu->identity.model != 0x0420) {
+		    gpu->identity.model != chipModel_GC420) {
 			gpu->identity.model = gpu->identity.model & 0x0400;
 		}
 
 		/* Another special case */
-		if (gpu->identity.model == 0x300 &&
-		    gpu->identity.revision == 0x2201) {
+		if (etnaviv_is_model_rev(gpu, GC300, 0x2201)) {
 			u32 chipDate = gpu_read(gpu, VIVS_HI_CHIP_DATE);
 			u32 chipTime = gpu_read(gpu, VIVS_HI_CHIP_TIME);
 
@@ -295,11 +337,13 @@
 	gpu->identity.features = gpu_read(gpu, VIVS_HI_CHIP_FEATURE);
 
 	/* Disable fast clear on GC700. */
-	if (gpu->identity.model == 0x700)
+	if (gpu->identity.model == chipModel_GC700)
 		gpu->identity.features &= ~chipFeatures_FAST_CLEAR;
 
-	if ((gpu->identity.model == 0x500 && gpu->identity.revision < 2) ||
-	    (gpu->identity.model == 0x300 && gpu->identity.revision < 0x2000)) {
+	if ((gpu->identity.model == chipModel_GC500 &&
+	     gpu->identity.revision < 2) ||
+	    (gpu->identity.model == chipModel_GC300 &&
+	     gpu->identity.revision < 0x2000)) {
 
 		/*
 		 * GC500 rev 1.x and GC300 rev < 2.0 doesn't have these
@@ -309,6 +353,8 @@
 		gpu->identity.minor_features1 = 0;
 		gpu->identity.minor_features2 = 0;
 		gpu->identity.minor_features3 = 0;
+		gpu->identity.minor_features4 = 0;
+		gpu->identity.minor_features5 = 0;
 	} else
 		gpu->identity.minor_features0 =
 				gpu_read(gpu, VIVS_HI_CHIP_MINOR_FEATURE_0);
@@ -321,6 +367,10 @@
 				gpu_read(gpu, VIVS_HI_CHIP_MINOR_FEATURE_2);
 		gpu->identity.minor_features3 =
 				gpu_read(gpu, VIVS_HI_CHIP_MINOR_FEATURE_3);
+		gpu->identity.minor_features4 =
+				gpu_read(gpu, VIVS_HI_CHIP_MINOR_FEATURE_4);
+		gpu->identity.minor_features5 =
+				gpu_read(gpu, VIVS_HI_CHIP_MINOR_FEATURE_5);
 	}
 
 	/* GC600 idle register reports zero bits where modules aren't present */
@@ -441,10 +491,9 @@
 {
 	u16 prefetch;
 
-	if (gpu->identity.model == chipModel_GC320 &&
-	    gpu_read(gpu, VIVS_HI_CHIP_TIME) != 0x2062400 &&
-	    (gpu->identity.revision == 0x5007 ||
-	     gpu->identity.revision == 0x5220)) {
+	if ((etnaviv_is_model_rev(gpu, GC320, 0x5007) ||
+	     etnaviv_is_model_rev(gpu, GC320, 0x5220)) &&
+	    gpu_read(gpu, VIVS_HI_CHIP_TIME) != 0x2062400) {
 		u32 mc_memory_debug;
 
 		mc_memory_debug = gpu_read(gpu, VIVS_MC_DEBUG_MEMORY) & ~0xff;
@@ -466,7 +515,7 @@
 		  VIVS_HI_AXI_CONFIG_ARCACHE(2));
 
 	/* GC2000 rev 5108 needs a special bus config */
-	if (gpu->identity.model == 0x2000 && gpu->identity.revision == 0x5108) {
+	if (etnaviv_is_model_rev(gpu, GC2000, 0x5108)) {
 		u32 bus_config = gpu_read(gpu, VIVS_MC_BUS_CONFIG);
 		bus_config &= ~(VIVS_MC_BUS_CONFIG_FE_BUS_CONFIG__MASK |
 				VIVS_MC_BUS_CONFIG_TX_BUS_CONFIG__MASK);
@@ -511,8 +560,16 @@
 
 	if (gpu->identity.model == 0) {
 		dev_err(gpu->dev, "Unknown GPU model\n");
-		pm_runtime_put_autosuspend(gpu->dev);
-		return -ENXIO;
+		ret = -ENXIO;
+		goto fail;
+	}
+
+	/* Exclude VG cores with FE2.0 */
+	if (gpu->identity.features & chipFeatures_PIPE_VG &&
+	    gpu->identity.features & chipFeatures_FE20) {
+		dev_info(gpu->dev, "Ignoring GPU with VG and FE2.0\n");
+		ret = -ENXIO;
+		goto fail;
 	}
 
 	ret = etnaviv_hw_reset(gpu);
@@ -539,10 +596,9 @@
 		goto fail;
 	}
 
-	/* TODO: we will leak here memory - fix it! */
-
 	gpu->mmu = etnaviv_iommu_new(gpu, iommu, version);
 	if (!gpu->mmu) {
+		iommu_domain_free(iommu);
 		ret = -ENOMEM;
 		goto fail;
 	}
@@ -552,7 +608,7 @@
 	if (!gpu->buffer) {
 		ret = -ENOMEM;
 		dev_err(gpu->dev, "could not create command buffer\n");
-		goto fail;
+		goto destroy_iommu;
 	}
 	if (gpu->buffer->paddr - gpu->memory_base > 0x80000000) {
 		ret = -EINVAL;
@@ -582,6 +638,9 @@
 free_buffer:
 	etnaviv_gpu_cmdbuf_free(gpu->buffer);
 	gpu->buffer = NULL;
+destroy_iommu:
+	etnaviv_iommu_destroy(gpu->mmu);
+	gpu->mmu = NULL;
 fail:
 	pm_runtime_mark_last_busy(gpu->dev);
 	pm_runtime_put_autosuspend(gpu->dev);
@@ -642,6 +701,10 @@
 		   gpu->identity.minor_features2);
 	seq_printf(m, "\t minor_features3: 0x%08x\n",
 		   gpu->identity.minor_features3);
+	seq_printf(m, "\t minor_features4: 0x%08x\n",
+		   gpu->identity.minor_features4);
+	seq_printf(m, "\t minor_features5: 0x%08x\n",
+		   gpu->identity.minor_features5);
 
 	seq_puts(m, "\tspecs\n");
 	seq_printf(m, "\t stream_count:  %d\n",
@@ -664,6 +727,8 @@
 			gpu->identity.instruction_count);
 	seq_printf(m, "\t num_constants: %d\n",
 			gpu->identity.num_constants);
+	seq_printf(m, "\t varyings_count: %d\n",
+			gpu->identity.varyings_count);
 
 	seq_printf(m, "\taxi: 0x%08x\n", axi);
 	seq_printf(m, "\tidle: 0x%08x\n", idle);
@@ -1048,8 +1113,8 @@
 	if (!cmdbuf)
 		return NULL;
 
-	cmdbuf->vaddr = dma_alloc_writecombine(gpu->dev, size, &cmdbuf->paddr,
-					       GFP_KERNEL);
+	cmdbuf->vaddr = dma_alloc_wc(gpu->dev, size, &cmdbuf->paddr,
+				     GFP_KERNEL);
 	if (!cmdbuf->vaddr) {
 		kfree(cmdbuf);
 		return NULL;
@@ -1063,8 +1128,8 @@
 
 void etnaviv_gpu_cmdbuf_free(struct etnaviv_cmdbuf *cmdbuf)
 {
-	dma_free_writecombine(cmdbuf->gpu->dev, cmdbuf->size,
-			      cmdbuf->vaddr, cmdbuf->paddr);
+	dma_free_wc(cmdbuf->gpu->dev, cmdbuf->size, cmdbuf->vaddr,
+		    cmdbuf->paddr);
 	kfree(cmdbuf);
 }
 
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
index c75d503..f233ac4 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
@@ -46,6 +46,12 @@
 	/* Supported minor feature 3 fields. */
 	u32 minor_features3;
 
+	/* Supported minor feature 4 fields. */
+	u32 minor_features4;
+
+	/* Supported minor feature 5 fields. */
+	u32 minor_features5;
+
 	/* Number of streams supported. */
 	u32 stream_count;
 
@@ -75,6 +81,9 @@
 
 	/* Buffer size */
 	u32 buffer_size;
+
+	/* Number of varyings */
+	u8 varyings_count;
 };
 
 struct etnaviv_event {
diff --git a/drivers/gpu/drm/etnaviv/state_hi.xml.h b/drivers/gpu/drm/etnaviv/state_hi.xml.h
index 0064f26..6a7de5f 100644
--- a/drivers/gpu/drm/etnaviv/state_hi.xml.h
+++ b/drivers/gpu/drm/etnaviv/state_hi.xml.h
@@ -8,8 +8,8 @@
 git clone git://0x04.net/rules-ng-ng
 
 The rules-ng-ng source files this header was generated from are:
-- state_hi.xml (  23420 bytes, from 2015-03-25 11:47:21)
-- common.xml   (  18437 bytes, from 2015-03-25 11:27:41)
+- state_hi.xml (  24309 bytes, from 2015-12-12 09:02:53)
+- common.xml   (  18437 bytes, from 2015-12-12 09:02:53)
 
 Copyright (C) 2015
 */
@@ -182,8 +182,25 @@
 
 #define VIVS_HI_CHIP_MINOR_FEATURE_3				0x00000088
 
+#define VIVS_HI_CHIP_SPECS_3					0x0000008c
+#define VIVS_HI_CHIP_SPECS_3_VARYINGS_COUNT__MASK		0x000001f0
+#define VIVS_HI_CHIP_SPECS_3_VARYINGS_COUNT__SHIFT		4
+#define VIVS_HI_CHIP_SPECS_3_VARYINGS_COUNT(x)			(((x) << VIVS_HI_CHIP_SPECS_3_VARYINGS_COUNT__SHIFT) & VIVS_HI_CHIP_SPECS_3_VARYINGS_COUNT__MASK)
+#define VIVS_HI_CHIP_SPECS_3_GPU_CORE_COUNT__MASK		0x00000007
+#define VIVS_HI_CHIP_SPECS_3_GPU_CORE_COUNT__SHIFT		0
+#define VIVS_HI_CHIP_SPECS_3_GPU_CORE_COUNT(x)			(((x) << VIVS_HI_CHIP_SPECS_3_GPU_CORE_COUNT__SHIFT) & VIVS_HI_CHIP_SPECS_3_GPU_CORE_COUNT__MASK)
+
 #define VIVS_HI_CHIP_MINOR_FEATURE_4				0x00000094
 
+#define VIVS_HI_CHIP_SPECS_4					0x0000009c
+#define VIVS_HI_CHIP_SPECS_4_STREAM_COUNT__MASK			0x0001f000
+#define VIVS_HI_CHIP_SPECS_4_STREAM_COUNT__SHIFT		12
+#define VIVS_HI_CHIP_SPECS_4_STREAM_COUNT(x)			(((x) << VIVS_HI_CHIP_SPECS_4_STREAM_COUNT__SHIFT) & VIVS_HI_CHIP_SPECS_4_STREAM_COUNT__MASK)
+
+#define VIVS_HI_CHIP_MINOR_FEATURE_5				0x000000a0
+
+#define VIVS_HI_CHIP_PRODUCT_ID					0x000000a8
+
 #define VIVS_PM							0x00000000
 
 #define VIVS_PM_POWER_CONTROLS					0x00000100
@@ -206,6 +223,11 @@
 #define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_FE		0x00000001
 #define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_DE		0x00000002
 #define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_PE		0x00000004
+#define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_SH		0x00000008
+#define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_PA		0x00000010
+#define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_SE		0x00000020
+#define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_RA		0x00000040
+#define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_TX		0x00000080
 
 #define VIVS_PM_PULSE_EATER					0x0000010c
 
diff --git a/drivers/gpu/drm/exynos/Kconfig b/drivers/gpu/drm/exynos/Kconfig
index 83efca9..f17d392 100644
--- a/drivers/gpu/drm/exynos/Kconfig
+++ b/drivers/gpu/drm/exynos/Kconfig
@@ -1,6 +1,6 @@
 config DRM_EXYNOS
 	tristate "DRM Support for Samsung SoC EXYNOS Series"
-	depends on OF && DRM && (PLAT_SAMSUNG || ARCH_MULTIPLATFORM)
+	depends on OF && DRM && (ARCH_S3C64XX || ARCH_EXYNOS || ARCH_MULTIPLATFORM)
 	select DRM_KMS_HELPER
 	select DRM_KMS_FB_HELPER
 	select FB_CFB_FILLRECT
diff --git a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
index 1bf6a21..162ab93 100644
--- a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
+++ b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
@@ -93,7 +93,7 @@
 	if (test_bit(BIT_SUSPENDED, &ctx->flags))
 		return -EPERM;
 
-	if (test_and_set_bit(BIT_IRQS_ENABLED, &ctx->flags)) {
+	if (!test_and_set_bit(BIT_IRQS_ENABLED, &ctx->flags)) {
 		val = VIDINTCON0_INTEN;
 		if (ctx->out_type == IFTYPE_I80)
 			val |= VIDINTCON0_FRAMEDONE;
@@ -402,8 +402,6 @@
 		decon_enable_vblank(ctx->crtc);
 
 	decon_commit(ctx->crtc);
-
-	set_bit(BIT_SUSPENDED, &ctx->flags);
 }
 
 static void decon_disable(struct exynos_drm_crtc *crtc)
@@ -582,9 +580,9 @@
 static int exynos5433_decon_suspend(struct device *dev)
 {
 	struct decon_context *ctx = dev_get_drvdata(dev);
-	int i;
+	int i = ARRAY_SIZE(decon_clks_name);
 
-	for (i = 0; i < ARRAY_SIZE(decon_clks_name); i++)
+	while (--i >= 0)
 		clk_disable_unprepare(ctx->clks[i]);
 
 	return 0;
diff --git a/drivers/gpu/drm/exynos/exynos_dp_core.c b/drivers/gpu/drm/exynos/exynos_dp_core.c
index b79c316..673164b 100644
--- a/drivers/gpu/drm/exynos/exynos_dp_core.c
+++ b/drivers/gpu/drm/exynos/exynos_dp_core.c
@@ -1392,7 +1392,7 @@
 static int exynos_dp_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
-	struct device_node *panel_node = NULL, *bridge_node, *endpoint = NULL;
+	struct device_node *np = NULL, *endpoint = NULL;
 	struct exynos_dp_device *dp;
 	int ret;
 
@@ -1404,41 +1404,36 @@
 	platform_set_drvdata(pdev, dp);
 
 	/* This is for the backward compatibility. */
-	panel_node = of_parse_phandle(dev->of_node, "panel", 0);
-	if (panel_node) {
-		dp->panel = of_drm_find_panel(panel_node);
-		of_node_put(panel_node);
+	np = of_parse_phandle(dev->of_node, "panel", 0);
+	if (np) {
+		dp->panel = of_drm_find_panel(np);
+		of_node_put(np);
 		if (!dp->panel)
 			return -EPROBE_DEFER;
-	} else {
-		endpoint = of_graph_get_next_endpoint(dev->of_node, NULL);
-		if (endpoint) {
-			panel_node = of_graph_get_remote_port_parent(endpoint);
-			if (panel_node) {
-				dp->panel = of_drm_find_panel(panel_node);
-				of_node_put(panel_node);
-				if (!dp->panel)
-					return -EPROBE_DEFER;
-			} else {
-				DRM_ERROR("no port node for panel device.\n");
-				return -EINVAL;
-			}
-		}
-	}
-
-	if (endpoint)
 		goto out;
+	}
 
 	endpoint = of_graph_get_next_endpoint(dev->of_node, NULL);
 	if (endpoint) {
-		bridge_node = of_graph_get_remote_port_parent(endpoint);
-		if (bridge_node) {
-			dp->ptn_bridge = of_drm_find_bridge(bridge_node);
-			of_node_put(bridge_node);
-			if (!dp->ptn_bridge)
-				return -EPROBE_DEFER;
-		} else
-			return -EPROBE_DEFER;
+		np = of_graph_get_remote_port_parent(endpoint);
+		if (np) {
+			/* The remote port can be either a panel or a bridge */
+			dp->panel = of_drm_find_panel(np);
+			if (!dp->panel) {
+				dp->ptn_bridge = of_drm_find_bridge(np);
+				if (!dp->ptn_bridge) {
+					of_node_put(np);
+					return -EPROBE_DEFER;
+				}
+			}
+			of_node_put(np);
+		} else {
+			DRM_ERROR("no remote endpoint device node found.\n");
+			return -EINVAL;
+		}
+	} else {
+		DRM_ERROR("no port endpoint subnode found.\n");
+		return -EINVAL;
 	}
 
 out:
diff --git a/drivers/gpu/drm/exynos/exynos_drm_dsi.c b/drivers/gpu/drm/exynos/exynos_drm_dsi.c
index d84a498..26e81d19 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_dsi.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_dsi.c
@@ -1782,6 +1782,7 @@
 
 	bridge = of_drm_find_bridge(dsi->bridge_node);
 	if (bridge) {
+		encoder->bridge = bridge;
 		drm_bridge_attach(drm_dev, bridge);
 	}
 
@@ -1906,8 +1907,7 @@
 	return 0;
 }
 
-#ifdef CONFIG_PM
-static int exynos_dsi_suspend(struct device *dev)
+static int __maybe_unused exynos_dsi_suspend(struct device *dev)
 {
 	struct drm_encoder *encoder = dev_get_drvdata(dev);
 	struct exynos_dsi *dsi = encoder_to_dsi(encoder);
@@ -1938,7 +1938,7 @@
 	return 0;
 }
 
-static int exynos_dsi_resume(struct device *dev)
+static int __maybe_unused exynos_dsi_resume(struct device *dev)
 {
 	struct drm_encoder *encoder = dev_get_drvdata(dev);
 	struct exynos_dsi *dsi = encoder_to_dsi(encoder);
@@ -1972,7 +1972,6 @@
 
 	return ret;
 }
-#endif
 
 static const struct dev_pm_ops exynos_dsi_pm_ops = {
 	SET_RUNTIME_PM_OPS(exynos_dsi_suspend, exynos_dsi_resume, NULL)
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fbdev.c b/drivers/gpu/drm/exynos/exynos_drm_fbdev.c
index f6118ba..8baabd8 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_fbdev.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_fbdev.c
@@ -50,7 +50,7 @@
 	if (vm_size > exynos_gem->size)
 		return -EINVAL;
 
-	ret = dma_mmap_attrs(helper->dev->dev, vma, exynos_gem->pages,
+	ret = dma_mmap_attrs(helper->dev->dev, vma, exynos_gem->cookie,
 			     exynos_gem->dma_addr, exynos_gem->size,
 			     &exynos_gem->dma_attrs);
 	if (ret < 0) {
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimc.c b/drivers/gpu/drm/exynos/exynos_drm_fimc.c
index c747824..8a4f4a0 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_fimc.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_fimc.c
@@ -1723,7 +1723,7 @@
 		goto err_put_clk;
 	}
 
-	DRM_DEBUG_KMS("id[%d]ippdrv[0x%x]\n", ctx->id, (int)ippdrv);
+	DRM_DEBUG_KMS("id[%d]ippdrv[%p]\n", ctx->id, ippdrv);
 
 	spin_lock_init(&ctx->lock);
 	platform_set_drvdata(pdev, ctx);
diff --git a/drivers/gpu/drm/exynos/exynos_drm_g2d.c b/drivers/gpu/drm/exynos/exynos_drm_g2d.c
index c17efdb..8dfe6e1 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_g2d.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_g2d.c
@@ -1166,7 +1166,7 @@
 		goto err_free_event;
 	}
 
-	cmd = (struct drm_exynos_g2d_cmd *)(uint32_t)req->cmd;
+	cmd = (struct drm_exynos_g2d_cmd *)(unsigned long)req->cmd;
 
 	if (copy_from_user(cmdlist->data + cmdlist->last,
 				(void __user *)cmd,
@@ -1184,7 +1184,8 @@
 	if (req->cmd_buf_nr) {
 		struct drm_exynos_g2d_cmd *cmd_buf;
 
-		cmd_buf = (struct drm_exynos_g2d_cmd *)(uint32_t)req->cmd_buf;
+		cmd_buf = (struct drm_exynos_g2d_cmd *)
+				(unsigned long)req->cmd_buf;
 
 		if (copy_from_user(cmdlist->data + cmdlist->last,
 					(void __user *)cmd_buf,
diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.c b/drivers/gpu/drm/exynos/exynos_drm_gem.c
index 32358c5..26b5e4b 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_gem.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_gem.c
@@ -218,7 +218,7 @@
 		return ERR_PTR(ret);
 	}
 
-	DRM_DEBUG_KMS("created file object = 0x%x\n", (unsigned int)obj->filp);
+	DRM_DEBUG_KMS("created file object = %p\n", obj->filp);
 
 	return exynos_gem;
 }
@@ -335,7 +335,7 @@
 	if (vm_size > exynos_gem->size)
 		return -EINVAL;
 
-	ret = dma_mmap_attrs(drm_dev->dev, vma, exynos_gem->pages,
+	ret = dma_mmap_attrs(drm_dev->dev, vma, exynos_gem->cookie,
 			     exynos_gem->dma_addr, exynos_gem->size,
 			     &exynos_gem->dma_attrs);
 	if (ret < 0) {
diff --git a/drivers/gpu/drm/exynos/exynos_drm_gsc.c b/drivers/gpu/drm/exynos/exynos_drm_gsc.c
index 7aecd23..5d20da8 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_gsc.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_gsc.c
@@ -1723,7 +1723,7 @@
 		return ret;
 	}
 
-	DRM_DEBUG_KMS("id[%d]ippdrv[0x%x]\n", ctx->id, (int)ippdrv);
+	DRM_DEBUG_KMS("id[%d]ippdrv[%p]\n", ctx->id, ippdrv);
 
 	mutex_init(&ctx->lock);
 	platform_set_drvdata(pdev, ctx);
diff --git a/drivers/gpu/drm/exynos/exynos_drm_ipp.c b/drivers/gpu/drm/exynos/exynos_drm_ipp.c
index 67d2423..95eeb91 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_ipp.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_ipp.c
@@ -208,7 +208,7 @@
 	 * e.g PAUSE state, queue buf, command control.
 	 */
 	list_for_each_entry(ippdrv, &exynos_drm_ippdrv_list, drv_list) {
-		DRM_DEBUG_KMS("count[%d]ippdrv[0x%x]\n", count++, (int)ippdrv);
+		DRM_DEBUG_KMS("count[%d]ippdrv[%p]\n", count++, ippdrv);
 
 		mutex_lock(&ippdrv->cmd_lock);
 		list_for_each_entry(c_node, &ippdrv->cmd_list, list) {
@@ -388,8 +388,8 @@
 	}
 	property->prop_id = ret;
 
-	DRM_DEBUG_KMS("created prop_id[%d]cmd[%d]ippdrv[0x%x]\n",
-		property->prop_id, property->cmd, (int)ippdrv);
+	DRM_DEBUG_KMS("created prop_id[%d]cmd[%d]ippdrv[%p]\n",
+		property->prop_id, property->cmd, ippdrv);
 
 	/* stored property information and ippdrv in private data */
 	c_node->property = *property;
@@ -518,7 +518,7 @@
 {
 	int i;
 
-	DRM_DEBUG_KMS("node[0x%x]\n", (int)m_node);
+	DRM_DEBUG_KMS("node[%p]\n", m_node);
 
 	if (!m_node) {
 		DRM_ERROR("invalid dequeue node.\n");
@@ -562,7 +562,7 @@
 	m_node->buf_id = qbuf->buf_id;
 	INIT_LIST_HEAD(&m_node->list);
 
-	DRM_DEBUG_KMS("m_node[0x%x]ops_id[%d]\n", (int)m_node, qbuf->ops_id);
+	DRM_DEBUG_KMS("m_node[%p]ops_id[%d]\n", m_node, qbuf->ops_id);
 	DRM_DEBUG_KMS("prop_id[%d]buf_id[%d]\n", qbuf->prop_id, m_node->buf_id);
 
 	for_each_ipp_planar(i) {
@@ -582,8 +582,8 @@
 
 			buf_info->handles[i] = qbuf->handle[i];
 			buf_info->base[i] = *addr;
-			DRM_DEBUG_KMS("i[%d]base[0x%x]hd[0x%lx]\n", i,
-				      buf_info->base[i], buf_info->handles[i]);
+			DRM_DEBUG_KMS("i[%d]base[%pad]hd[0x%lx]\n", i,
+				      &buf_info->base[i], buf_info->handles[i]);
 		}
 	}
 
@@ -664,7 +664,7 @@
 
 	mutex_lock(&c_node->event_lock);
 	list_for_each_entry_safe(e, te, &c_node->event_list, base.link) {
-		DRM_DEBUG_KMS("count[%d]e[0x%x]\n", count++, (int)e);
+		DRM_DEBUG_KMS("count[%d]e[%p]\n", count++, e);
 
 		/*
 		 * qbuf == NULL condition means all event deletion.
@@ -755,7 +755,7 @@
 
 	/* find memory node from memory list */
 	list_for_each_entry(m_node, head, list) {
-		DRM_DEBUG_KMS("count[%d]m_node[0x%x]\n", count++, (int)m_node);
+		DRM_DEBUG_KMS("count[%d]m_node[%p]\n", count++, m_node);
 
 		/* compare buffer id */
 		if (m_node->buf_id == qbuf->buf_id)
@@ -772,7 +772,7 @@
 	struct exynos_drm_ipp_ops *ops = NULL;
 	int ret = 0;
 
-	DRM_DEBUG_KMS("node[0x%x]\n", (int)m_node);
+	DRM_DEBUG_KMS("node[%p]\n", m_node);
 
 	if (!m_node) {
 		DRM_ERROR("invalid queue node.\n");
@@ -1237,7 +1237,7 @@
 			m_node = list_first_entry(head,
 				struct drm_exynos_ipp_mem_node, list);
 
-			DRM_DEBUG_KMS("m_node[0x%x]\n", (int)m_node);
+			DRM_DEBUG_KMS("m_node[%p]\n", m_node);
 
 			ret = ipp_set_mem_node(ippdrv, c_node, m_node);
 			if (ret) {
@@ -1610,8 +1610,8 @@
 		}
 		ippdrv->prop_list.ipp_id = ret;
 
-		DRM_DEBUG_KMS("count[%d]ippdrv[0x%x]ipp_id[%d]\n",
-			count++, (int)ippdrv, ret);
+		DRM_DEBUG_KMS("count[%d]ippdrv[%p]ipp_id[%d]\n",
+			count++, ippdrv, ret);
 
 		/* store parent device for node */
 		ippdrv->parent_dev = dev;
@@ -1668,7 +1668,7 @@
 
 	file_priv->ipp_dev = dev;
 
-	DRM_DEBUG_KMS("done priv[0x%x]\n", (int)dev);
+	DRM_DEBUG_KMS("done priv[%p]\n", dev);
 
 	return 0;
 }
@@ -1685,8 +1685,8 @@
 		mutex_lock(&ippdrv->cmd_lock);
 		list_for_each_entry_safe(c_node, tc_node,
 			&ippdrv->cmd_list, list) {
-			DRM_DEBUG_KMS("count[%d]ippdrv[0x%x]\n",
-				count++, (int)ippdrv);
+			DRM_DEBUG_KMS("count[%d]ippdrv[%p]\n",
+				count++, ippdrv);
 
 			if (c_node->filp == file) {
 				/*
diff --git a/drivers/gpu/drm/exynos/exynos_drm_mic.c b/drivers/gpu/drm/exynos/exynos_drm_mic.c
index 4eaef36..9869d70 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_mic.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_mic.c
@@ -18,6 +18,7 @@
 #include <linux/of.h>
 #include <linux/of_graph.h>
 #include <linux/clk.h>
+#include <linux/component.h>
 #include <drm/drmP.h>
 #include <linux/mfd/syscon.h>
 #include <linux/regmap.h>
@@ -306,9 +307,9 @@
 	return ret;
 }
 
-void mic_disable(struct drm_bridge *bridge) { }
+static void mic_disable(struct drm_bridge *bridge) { }
 
-void mic_post_disable(struct drm_bridge *bridge)
+static void mic_post_disable(struct drm_bridge *bridge)
 {
 	struct exynos_mic *mic = bridge->driver_private;
 	int i;
@@ -328,7 +329,7 @@
 	mutex_unlock(&mic_mutex);
 }
 
-void mic_pre_enable(struct drm_bridge *bridge)
+static void mic_pre_enable(struct drm_bridge *bridge)
 {
 	struct exynos_mic *mic = bridge->driver_private;
 	int ret, i;
@@ -371,11 +372,35 @@
 	mutex_unlock(&mic_mutex);
 }
 
-void mic_enable(struct drm_bridge *bridge) { }
+static void mic_enable(struct drm_bridge *bridge) { }
 
-void mic_destroy(struct drm_bridge *bridge)
+static const struct drm_bridge_funcs mic_bridge_funcs = {
+	.disable = mic_disable,
+	.post_disable = mic_post_disable,
+	.pre_enable = mic_pre_enable,
+	.enable = mic_enable,
+};
+
+static int exynos_mic_bind(struct device *dev, struct device *master,
+			   void *data)
 {
-	struct exynos_mic *mic = bridge->driver_private;
+	struct exynos_mic *mic = dev_get_drvdata(dev);
+	int ret;
+
+	mic->bridge.funcs = &mic_bridge_funcs;
+	mic->bridge.of_node = dev->of_node;
+	mic->bridge.driver_private = mic;
+	ret = drm_bridge_add(&mic->bridge);
+	if (ret)
+		DRM_ERROR("mic: Failed to add MIC to the global bridge list\n");
+
+	return ret;
+}
+
+static void exynos_mic_unbind(struct device *dev, struct device *master,
+			      void *data)
+{
+	struct exynos_mic *mic = dev_get_drvdata(dev);
 	int i;
 
 	mutex_lock(&mic_mutex);
@@ -387,16 +412,16 @@
 
 already_disabled:
 	mutex_unlock(&mic_mutex);
+
+	drm_bridge_remove(&mic->bridge);
 }
 
-static const struct drm_bridge_funcs mic_bridge_funcs = {
-	.disable = mic_disable,
-	.post_disable = mic_post_disable,
-	.pre_enable = mic_pre_enable,
-	.enable = mic_enable,
+static const struct component_ops exynos_mic_component_ops = {
+	.bind	= exynos_mic_bind,
+	.unbind	= exynos_mic_unbind,
 };
 
-int exynos_mic_probe(struct platform_device *pdev)
+static int exynos_mic_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	struct exynos_mic *mic;
@@ -435,17 +460,8 @@
 		goto err;
 	}
 
-	mic->bridge.funcs = &mic_bridge_funcs;
-	mic->bridge.of_node = dev->of_node;
-	mic->bridge.driver_private = mic;
-	ret = drm_bridge_add(&mic->bridge);
-	if (ret) {
-		DRM_ERROR("mic: Failed to add MIC to the global bridge list\n");
-		goto err;
-	}
-
 	for (i = 0; i < NUM_CLKS; i++) {
-		mic->clks[i] = of_clk_get_by_name(dev->of_node, clk_names[i]);
+		mic->clks[i] = devm_clk_get(dev, clk_names[i]);
 		if (IS_ERR(mic->clks[i])) {
 			DRM_ERROR("mic: Failed to get clock (%s)\n",
 								clk_names[i]);
@@ -454,7 +470,10 @@
 		}
 	}
 
+	platform_set_drvdata(pdev, mic);
+
 	DRM_DEBUG_KMS("MIC has been probed\n");
+	return component_add(dev, &exynos_mic_component_ops);
 
 err:
 	return ret;
@@ -462,14 +481,7 @@
 
 static int exynos_mic_remove(struct platform_device *pdev)
 {
-	struct exynos_mic *mic = platform_get_drvdata(pdev);
-	int i;
-
-	drm_bridge_remove(&mic->bridge);
-
-	for (i = NUM_CLKS - 1; i > -1; i--)
-		clk_put(mic->clks[i]);
-
+	component_del(&pdev->dev, &exynos_mic_component_ops);
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/exynos/exynos_drm_rotator.c b/drivers/gpu/drm/exynos/exynos_drm_rotator.c
index bea0f78..ce59f44 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_rotator.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_rotator.c
@@ -754,7 +754,7 @@
 		goto err_ippdrv_register;
 	}
 
-	DRM_DEBUG_KMS("ippdrv[0x%x]\n", (int)ippdrv);
+	DRM_DEBUG_KMS("ippdrv[%p]\n", ippdrv);
 
 	platform_set_drvdata(pdev, rot);
 
diff --git a/drivers/gpu/drm/exynos/exynos_drm_vidi.c b/drivers/gpu/drm/exynos/exynos_drm_vidi.c
index 62ac4e5..b605bd7 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_vidi.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_vidi.c
@@ -223,7 +223,7 @@
 	}
 }
 
-static int vidi_show_connection(struct device *dev,
+static ssize_t vidi_show_connection(struct device *dev,
 				struct device_attribute *attr, char *buf)
 {
 	struct vidi_context *ctx = dev_get_drvdata(dev);
@@ -238,7 +238,7 @@
 	return rc;
 }
 
-static int vidi_store_connection(struct device *dev,
+static ssize_t vidi_store_connection(struct device *dev,
 				struct device_attribute *attr,
 				const char *buf, size_t len)
 {
@@ -294,7 +294,9 @@
 	}
 
 	if (vidi->connection) {
-		struct edid *raw_edid  = (struct edid *)(uint32_t)vidi->edid;
+		struct edid *raw_edid;
+
+		raw_edid = (struct edid *)(unsigned long)vidi->edid;
 		if (!drm_edid_is_valid(raw_edid)) {
 			DRM_DEBUG_KMS("edid data is invalid.\n");
 			return -EINVAL;
diff --git a/drivers/gpu/drm/exynos/exynos_mixer.c b/drivers/gpu/drm/exynos/exynos_mixer.c
index b5fbc1c..0a5a600 100644
--- a/drivers/gpu/drm/exynos/exynos_mixer.c
+++ b/drivers/gpu/drm/exynos/exynos_mixer.c
@@ -1289,8 +1289,7 @@
 	return 0;
 }
 
-#ifdef CONFIG_PM_SLEEP
-static int exynos_mixer_suspend(struct device *dev)
+static int __maybe_unused exynos_mixer_suspend(struct device *dev)
 {
 	struct mixer_context *ctx = dev_get_drvdata(dev);
 	struct mixer_resources *res = &ctx->mixer_res;
@@ -1306,7 +1305,7 @@
 	return 0;
 }
 
-static int exynos_mixer_resume(struct device *dev)
+static int __maybe_unused exynos_mixer_resume(struct device *dev)
 {
 	struct mixer_context *ctx = dev_get_drvdata(dev);
 	struct mixer_resources *res = &ctx->mixer_res;
@@ -1342,7 +1341,6 @@
 
 	return 0;
 }
-#endif
 
 static const struct dev_pm_ops exynos_mixer_pm_ops = {
 	SET_RUNTIME_PM_OPS(exynos_mixer_suspend, exynos_mixer_resume, NULL)
diff --git a/drivers/gpu/drm/i2c/adv7511.c b/drivers/gpu/drm/i2c/adv7511.c
index 533d1e3..a02112b 100644
--- a/drivers/gpu/drm/i2c/adv7511.c
+++ b/drivers/gpu/drm/i2c/adv7511.c
@@ -136,6 +136,7 @@
 	case ADV7511_REG_BKSV(3):
 	case ADV7511_REG_BKSV(4):
 	case ADV7511_REG_DDC_STATUS:
+	case ADV7511_REG_EDID_READ_CTRL:
 	case ADV7511_REG_BSTATUS(0):
 	case ADV7511_REG_BSTATUS(1):
 	case ADV7511_REG_CHIP_ID_HIGH:
@@ -362,24 +363,31 @@
 {
 	adv7511->current_edid_segment = -1;
 
-	regmap_write(adv7511->regmap, ADV7511_REG_INT(0),
-		     ADV7511_INT0_EDID_READY);
-	regmap_write(adv7511->regmap, ADV7511_REG_INT(1),
-		     ADV7511_INT1_DDC_ERROR);
 	regmap_update_bits(adv7511->regmap, ADV7511_REG_POWER,
 			   ADV7511_POWER_POWER_DOWN, 0);
+	if (adv7511->i2c_main->irq) {
+		/*
+		 * Documentation says the INT_ENABLE registers are reset in
+		 * POWER_DOWN mode. My 7511w preserved the bits, however.
+		 * Still, let's be safe and stick to the documentation.
+		 */
+		regmap_write(adv7511->regmap, ADV7511_REG_INT_ENABLE(0),
+			     ADV7511_INT0_EDID_READY);
+		regmap_write(adv7511->regmap, ADV7511_REG_INT_ENABLE(1),
+			     ADV7511_INT1_DDC_ERROR);
+	}
 
 	/*
-	 * Per spec it is allowed to pulse the HDP signal to indicate that the
+	 * Per spec it is allowed to pulse the HPD signal to indicate that the
 	 * EDID information has changed. Some monitors do this when they wakeup
-	 * from standby or are enabled. When the HDP goes low the adv7511 is
+	 * from standby or are enabled. When the HPD goes low the adv7511 is
 	 * reset and the outputs are disabled which might cause the monitor to
-	 * go to standby again. To avoid this we ignore the HDP pin for the
+	 * go to standby again. To avoid this we ignore the HPD pin for the
 	 * first few seconds after enabling the output.
 	 */
 	regmap_update_bits(adv7511->regmap, ADV7511_REG_POWER2,
-			   ADV7511_REG_POWER2_HDP_SRC_MASK,
-			   ADV7511_REG_POWER2_HDP_SRC_NONE);
+			   ADV7511_REG_POWER2_HPD_SRC_MASK,
+			   ADV7511_REG_POWER2_HPD_SRC_NONE);
 
 	/*
 	 * Most of the registers are reset during power down or when HPD is low.
@@ -413,9 +421,9 @@
 	if (ret < 0)
 		return false;
 
-	if (irq0 & ADV7511_INT0_HDP) {
+	if (irq0 & ADV7511_INT0_HPD) {
 		regmap_write(adv7511->regmap, ADV7511_REG_INT(0),
-			     ADV7511_INT0_HDP);
+			     ADV7511_INT0_HPD);
 		return true;
 	}
 
@@ -438,7 +446,7 @@
 	regmap_write(adv7511->regmap, ADV7511_REG_INT(0), irq0);
 	regmap_write(adv7511->regmap, ADV7511_REG_INT(1), irq1);
 
-	if (irq0 & ADV7511_INT0_HDP && adv7511->encoder)
+	if (irq0 & ADV7511_INT0_HPD && adv7511->encoder)
 		drm_helper_hpd_irq_event(adv7511->encoder->dev);
 
 	if (irq0 & ADV7511_INT0_EDID_READY || irq1 & ADV7511_INT1_DDC_ERROR) {
@@ -567,12 +575,14 @@
 
 	/* Reading the EDID only works if the device is powered */
 	if (!adv7511->powered) {
-		regmap_write(adv7511->regmap, ADV7511_REG_INT(0),
-			     ADV7511_INT0_EDID_READY);
-		regmap_write(adv7511->regmap, ADV7511_REG_INT(1),
-			     ADV7511_INT1_DDC_ERROR);
 		regmap_update_bits(adv7511->regmap, ADV7511_REG_POWER,
 				   ADV7511_POWER_POWER_DOWN, 0);
+		if (adv7511->i2c_main->irq) {
+			regmap_write(adv7511->regmap, ADV7511_REG_INT_ENABLE(0),
+				     ADV7511_INT0_EDID_READY);
+			regmap_write(adv7511->regmap, ADV7511_REG_INT_ENABLE(1),
+				     ADV7511_INT1_DDC_ERROR);
+		}
 		adv7511->current_edid_segment = -1;
 	}
 
@@ -638,10 +648,10 @@
 		if (adv7511->status == connector_status_connected)
 			status = connector_status_disconnected;
 	} else {
-		/* Renable HDP sensing */
+		/* Renable HPD sensing */
 		regmap_update_bits(adv7511->regmap, ADV7511_REG_POWER2,
-				   ADV7511_REG_POWER2_HDP_SRC_MASK,
-				   ADV7511_REG_POWER2_HDP_SRC_BOTH);
+				   ADV7511_REG_POWER2_HPD_SRC_MASK,
+				   ADV7511_REG_POWER2_HPD_SRC_BOTH);
 	}
 
 	adv7511->status = status;
diff --git a/drivers/gpu/drm/i2c/adv7511.h b/drivers/gpu/drm/i2c/adv7511.h
index 6599ed5..38515b3 100644
--- a/drivers/gpu/drm/i2c/adv7511.h
+++ b/drivers/gpu/drm/i2c/adv7511.h
@@ -90,7 +90,7 @@
 #define ADV7511_CSC_ENABLE			BIT(7)
 #define ADV7511_CSC_UPDATE_MODE			BIT(5)
 
-#define ADV7511_INT0_HDP			BIT(7)
+#define ADV7511_INT0_HPD			BIT(7)
 #define ADV7511_INT0_VSYNC			BIT(5)
 #define ADV7511_INT0_AUDIO_FIFO_FULL		BIT(4)
 #define ADV7511_INT0_EDID_READY			BIT(2)
@@ -157,11 +157,11 @@
 #define ADV7511_PACKET_ENABLE_SPARE2		BIT(1)
 #define ADV7511_PACKET_ENABLE_SPARE1		BIT(0)
 
-#define ADV7511_REG_POWER2_HDP_SRC_MASK		0xc0
-#define ADV7511_REG_POWER2_HDP_SRC_BOTH		0x00
-#define ADV7511_REG_POWER2_HDP_SRC_HDP		0x40
-#define ADV7511_REG_POWER2_HDP_SRC_CEC		0x80
-#define ADV7511_REG_POWER2_HDP_SRC_NONE		0xc0
+#define ADV7511_REG_POWER2_HPD_SRC_MASK		0xc0
+#define ADV7511_REG_POWER2_HPD_SRC_BOTH		0x00
+#define ADV7511_REG_POWER2_HPD_SRC_HPD		0x40
+#define ADV7511_REG_POWER2_HPD_SRC_CEC		0x80
+#define ADV7511_REG_POWER2_HPD_SRC_NONE		0xc0
 #define ADV7511_REG_POWER2_TDMS_ENABLE		BIT(4)
 #define ADV7511_REG_POWER2_GATE_INPUT_CLK	BIT(0)
 
diff --git a/drivers/gpu/drm/i2c/tda998x_drv.c b/drivers/gpu/drm/i2c/tda998x_drv.c
index 34e3874..f8ee740 100644
--- a/drivers/gpu/drm/i2c/tda998x_drv.c
+++ b/drivers/gpu/drm/i2c/tda998x_drv.c
@@ -1382,8 +1382,16 @@
 	drm_connector_cleanup(connector);
 }
 
+static int tda998x_connector_dpms(struct drm_connector *connector, int mode)
+{
+	if (drm_core_check_feature(connector->dev, DRIVER_ATOMIC))
+		return drm_atomic_helper_connector_dpms(connector, mode);
+	else
+		return drm_helper_connector_dpms(connector, mode);
+}
+
 static const struct drm_connector_funcs tda998x_connector_funcs = {
-	.dpms = drm_atomic_helper_connector_dpms,
+	.dpms = tda998x_connector_dpms,
 	.reset = drm_atomic_helper_connector_reset,
 	.fill_modes = drm_helper_probe_single_connector_modes,
 	.detect = tda998x_connector_detect,
diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
index fcd77b2..051eab3 100644
--- a/drivers/gpu/drm/i915/Kconfig
+++ b/drivers/gpu/drm/i915/Kconfig
@@ -10,7 +10,6 @@
 	# the shmem_readpage() which depends upon tmpfs
 	select SHMEM
 	select TMPFS
-	select STOP_MACHINE
 	select DRM_KMS_HELPER
 	select DRM_PANEL
 	select DRM_MIPI_DSI
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 0fc38bb..cf39ed3 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -825,8 +825,11 @@
 		}
 
 		for_each_pipe(dev_priv, pipe) {
-			if (!intel_display_power_is_enabled(dev_priv,
-						POWER_DOMAIN_PIPE(pipe))) {
+			enum intel_display_power_domain power_domain;
+
+			power_domain = POWER_DOMAIN_PIPE(pipe);
+			if (!intel_display_power_get_if_enabled(dev_priv,
+								power_domain)) {
 				seq_printf(m, "Pipe %c power disabled\n",
 					   pipe_name(pipe));
 				continue;
@@ -840,6 +843,8 @@
 			seq_printf(m, "Pipe %c IER:\t%08x\n",
 				   pipe_name(pipe),
 				   I915_READ(GEN8_DE_PIPE_IER(pipe)));
+
+			intel_display_power_put(dev_priv, power_domain);
 		}
 
 		seq_printf(m, "Display Engine port interrupt mask:\t%08x\n",
@@ -3985,6 +3990,7 @@
 	struct intel_pipe_crc *pipe_crc = &dev_priv->pipe_crc[pipe];
 	struct intel_crtc *crtc = to_intel_crtc(intel_get_crtc_for_pipe(dev,
 									pipe));
+	enum intel_display_power_domain power_domain;
 	u32 val = 0; /* shut up gcc */
 	int ret;
 
@@ -3995,7 +4001,8 @@
 	if (pipe_crc->source && source)
 		return -EINVAL;
 
-	if (!intel_display_power_is_enabled(dev_priv, POWER_DOMAIN_PIPE(pipe))) {
+	power_domain = POWER_DOMAIN_PIPE(pipe);
+	if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) {
 		DRM_DEBUG_KMS("Trying to capture CRC while pipe is off\n");
 		return -EIO;
 	}
@@ -4012,7 +4019,7 @@
 		ret = ivb_pipe_crc_ctl_reg(dev, pipe, &source, &val);
 
 	if (ret != 0)
-		return ret;
+		goto out;
 
 	/* none -> real source transition */
 	if (source) {
@@ -4024,8 +4031,10 @@
 		entries = kcalloc(INTEL_PIPE_CRC_ENTRIES_NR,
 				  sizeof(pipe_crc->entries[0]),
 				  GFP_KERNEL);
-		if (!entries)
-			return -ENOMEM;
+		if (!entries) {
+			ret = -ENOMEM;
+			goto out;
+		}
 
 		/*
 		 * When IPS gets enabled, the pipe CRC changes. Since IPS gets
@@ -4081,7 +4090,12 @@
 		hsw_enable_ips(crtc);
 	}
 
-	return 0;
+	ret = 0;
+
+out:
+	intel_display_power_put(dev_priv, power_domain);
+
+	return ret;
 }
 
 /*
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 3ac616d..f357058 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -501,7 +501,9 @@
 				WARN_ON(!IS_SKYLAKE(dev) &&
 					!IS_KABYLAKE(dev));
 			} else if ((id == INTEL_PCH_P2X_DEVICE_ID_TYPE) ||
-				   (id == INTEL_PCH_QEMU_DEVICE_ID_TYPE)) {
+				   ((id == INTEL_PCH_QEMU_DEVICE_ID_TYPE) &&
+				    pch->subsystem_vendor == 0x1af4 &&
+				    pch->subsystem_device == 0x1100)) {
 				dev_priv->pch_type = intel_virt_detect_pch(dev);
 			} else
 				continue;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index f0f75d7..b0847b9 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -751,6 +751,7 @@
 	uint32_t mmio_count;
 	i915_reg_t mmioaddr[8];
 	uint32_t mmiodata[8];
+	uint32_t dc_state;
 };
 
 #define DEV_INFO_FOR_EACH_FLAG(func, sep) \
@@ -1988,6 +1989,9 @@
 #define I915_GTT_OFFSET_NONE ((u32)-1)
 
 struct drm_i915_gem_object_ops {
+	unsigned int flags;
+#define I915_GEM_OBJECT_HAS_STRUCT_PAGE 0x1
+
 	/* Interface between the GEM object and its backing storage.
 	 * get_pages() is called once prior to the use of the associated set
 	 * of pages before to binding them into the GTT, and put_pages() is
@@ -2003,6 +2007,7 @@
 	 */
 	int (*get_pages)(struct drm_i915_gem_object *);
 	void (*put_pages)(struct drm_i915_gem_object *);
+
 	int (*dmabuf_export)(struct drm_i915_gem_object *);
 	void (*release)(struct drm_i915_gem_object *);
 };
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index ddc21d4..bb44bad 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4425,6 +4425,7 @@
 }
 
 static const struct drm_i915_gem_object_ops i915_gem_object_ops = {
+	.flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE,
 	.get_pages = i915_gem_object_get_pages_gtt,
 	.put_pages = i915_gem_object_put_pages_gtt,
 };
@@ -5261,7 +5262,7 @@
 	struct page *page;
 
 	/* Only default objects have per-page dirty tracking */
-	if (WARN_ON(obj->ops != &i915_gem_object_ops))
+	if (WARN_ON((obj->ops->flags & I915_GEM_OBJECT_HAS_STRUCT_PAGE) == 0))
 		return NULL;
 
 	page = i915_gem_object_get_page(obj, n);
diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
index 19fb0bdd..59e45b3 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -789,9 +789,10 @@
 }
 
 static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = {
-	.dmabuf_export = i915_gem_userptr_dmabuf_export,
+	.flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE,
 	.get_pages = i915_gem_userptr_get_pages,
 	.put_pages = i915_gem_userptr_put_pages,
+	.dmabuf_export = i915_gem_userptr_dmabuf_export,
 	.release = i915_gem_userptr_release,
 };
 
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 007ae83..4897728 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -3287,19 +3287,20 @@
 
 #define PORT_HOTPLUG_STAT	_MMIO(dev_priv->info.display_mmio_offset + 0x61114)
 /*
- * HDMI/DP bits are gen4+
+ * HDMI/DP bits are g4x+
  *
  * WARNING: Bspec for hpd status bits on gen4 seems to be completely confused.
  * Please check the detailed lore in the commit message for for experimental
  * evidence.
  */
-#define   PORTD_HOTPLUG_LIVE_STATUS_G4X		(1 << 29)
+/* Bspec says GM45 should match G4X/VLV/CHV, but reality disagrees */
+#define   PORTD_HOTPLUG_LIVE_STATUS_GM45	(1 << 29)
+#define   PORTC_HOTPLUG_LIVE_STATUS_GM45	(1 << 28)
+#define   PORTB_HOTPLUG_LIVE_STATUS_GM45	(1 << 27)
+/* G4X/VLV/CHV DP/HDMI bits again match Bspec */
+#define   PORTD_HOTPLUG_LIVE_STATUS_G4X		(1 << 27)
 #define   PORTC_HOTPLUG_LIVE_STATUS_G4X		(1 << 28)
-#define   PORTB_HOTPLUG_LIVE_STATUS_G4X		(1 << 27)
-/* VLV DP/HDMI bits again match Bspec */
-#define   PORTD_HOTPLUG_LIVE_STATUS_VLV		(1 << 27)
-#define   PORTC_HOTPLUG_LIVE_STATUS_VLV		(1 << 28)
-#define   PORTB_HOTPLUG_LIVE_STATUS_VLV		(1 << 29)
+#define   PORTB_HOTPLUG_LIVE_STATUS_G4X		(1 << 29)
 #define   PORTD_HOTPLUG_INT_STATUS		(3 << 21)
 #define   PORTD_HOTPLUG_INT_LONG_PULSE		(2 << 21)
 #define   PORTD_HOTPLUG_INT_SHORT_PULSE		(1 << 21)
@@ -7514,7 +7515,7 @@
 #define  DPLL_CFGCR2_PDIV_7 (4<<2)
 #define  DPLL_CFGCR2_CENTRAL_FREQ_MASK	(3)
 
-#define DPLL_CFGCR1(id)	_MMIO_PIPE((id) - SKL_DPLL1, _DPLL1_CFGCR1, _DPLL2_CFGCR2)
+#define DPLL_CFGCR1(id)	_MMIO_PIPE((id) - SKL_DPLL1, _DPLL1_CFGCR1, _DPLL2_CFGCR1)
 #define DPLL_CFGCR2(id)	_MMIO_PIPE((id) - SKL_DPLL1, _DPLL1_CFGCR2, _DPLL2_CFGCR2)
 
 /* BXT display engine PLL */
diff --git a/drivers/gpu/drm/i915/i915_suspend.c b/drivers/gpu/drm/i915/i915_suspend.c
index a2aa09c..a8af594 100644
--- a/drivers/gpu/drm/i915/i915_suspend.c
+++ b/drivers/gpu/drm/i915/i915_suspend.c
@@ -49,7 +49,7 @@
 		dev_priv->regfile.savePP_ON_DELAYS = I915_READ(PCH_PP_ON_DELAYS);
 		dev_priv->regfile.savePP_OFF_DELAYS = I915_READ(PCH_PP_OFF_DELAYS);
 		dev_priv->regfile.savePP_DIVISOR = I915_READ(PCH_PP_DIVISOR);
-	} else if (!IS_VALLEYVIEW(dev) && !IS_CHERRYVIEW(dev)) {
+	} else if (INTEL_INFO(dev)->gen <= 4) {
 		dev_priv->regfile.savePP_CONTROL = I915_READ(PP_CONTROL);
 		dev_priv->regfile.savePP_ON_DELAYS = I915_READ(PP_ON_DELAYS);
 		dev_priv->regfile.savePP_OFF_DELAYS = I915_READ(PP_OFF_DELAYS);
@@ -84,7 +84,7 @@
 		I915_WRITE(PCH_PP_OFF_DELAYS, dev_priv->regfile.savePP_OFF_DELAYS);
 		I915_WRITE(PCH_PP_DIVISOR, dev_priv->regfile.savePP_DIVISOR);
 		I915_WRITE(PCH_PP_CONTROL, dev_priv->regfile.savePP_CONTROL);
-	} else if (!IS_VALLEYVIEW(dev) && !IS_CHERRYVIEW(dev)) {
+	} else if (INTEL_INFO(dev)->gen <= 4) {
 		I915_WRITE(PP_ON_DELAYS, dev_priv->regfile.savePP_ON_DELAYS);
 		I915_WRITE(PP_OFF_DELAYS, dev_priv->regfile.savePP_OFF_DELAYS);
 		I915_WRITE(PP_DIVISOR, dev_priv->regfile.savePP_DIVISOR);
diff --git a/drivers/gpu/drm/i915/intel_audio.c b/drivers/gpu/drm/i915/intel_audio.c
index 31f6d21..30f9214 100644
--- a/drivers/gpu/drm/i915/intel_audio.c
+++ b/drivers/gpu/drm/i915/intel_audio.c
@@ -527,6 +527,8 @@
 
 	mutex_lock(&dev_priv->av_mutex);
 	intel_dig_port->audio_connector = connector;
+	/* referred in audio callbacks */
+	dev_priv->dig_port_map[port] = intel_encoder;
 	mutex_unlock(&dev_priv->av_mutex);
 
 	if (acomp && acomp->audio_ops && acomp->audio_ops->pin_eld_notify)
@@ -554,6 +556,7 @@
 
 	mutex_lock(&dev_priv->av_mutex);
 	intel_dig_port->audio_connector = NULL;
+	dev_priv->dig_port_map[port] = NULL;
 	mutex_unlock(&dev_priv->av_mutex);
 
 	if (acomp && acomp->audio_ops && acomp->audio_ops->pin_eld_notify)
diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c
index 9c89df1..a7b4a524 100644
--- a/drivers/gpu/drm/i915/intel_crt.c
+++ b/drivers/gpu/drm/i915/intel_crt.c
@@ -71,22 +71,29 @@
 	struct intel_crt *crt = intel_encoder_to_crt(encoder);
 	enum intel_display_power_domain power_domain;
 	u32 tmp;
+	bool ret;
 
 	power_domain = intel_display_port_power_domain(encoder);
-	if (!intel_display_power_is_enabled(dev_priv, power_domain))
+	if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
 		return false;
 
+	ret = false;
+
 	tmp = I915_READ(crt->adpa_reg);
 
 	if (!(tmp & ADPA_DAC_ENABLE))
-		return false;
+		goto out;
 
 	if (HAS_PCH_CPT(dev))
 		*pipe = PORT_TO_PIPE_CPT(tmp);
 	else
 		*pipe = PORT_TO_PIPE(tmp);
 
-	return true;
+	ret = true;
+out:
+	intel_display_power_put(dev_priv, power_domain);
+
+	return ret;
 }
 
 static unsigned int intel_crt_get_flags(struct intel_encoder *encoder)
diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c
index 9bb63a8..647d85e 100644
--- a/drivers/gpu/drm/i915/intel_csr.c
+++ b/drivers/gpu/drm/i915/intel_csr.c
@@ -240,6 +240,8 @@
 		I915_WRITE(dev_priv->csr.mmioaddr[i],
 			   dev_priv->csr.mmiodata[i]);
 	}
+
+	dev_priv->csr.dc_state = 0;
 }
 
 static uint32_t *parse_csr_fw(struct drm_i915_private *dev_priv,
diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c
index e6408e5..084d558 100644
--- a/drivers/gpu/drm/i915/intel_ddi.c
+++ b/drivers/gpu/drm/i915/intel_ddi.c
@@ -1589,7 +1589,8 @@
 			 DPLL_CFGCR2_KDIV(wrpll_params.kdiv) |
 			 DPLL_CFGCR2_PDIV(wrpll_params.pdiv) |
 			 wrpll_params.central_freq;
-	} else if (intel_encoder->type == INTEL_OUTPUT_DISPLAYPORT) {
+	} else if (intel_encoder->type == INTEL_OUTPUT_DISPLAYPORT ||
+		   intel_encoder->type == INTEL_OUTPUT_DP_MST) {
 		switch (crtc_state->port_clock / 2) {
 		case 81000:
 			ctrl1 |= DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_810, 0);
@@ -1968,13 +1969,16 @@
 	enum transcoder cpu_transcoder;
 	enum intel_display_power_domain power_domain;
 	uint32_t tmp;
+	bool ret;
 
 	power_domain = intel_display_port_power_domain(intel_encoder);
-	if (!intel_display_power_is_enabled(dev_priv, power_domain))
+	if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
 		return false;
 
-	if (!intel_encoder->get_hw_state(intel_encoder, &pipe))
-		return false;
+	if (!intel_encoder->get_hw_state(intel_encoder, &pipe)) {
+		ret = false;
+		goto out;
+	}
 
 	if (port == PORT_A)
 		cpu_transcoder = TRANSCODER_EDP;
@@ -1986,23 +1990,33 @@
 	switch (tmp & TRANS_DDI_MODE_SELECT_MASK) {
 	case TRANS_DDI_MODE_SELECT_HDMI:
 	case TRANS_DDI_MODE_SELECT_DVI:
-		return (type == DRM_MODE_CONNECTOR_HDMIA);
+		ret = type == DRM_MODE_CONNECTOR_HDMIA;
+		break;
 
 	case TRANS_DDI_MODE_SELECT_DP_SST:
-		if (type == DRM_MODE_CONNECTOR_eDP)
-			return true;
-		return (type == DRM_MODE_CONNECTOR_DisplayPort);
+		ret = type == DRM_MODE_CONNECTOR_eDP ||
+		      type == DRM_MODE_CONNECTOR_DisplayPort;
+		break;
+
 	case TRANS_DDI_MODE_SELECT_DP_MST:
 		/* if the transcoder is in MST state then
 		 * connector isn't connected */
-		return false;
+		ret = false;
+		break;
 
 	case TRANS_DDI_MODE_SELECT_FDI:
-		return (type == DRM_MODE_CONNECTOR_VGA);
+		ret = type == DRM_MODE_CONNECTOR_VGA;
+		break;
 
 	default:
-		return false;
+		ret = false;
+		break;
 	}
+
+out:
+	intel_display_power_put(dev_priv, power_domain);
+
+	return ret;
 }
 
 bool intel_ddi_get_hw_state(struct intel_encoder *encoder,
@@ -2014,15 +2028,18 @@
 	enum intel_display_power_domain power_domain;
 	u32 tmp;
 	int i;
+	bool ret;
 
 	power_domain = intel_display_port_power_domain(encoder);
-	if (!intel_display_power_is_enabled(dev_priv, power_domain))
+	if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
 		return false;
 
+	ret = false;
+
 	tmp = I915_READ(DDI_BUF_CTL(port));
 
 	if (!(tmp & DDI_BUF_CTL_ENABLE))
-		return false;
+		goto out;
 
 	if (port == PORT_A) {
 		tmp = I915_READ(TRANS_DDI_FUNC_CTL(TRANSCODER_EDP));
@@ -2040,25 +2057,32 @@
 			break;
 		}
 
-		return true;
-	} else {
-		for (i = TRANSCODER_A; i <= TRANSCODER_C; i++) {
-			tmp = I915_READ(TRANS_DDI_FUNC_CTL(i));
+		ret = true;
 
-			if ((tmp & TRANS_DDI_PORT_MASK)
-			    == TRANS_DDI_SELECT_PORT(port)) {
-				if ((tmp & TRANS_DDI_MODE_SELECT_MASK) == TRANS_DDI_MODE_SELECT_DP_MST)
-					return false;
+		goto out;
+	}
 
-				*pipe = i;
-				return true;
-			}
+	for (i = TRANSCODER_A; i <= TRANSCODER_C; i++) {
+		tmp = I915_READ(TRANS_DDI_FUNC_CTL(i));
+
+		if ((tmp & TRANS_DDI_PORT_MASK) == TRANS_DDI_SELECT_PORT(port)) {
+			if ((tmp & TRANS_DDI_MODE_SELECT_MASK) ==
+			    TRANS_DDI_MODE_SELECT_DP_MST)
+				goto out;
+
+			*pipe = i;
+			ret = true;
+
+			goto out;
 		}
 	}
 
 	DRM_DEBUG_KMS("No pipe for ddi port %c found\n", port_name(port));
 
-	return false;
+out:
+	intel_display_power_put(dev_priv, power_domain);
+
+	return ret;
 }
 
 void intel_ddi_enable_pipe_clock(struct intel_crtc *intel_crtc)
@@ -2507,12 +2531,14 @@
 {
 	uint32_t val;
 
-	if (!intel_display_power_is_enabled(dev_priv, POWER_DOMAIN_PLLS))
+	if (!intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_PLLS))
 		return false;
 
 	val = I915_READ(WRPLL_CTL(pll->id));
 	hw_state->wrpll = val;
 
+	intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS);
+
 	return val & WRPLL_PLL_ENABLE;
 }
 
@@ -2522,12 +2548,14 @@
 {
 	uint32_t val;
 
-	if (!intel_display_power_is_enabled(dev_priv, POWER_DOMAIN_PLLS))
+	if (!intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_PLLS))
 		return false;
 
 	val = I915_READ(SPLL_CTL);
 	hw_state->spll = val;
 
+	intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS);
+
 	return val & SPLL_PLL_ENABLE;
 }
 
@@ -2644,16 +2672,19 @@
 	uint32_t val;
 	unsigned int dpll;
 	const struct skl_dpll_regs *regs = skl_dpll_regs;
+	bool ret;
 
-	if (!intel_display_power_is_enabled(dev_priv, POWER_DOMAIN_PLLS))
+	if (!intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_PLLS))
 		return false;
 
+	ret = false;
+
 	/* DPLL0 is not part of the shared DPLLs, so pll->id is 0 for DPLL1 */
 	dpll = pll->id + 1;
 
 	val = I915_READ(regs[pll->id].ctl);
 	if (!(val & LCPLL_PLL_ENABLE))
-		return false;
+		goto out;
 
 	val = I915_READ(DPLL_CTRL1);
 	hw_state->ctrl1 = (val >> (dpll * 6)) & 0x3f;
@@ -2663,8 +2694,12 @@
 		hw_state->cfgcr1 = I915_READ(regs[pll->id].cfgcr1);
 		hw_state->cfgcr2 = I915_READ(regs[pll->id].cfgcr2);
 	}
+	ret = true;
 
-	return true;
+out:
+	intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS);
+
+	return ret;
 }
 
 static void skl_shared_dplls_init(struct drm_i915_private *dev_priv)
@@ -2931,13 +2966,16 @@
 {
 	enum port port = (enum port)pll->id;	/* 1:1 port->PLL mapping */
 	uint32_t val;
+	bool ret;
 
-	if (!intel_display_power_is_enabled(dev_priv, POWER_DOMAIN_PLLS))
+	if (!intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_PLLS))
 		return false;
 
+	ret = false;
+
 	val = I915_READ(BXT_PORT_PLL_ENABLE(port));
 	if (!(val & PORT_PLL_ENABLE))
-		return false;
+		goto out;
 
 	hw_state->ebb0 = I915_READ(BXT_PORT_PLL_EBB_0(port));
 	hw_state->ebb0 &= PORT_PLL_P1_MASK | PORT_PLL_P2_MASK;
@@ -2984,7 +3022,12 @@
 				 I915_READ(BXT_PORT_PCS_DW12_LN23(port)));
 	hw_state->pcsdw12 &= LANE_STAGGER_MASK | LANESTAGGER_STRAP_OVRD;
 
-	return true;
+	ret = true;
+
+out:
+	intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS);
+
+	return ret;
 }
 
 static void bxt_shared_dplls_init(struct drm_i915_private *dev_priv)
@@ -3119,11 +3162,15 @@
 {
 	u32 temp;
 
-	if (intel_display_power_is_enabled(dev_priv, POWER_DOMAIN_AUDIO)) {
+	if (intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_AUDIO)) {
 		temp = I915_READ(HSW_AUD_PIN_ELD_CP_VLD);
+
+		intel_display_power_put(dev_priv, POWER_DOMAIN_AUDIO);
+
 		if (temp & AUDIO_OUTPUT_ENABLE(intel_crtc->pipe))
 			return true;
 	}
+
 	return false;
 }
 
@@ -3311,7 +3358,6 @@
 	intel_encoder->get_config = intel_ddi_get_config;
 
 	intel_dig_port->port = port;
-	dev_priv->dig_port_map[port] = intel_encoder;
 	intel_dig_port->saved_port_bits = I915_READ(DDI_BUF_CTL(port)) &
 					  (DDI_BUF_PORT_REVERSAL |
 					   DDI_A_4_LANES);
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 2f00828..46947ff 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -1351,18 +1351,21 @@
 	bool cur_state;
 	enum transcoder cpu_transcoder = intel_pipe_to_cpu_transcoder(dev_priv,
 								      pipe);
+	enum intel_display_power_domain power_domain;
 
 	/* if we need the pipe quirk it must be always on */
 	if ((pipe == PIPE_A && dev_priv->quirks & QUIRK_PIPEA_FORCE) ||
 	    (pipe == PIPE_B && dev_priv->quirks & QUIRK_PIPEB_FORCE))
 		state = true;
 
-	if (!intel_display_power_is_enabled(dev_priv,
-				POWER_DOMAIN_TRANSCODER(cpu_transcoder))) {
-		cur_state = false;
-	} else {
+	power_domain = POWER_DOMAIN_TRANSCODER(cpu_transcoder);
+	if (intel_display_power_get_if_enabled(dev_priv, power_domain)) {
 		u32 val = I915_READ(PIPECONF(cpu_transcoder));
 		cur_state = !!(val & PIPECONF_ENABLE);
+
+		intel_display_power_put(dev_priv, power_domain);
+	} else {
+		cur_state = false;
 	}
 
 	I915_STATE_WARN(cur_state != state,
@@ -2946,7 +2949,7 @@
 	struct i915_vma *vma;
 	u64 offset;
 
-	intel_fill_fb_ggtt_view(&view, intel_plane->base.fb,
+	intel_fill_fb_ggtt_view(&view, intel_plane->base.state->fb,
 				intel_plane->base.state);
 
 	vma = i915_gem_obj_to_ggtt_view(obj, &view);
@@ -8171,18 +8174,22 @@
 {
 	struct drm_device *dev = crtc->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
+	enum intel_display_power_domain power_domain;
 	uint32_t tmp;
+	bool ret;
 
-	if (!intel_display_power_is_enabled(dev_priv,
-					    POWER_DOMAIN_PIPE(crtc->pipe)))
+	power_domain = POWER_DOMAIN_PIPE(crtc->pipe);
+	if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
 		return false;
 
 	pipe_config->cpu_transcoder = (enum transcoder) crtc->pipe;
 	pipe_config->shared_dpll = DPLL_ID_PRIVATE;
 
+	ret = false;
+
 	tmp = I915_READ(PIPECONF(crtc->pipe));
 	if (!(tmp & PIPECONF_ENABLE))
-		return false;
+		goto out;
 
 	if (IS_G4X(dev) || IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev)) {
 		switch (tmp & PIPECONF_BPC_MASK) {
@@ -8262,7 +8269,12 @@
 	pipe_config->base.adjusted_mode.crtc_clock =
 		pipe_config->port_clock / pipe_config->pixel_multiplier;
 
-	return true;
+	ret = true;
+
+out:
+	intel_display_power_put(dev_priv, power_domain);
+
+	return ret;
 }
 
 static void ironlake_init_pch_refclk(struct drm_device *dev)
@@ -9366,18 +9378,21 @@
 {
 	struct drm_device *dev = crtc->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
+	enum intel_display_power_domain power_domain;
 	uint32_t tmp;
+	bool ret;
 
-	if (!intel_display_power_is_enabled(dev_priv,
-					    POWER_DOMAIN_PIPE(crtc->pipe)))
+	power_domain = POWER_DOMAIN_PIPE(crtc->pipe);
+	if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
 		return false;
 
 	pipe_config->cpu_transcoder = (enum transcoder) crtc->pipe;
 	pipe_config->shared_dpll = DPLL_ID_PRIVATE;
 
+	ret = false;
 	tmp = I915_READ(PIPECONF(crtc->pipe));
 	if (!(tmp & PIPECONF_ENABLE))
-		return false;
+		goto out;
 
 	switch (tmp & PIPECONF_BPC_MASK) {
 	case PIPECONF_6BPC:
@@ -9440,7 +9455,12 @@
 
 	ironlake_get_pfit_config(crtc, pipe_config);
 
-	return true;
+	ret = true;
+
+out:
+	intel_display_power_put(dev_priv, power_domain);
+
+	return ret;
 }
 
 static void assert_can_disable_lcpll(struct drm_i915_private *dev_priv)
@@ -9950,12 +9970,17 @@
 {
 	struct drm_device *dev = crtc->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	enum intel_display_power_domain pfit_domain;
+	enum intel_display_power_domain power_domain;
+	unsigned long power_domain_mask;
 	uint32_t tmp;
+	bool ret;
 
-	if (!intel_display_power_is_enabled(dev_priv,
-					 POWER_DOMAIN_PIPE(crtc->pipe)))
+	power_domain = POWER_DOMAIN_PIPE(crtc->pipe);
+	if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
 		return false;
+	power_domain_mask = BIT(power_domain);
+
+	ret = false;
 
 	pipe_config->cpu_transcoder = (enum transcoder) crtc->pipe;
 	pipe_config->shared_dpll = DPLL_ID_PRIVATE;
@@ -9982,13 +10007,14 @@
 			pipe_config->cpu_transcoder = TRANSCODER_EDP;
 	}
 
-	if (!intel_display_power_is_enabled(dev_priv,
-			POWER_DOMAIN_TRANSCODER(pipe_config->cpu_transcoder)))
-		return false;
+	power_domain = POWER_DOMAIN_TRANSCODER(pipe_config->cpu_transcoder);
+	if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
+		goto out;
+	power_domain_mask |= BIT(power_domain);
 
 	tmp = I915_READ(PIPECONF(pipe_config->cpu_transcoder));
 	if (!(tmp & PIPECONF_ENABLE))
-		return false;
+		goto out;
 
 	haswell_get_ddi_port_state(crtc, pipe_config);
 
@@ -9998,14 +10024,14 @@
 		skl_init_scalers(dev, crtc, pipe_config);
 	}
 
-	pfit_domain = POWER_DOMAIN_PIPE_PANEL_FITTER(crtc->pipe);
-
 	if (INTEL_INFO(dev)->gen >= 9) {
 		pipe_config->scaler_state.scaler_id = -1;
 		pipe_config->scaler_state.scaler_users &= ~(1 << SKL_CRTC_INDEX);
 	}
 
-	if (intel_display_power_is_enabled(dev_priv, pfit_domain)) {
+	power_domain = POWER_DOMAIN_PIPE_PANEL_FITTER(crtc->pipe);
+	if (intel_display_power_get_if_enabled(dev_priv, power_domain)) {
+		power_domain_mask |= BIT(power_domain);
 		if (INTEL_INFO(dev)->gen >= 9)
 			skylake_get_pfit_config(crtc, pipe_config);
 		else
@@ -10023,7 +10049,13 @@
 		pipe_config->pixel_multiplier = 1;
 	}
 
-	return true;
+	ret = true;
+
+out:
+	for_each_power_domain(power_domain, power_domain_mask)
+		intel_display_power_put(dev_priv, power_domain);
+
+	return ret;
 }
 
 static void i845_update_cursor(struct drm_crtc *crtc, u32 base, bool on)
@@ -12075,11 +12107,21 @@
 		pipe_config->pipe_bpp = connector->base.display_info.bpc*3;
 	}
 
-	/* Clamp bpp to 8 on screens without EDID 1.4 */
-	if (connector->base.display_info.bpc == 0 && bpp > 24) {
-		DRM_DEBUG_KMS("clamping display bpp (was %d) to default limit of 24\n",
-			      bpp);
-		pipe_config->pipe_bpp = 24;
+	/* Clamp bpp to default limit on screens without EDID 1.4 */
+	if (connector->base.display_info.bpc == 0) {
+		int type = connector->base.connector_type;
+		int clamp_bpp = 24;
+
+		/* Fall back to 18 bpp when DP sink capability is unknown. */
+		if (type == DRM_MODE_CONNECTOR_DisplayPort ||
+		    type == DRM_MODE_CONNECTOR_eDP)
+			clamp_bpp = 18;
+
+		if (bpp > clamp_bpp) {
+			DRM_DEBUG_KMS("clamping display bpp (was %d) to default limit of %d\n",
+				      bpp, clamp_bpp);
+			pipe_config->pipe_bpp = clamp_bpp;
+		}
 	}
 }
 
@@ -13620,7 +13662,7 @@
 {
 	uint32_t val;
 
-	if (!intel_display_power_is_enabled(dev_priv, POWER_DOMAIN_PLLS))
+	if (!intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_PLLS))
 		return false;
 
 	val = I915_READ(PCH_DPLL(pll->id));
@@ -13628,6 +13670,8 @@
 	hw_state->fp0 = I915_READ(PCH_FP0(pll->id));
 	hw_state->fp1 = I915_READ(PCH_FP1(pll->id));
 
+	intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS);
+
 	return val & DPLL_VCO_ENABLE;
 }
 
@@ -13883,11 +13927,12 @@
 	int max_scale = DRM_PLANE_HELPER_NO_SCALING;
 	bool can_position = false;
 
-	/* use scaler when colorkey is not required */
-	if (INTEL_INFO(plane->dev)->gen >= 9 &&
-	    state->ckey.flags == I915_SET_COLORKEY_NONE) {
-		min_scale = 1;
-		max_scale = skl_max_scale(to_intel_crtc(crtc), crtc_state);
+	if (INTEL_INFO(plane->dev)->gen >= 9) {
+		/* use scaler when colorkey is not required */
+		if (state->ckey.flags == I915_SET_COLORKEY_NONE) {
+			min_scale = 1;
+			max_scale = skl_max_scale(to_intel_crtc(crtc), crtc_state);
+		}
 		can_position = true;
 	}
 
@@ -15557,10 +15602,12 @@
 	 * level, just check if the power well is enabled instead of trying to
 	 * follow the "don't touch the power well if we don't need it" policy
 	 * the rest of the driver uses. */
-	if (!intel_display_power_is_enabled(dev_priv, POWER_DOMAIN_VGA))
+	if (!intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_VGA))
 		return;
 
 	i915_redisable_vga_power_on(dev);
+
+	intel_display_power_put(dev_priv, POWER_DOMAIN_VGA);
 }
 
 static bool primary_get_hw_state(struct intel_plane *plane)
diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index 796e3d3..cdc2c15 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -2362,15 +2362,18 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	enum intel_display_power_domain power_domain;
 	u32 tmp;
+	bool ret;
 
 	power_domain = intel_display_port_power_domain(encoder);
-	if (!intel_display_power_is_enabled(dev_priv, power_domain))
+	if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
 		return false;
 
+	ret = false;
+
 	tmp = I915_READ(intel_dp->output_reg);
 
 	if (!(tmp & DP_PORT_EN))
-		return false;
+		goto out;
 
 	if (IS_GEN7(dev) && port == PORT_A) {
 		*pipe = PORT_TO_PIPE_CPT(tmp);
@@ -2381,7 +2384,9 @@
 			u32 trans_dp = I915_READ(TRANS_DP_CTL(p));
 			if (TRANS_DP_PIPE_TO_PORT(trans_dp) == port) {
 				*pipe = p;
-				return true;
+				ret = true;
+
+				goto out;
 			}
 		}
 
@@ -2393,7 +2398,12 @@
 		*pipe = PORT_TO_PIPE(tmp);
 	}
 
-	return true;
+	ret = true;
+
+out:
+	intel_display_power_put(dev_priv, power_domain);
+
+	return ret;
 }
 
 static void intel_dp_get_config(struct intel_encoder *encoder,
@@ -4493,20 +4503,20 @@
 	return I915_READ(PORT_HOTPLUG_STAT) & bit;
 }
 
-static bool vlv_digital_port_connected(struct drm_i915_private *dev_priv,
-				       struct intel_digital_port *port)
+static bool gm45_digital_port_connected(struct drm_i915_private *dev_priv,
+					struct intel_digital_port *port)
 {
 	u32 bit;
 
 	switch (port->port) {
 	case PORT_B:
-		bit = PORTB_HOTPLUG_LIVE_STATUS_VLV;
+		bit = PORTB_HOTPLUG_LIVE_STATUS_GM45;
 		break;
 	case PORT_C:
-		bit = PORTC_HOTPLUG_LIVE_STATUS_VLV;
+		bit = PORTC_HOTPLUG_LIVE_STATUS_GM45;
 		break;
 	case PORT_D:
-		bit = PORTD_HOTPLUG_LIVE_STATUS_VLV;
+		bit = PORTD_HOTPLUG_LIVE_STATUS_GM45;
 		break;
 	default:
 		MISSING_CASE(port->port);
@@ -4558,8 +4568,8 @@
 		return cpt_digital_port_connected(dev_priv, port);
 	else if (IS_BROXTON(dev_priv))
 		return bxt_digital_port_connected(dev_priv, port);
-	else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
-		return vlv_digital_port_connected(dev_priv, port);
+	else if (IS_GM45(dev_priv))
+		return gm45_digital_port_connected(dev_priv, port);
 	else
 		return g4x_digital_port_connected(dev_priv, port);
 }
@@ -6035,7 +6045,6 @@
 	}
 
 	intel_dig_port->port = port;
-	dev_priv->dig_port_map[port] = intel_encoder;
 	intel_dig_port->dp.output_reg = output_reg;
 
 	intel_encoder->type = INTEL_OUTPUT_DISPLAYPORT;
diff --git a/drivers/gpu/drm/i915/intel_dp_link_training.c b/drivers/gpu/drm/i915/intel_dp_link_training.c
index 8888793..0b8eefc 100644
--- a/drivers/gpu/drm/i915/intel_dp_link_training.c
+++ b/drivers/gpu/drm/i915/intel_dp_link_training.c
@@ -215,27 +215,46 @@
 	}
 }
 
+/*
+ * Pick training pattern for channel equalization. Training Pattern 3 for HBR2
+ * or 1.2 devices that support it, Training Pattern 2 otherwise.
+ */
+static u32 intel_dp_training_pattern(struct intel_dp *intel_dp)
+{
+	u32 training_pattern = DP_TRAINING_PATTERN_2;
+	bool source_tps3, sink_tps3;
+
+	/*
+	 * Intel platforms that support HBR2 also support TPS3. TPS3 support is
+	 * also mandatory for downstream devices that support HBR2. However, not
+	 * all sinks follow the spec.
+	 *
+	 * Due to WaDisableHBR2 SKL < B0 is the only exception where TPS3 is
+	 * supported in source but still not enabled.
+	 */
+	source_tps3 = intel_dp_source_supports_hbr2(intel_dp);
+	sink_tps3 = drm_dp_tps3_supported(intel_dp->dpcd);
+
+	if (source_tps3 && sink_tps3) {
+		training_pattern = DP_TRAINING_PATTERN_3;
+	} else if (intel_dp->link_rate == 540000) {
+		if (!source_tps3)
+			DRM_DEBUG_KMS("5.4 Gbps link rate without source HBR2/TPS3 support\n");
+		if (!sink_tps3)
+			DRM_DEBUG_KMS("5.4 Gbps link rate without sink TPS3 support\n");
+	}
+
+	return training_pattern;
+}
+
 static void
 intel_dp_link_training_channel_equalization(struct intel_dp *intel_dp)
 {
 	bool channel_eq = false;
 	int tries, cr_tries;
-	uint32_t training_pattern = DP_TRAINING_PATTERN_2;
+	u32 training_pattern;
 
-	/*
-	 * Training Pattern 3 for HBR2 or 1.2 devices that support it.
-	 *
-	 * Intel platforms that support HBR2 also support TPS3. TPS3 support is
-	 * also mandatory for downstream devices that support HBR2.
-	 *
-	 * Due to WaDisableHBR2 SKL < B0 is the only exception where TPS3 is
-	 * supported but still not enabled.
-	 */
-	if (intel_dp_source_supports_hbr2(intel_dp) &&
-	    drm_dp_tps3_supported(intel_dp->dpcd))
-		training_pattern = DP_TRAINING_PATTERN_3;
-	else if (intel_dp->link_rate == 540000)
-		DRM_ERROR("5.4 Gbps link rate without HBR2/TPS3 support\n");
+	training_pattern = intel_dp_training_pattern(intel_dp);
 
 	/* channel equalization */
 	if (!intel_dp_set_link_train(intel_dp,
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index ea54158..df7f3cb 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -1428,6 +1428,8 @@
 				      enum intel_display_power_domain domain);
 void intel_display_power_get(struct drm_i915_private *dev_priv,
 			     enum intel_display_power_domain domain);
+bool intel_display_power_get_if_enabled(struct drm_i915_private *dev_priv,
+					enum intel_display_power_domain domain);
 void intel_display_power_put(struct drm_i915_private *dev_priv,
 			     enum intel_display_power_domain domain);
 
@@ -1514,6 +1516,7 @@
 	enable_rpm_wakeref_asserts(dev_priv)
 
 void intel_runtime_pm_get(struct drm_i915_private *dev_priv);
+bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *dev_priv);
 void intel_runtime_pm_get_noresume(struct drm_i915_private *dev_priv);
 void intel_runtime_pm_put(struct drm_i915_private *dev_priv);
 
diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c
index 44742fa..0193c62a 100644
--- a/drivers/gpu/drm/i915/intel_dsi.c
+++ b/drivers/gpu/drm/i915/intel_dsi.c
@@ -664,13 +664,16 @@
 	struct drm_device *dev = encoder->base.dev;
 	enum intel_display_power_domain power_domain;
 	enum port port;
+	bool ret;
 
 	DRM_DEBUG_KMS("\n");
 
 	power_domain = intel_display_port_power_domain(encoder);
-	if (!intel_display_power_is_enabled(dev_priv, power_domain))
+	if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
 		return false;
 
+	ret = false;
+
 	/* XXX: this only works for one DSI output */
 	for_each_dsi_port(port, intel_dsi->ports) {
 		i915_reg_t ctrl_reg = IS_BROXTON(dev) ?
@@ -691,12 +694,16 @@
 		if (dpi_enabled || (func & CMD_MODE_DATA_WIDTH_MASK)) {
 			if (I915_READ(MIPI_DEVICE_READY(port)) & DEVICE_READY) {
 				*pipe = port == PORT_A ? PIPE_A : PIPE_B;
-				return true;
+				ret = true;
+
+				goto out;
 			}
 		}
 	}
+out:
+	intel_display_power_put(dev_priv, power_domain);
 
-	return false;
+	return ret;
 }
 
 static void intel_dsi_get_config(struct intel_encoder *encoder,
diff --git a/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c b/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c
index a5e99ac..e8113ad 100644
--- a/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c
+++ b/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c
@@ -204,10 +204,28 @@
 	struct drm_device *dev = intel_dsi->base.base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
+	if (dev_priv->vbt.dsi.seq_version >= 3)
+		data++;
+
 	gpio = *data++;
 
 	/* pull up/down */
-	action = *data++;
+	action = *data++ & 1;
+
+	if (gpio >= ARRAY_SIZE(gtable)) {
+		DRM_DEBUG_KMS("unknown gpio %u\n", gpio);
+		goto out;
+	}
+
+	if (!IS_VALLEYVIEW(dev_priv)) {
+		DRM_DEBUG_KMS("GPIO element not supported on this platform\n");
+		goto out;
+	}
+
+	if (dev_priv->vbt.dsi.seq_version >= 3) {
+		DRM_DEBUG_KMS("GPIO element v3 not supported\n");
+		goto out;
+	}
 
 	function = gtable[gpio].function_reg;
 	pad = gtable[gpio].pad_reg;
@@ -226,6 +244,7 @@
 	vlv_gpio_nc_write(dev_priv, pad, val);
 	mutex_unlock(&dev_priv->sb_lock);
 
+out:
 	return data;
 }
 
diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c
index 4a77639..616108c 100644
--- a/drivers/gpu/drm/i915/intel_hdmi.c
+++ b/drivers/gpu/drm/i915/intel_hdmi.c
@@ -880,15 +880,18 @@
 	struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base);
 	enum intel_display_power_domain power_domain;
 	u32 tmp;
+	bool ret;
 
 	power_domain = intel_display_port_power_domain(encoder);
-	if (!intel_display_power_is_enabled(dev_priv, power_domain))
+	if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
 		return false;
 
+	ret = false;
+
 	tmp = I915_READ(intel_hdmi->hdmi_reg);
 
 	if (!(tmp & SDVO_ENABLE))
-		return false;
+		goto out;
 
 	if (HAS_PCH_CPT(dev))
 		*pipe = PORT_TO_PIPE_CPT(tmp);
@@ -897,7 +900,12 @@
 	else
 		*pipe = PORT_TO_PIPE(tmp);
 
-	return true;
+	ret = true;
+
+out:
+	intel_display_power_put(dev_priv, power_domain);
+
+	return ret;
 }
 
 static void intel_hdmi_get_config(struct intel_encoder *encoder,
@@ -2146,7 +2154,6 @@
 void intel_hdmi_init(struct drm_device *dev,
 		     i915_reg_t hdmi_reg, enum port port)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_digital_port *intel_dig_port;
 	struct intel_encoder *intel_encoder;
 	struct intel_connector *intel_connector;
@@ -2215,7 +2222,6 @@
 		intel_encoder->cloneable |= 1 << INTEL_OUTPUT_HDMI;
 
 	intel_dig_port->port = port;
-	dev_priv->dig_port_map[port] = intel_encoder;
 	intel_dig_port->hdmi.hdmi_reg = hdmi_reg;
 	intel_dig_port->dp.output_reg = INVALID_MMIO_REG;
 
diff --git a/drivers/gpu/drm/i915/intel_i2c.c b/drivers/gpu/drm/i915/intel_i2c.c
index 25254b5..52fbe53 100644
--- a/drivers/gpu/drm/i915/intel_i2c.c
+++ b/drivers/gpu/drm/i915/intel_i2c.c
@@ -664,6 +664,12 @@
 
 		bus->adapter.algo = &gmbus_algorithm;
 
+		/*
+		 * We wish to retry with bit banging
+		 * after a timed out GMBUS attempt.
+		 */
+		bus->adapter.retries = 1;
+
 		/* By default use a conservative clock rate */
 		bus->reg0 = pin | GMBUS_RATE_100KHZ;
 
@@ -683,7 +689,7 @@
 	return 0;
 
 err:
-	while (--pin) {
+	while (pin--) {
 		if (!intel_gmbus_is_valid_pin(dev_priv, pin))
 			continue;
 
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 3aa6147..f1fa756 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1707,6 +1707,7 @@
 	if (flush_domains) {
 		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
 		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
+		flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
 		flags |= PIPE_CONTROL_FLUSH_ENABLE;
 	}
 
diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c
index 0da0240..bc04d8d 100644
--- a/drivers/gpu/drm/i915/intel_lvds.c
+++ b/drivers/gpu/drm/i915/intel_lvds.c
@@ -75,22 +75,30 @@
 	struct intel_lvds_encoder *lvds_encoder = to_lvds_encoder(&encoder->base);
 	enum intel_display_power_domain power_domain;
 	u32 tmp;
+	bool ret;
 
 	power_domain = intel_display_port_power_domain(encoder);
-	if (!intel_display_power_is_enabled(dev_priv, power_domain))
+	if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
 		return false;
 
+	ret = false;
+
 	tmp = I915_READ(lvds_encoder->reg);
 
 	if (!(tmp & LVDS_PORT_EN))
-		return false;
+		goto out;
 
 	if (HAS_PCH_CPT(dev))
 		*pipe = PORT_TO_PIPE_CPT(tmp);
 	else
 		*pipe = PORT_TO_PIPE(tmp);
 
-	return true;
+	ret = true;
+
+out:
+	intel_display_power_put(dev_priv, power_domain);
+
+	return ret;
 }
 
 static void intel_lvds_get_config(struct intel_encoder *encoder,
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index eb5fa05..b28c29f 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -1783,16 +1783,20 @@
 				   const struct intel_plane_state *pstate,
 				   uint32_t mem_value)
 {
-	int bpp = pstate->base.fb ? pstate->base.fb->bits_per_pixel / 8 : 0;
+	/*
+	 * We treat the cursor plane as always-on for the purposes of watermark
+	 * calculation.  Until we have two-stage watermark programming merged,
+	 * this is necessary to avoid flickering.
+	 */
+	int cpp = 4;
+	int width = pstate->visible ? pstate->base.crtc_w : 64;
 
-	if (!cstate->base.active || !pstate->visible)
+	if (!cstate->base.active)
 		return 0;
 
 	return ilk_wm_method2(ilk_pipe_pixel_rate(cstate),
 			      cstate->base.adjusted_mode.crtc_htotal,
-			      drm_rect_width(&pstate->dst),
-			      bpp,
-			      mem_value);
+			      width, cpp, mem_value);
 }
 
 /* Only for WM_LP. */
@@ -2825,7 +2829,10 @@
 	memset(ddb, 0, sizeof(*ddb));
 
 	for_each_pipe(dev_priv, pipe) {
-		if (!intel_display_power_is_enabled(dev_priv, POWER_DOMAIN_PIPE(pipe)))
+		enum intel_display_power_domain power_domain;
+
+		power_domain = POWER_DOMAIN_PIPE(pipe);
+		if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
 			continue;
 
 		for_each_plane(dev_priv, pipe, plane) {
@@ -2837,6 +2844,8 @@
 		val = I915_READ(CUR_BUF_CFG(pipe));
 		skl_ddb_entry_init_from_hw(&ddb->plane[pipe][PLANE_CURSOR],
 					   val);
+
+		intel_display_power_put(dev_priv, power_domain);
 	}
 }
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 339701d..40c6aff 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -331,6 +331,7 @@
 	if (flush_domains) {
 		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
 		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
+		flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
 		flags |= PIPE_CONTROL_FLUSH_ENABLE;
 	}
 	if (invalidate_domains) {
@@ -403,6 +404,7 @@
 	if (flush_domains) {
 		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
 		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
+		flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
 		flags |= PIPE_CONTROL_FLUSH_ENABLE;
 	}
 	if (invalidate_domains) {
diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c
index ddbdbff..4f43d9b 100644
--- a/drivers/gpu/drm/i915/intel_runtime_pm.c
+++ b/drivers/gpu/drm/i915/intel_runtime_pm.c
@@ -470,6 +470,43 @@
 	}
 }
 
+static void gen9_write_dc_state(struct drm_i915_private *dev_priv,
+				u32 state)
+{
+	int rewrites = 0;
+	int rereads = 0;
+	u32 v;
+
+	I915_WRITE(DC_STATE_EN, state);
+
+	/* It has been observed that disabling the dc6 state sometimes
+	 * doesn't stick and dmc keeps returning old value. Make sure
+	 * the write really sticks enough times and also force rewrite until
+	 * we are confident that state is exactly what we want.
+	 */
+	do  {
+		v = I915_READ(DC_STATE_EN);
+
+		if (v != state) {
+			I915_WRITE(DC_STATE_EN, state);
+			rewrites++;
+			rereads = 0;
+		} else if (rereads++ > 5) {
+			break;
+		}
+
+	} while (rewrites < 100);
+
+	if (v != state)
+		DRM_ERROR("Writing dc state to 0x%x failed, now 0x%x\n",
+			  state, v);
+
+	/* Most of the times we need one retry, avoid spam */
+	if (rewrites > 1)
+		DRM_DEBUG_KMS("Rewrote dc state to 0x%x %d times\n",
+			      state, rewrites);
+}
+
 static void gen9_set_dc_state(struct drm_i915_private *dev_priv, uint32_t state)
 {
 	uint32_t val;
@@ -494,10 +531,18 @@
 	val = I915_READ(DC_STATE_EN);
 	DRM_DEBUG_KMS("Setting DC state from %02x to %02x\n",
 		      val & mask, state);
+
+	/* Check if DMC is ignoring our DC state requests */
+	if ((val & mask) != dev_priv->csr.dc_state)
+		DRM_ERROR("DC state mismatch (0x%x -> 0x%x)\n",
+			  dev_priv->csr.dc_state, val & mask);
+
 	val &= ~mask;
 	val |= state;
-	I915_WRITE(DC_STATE_EN, val);
-	POSTING_READ(DC_STATE_EN);
+
+	gen9_write_dc_state(dev_priv, val);
+
+	dev_priv->csr.dc_state = val & mask;
 }
 
 void bxt_enable_dc9(struct drm_i915_private *dev_priv)
@@ -1442,6 +1487,22 @@
 	chv_set_pipe_power_well(dev_priv, power_well, false);
 }
 
+static void
+__intel_display_power_get_domain(struct drm_i915_private *dev_priv,
+				 enum intel_display_power_domain domain)
+{
+	struct i915_power_domains *power_domains = &dev_priv->power_domains;
+	struct i915_power_well *power_well;
+	int i;
+
+	for_each_power_well(i, power_well, BIT(domain), power_domains) {
+		if (!power_well->count++)
+			intel_power_well_enable(dev_priv, power_well);
+	}
+
+	power_domains->domain_use_count[domain]++;
+}
+
 /**
  * intel_display_power_get - grab a power domain reference
  * @dev_priv: i915 device instance
@@ -1457,24 +1518,53 @@
 void intel_display_power_get(struct drm_i915_private *dev_priv,
 			     enum intel_display_power_domain domain)
 {
-	struct i915_power_domains *power_domains;
-	struct i915_power_well *power_well;
-	int i;
+	struct i915_power_domains *power_domains = &dev_priv->power_domains;
 
 	intel_runtime_pm_get(dev_priv);
 
-	power_domains = &dev_priv->power_domains;
+	mutex_lock(&power_domains->lock);
+
+	__intel_display_power_get_domain(dev_priv, domain);
+
+	mutex_unlock(&power_domains->lock);
+}
+
+/**
+ * intel_display_power_get_if_enabled - grab a reference for an enabled display power domain
+ * @dev_priv: i915 device instance
+ * @domain: power domain to reference
+ *
+ * This function grabs a power domain reference for @domain and ensures that the
+ * power domain and all its parents are powered up. Therefore users should only
+ * grab a reference to the innermost power domain they need.
+ *
+ * Any power domain reference obtained by this function must have a symmetric
+ * call to intel_display_power_put() to release the reference again.
+ */
+bool intel_display_power_get_if_enabled(struct drm_i915_private *dev_priv,
+					enum intel_display_power_domain domain)
+{
+	struct i915_power_domains *power_domains = &dev_priv->power_domains;
+	bool is_enabled;
+
+	if (!intel_runtime_pm_get_if_in_use(dev_priv))
+		return false;
 
 	mutex_lock(&power_domains->lock);
 
-	for_each_power_well(i, power_well, BIT(domain), power_domains) {
-		if (!power_well->count++)
-			intel_power_well_enable(dev_priv, power_well);
+	if (__intel_display_power_is_enabled(dev_priv, domain)) {
+		__intel_display_power_get_domain(dev_priv, domain);
+		is_enabled = true;
+	} else {
+		is_enabled = false;
 	}
 
-	power_domains->domain_use_count[domain]++;
-
 	mutex_unlock(&power_domains->lock);
+
+	if (!is_enabled)
+		intel_runtime_pm_put(dev_priv);
+
+	return is_enabled;
 }
 
 /**
@@ -2213,15 +2303,15 @@
  */
 void intel_power_domains_suspend(struct drm_i915_private *dev_priv)
 {
-	if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv))
-		skl_display_core_uninit(dev_priv);
-
 	/*
 	 * Even if power well support was disabled we still want to disable
 	 * power wells while we are system suspended.
 	 */
 	if (!i915.disable_power_well)
 		intel_display_power_put(dev_priv, POWER_DOMAIN_INIT);
+
+	if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv))
+		skl_display_core_uninit(dev_priv);
 }
 
 /**
@@ -2246,6 +2336,41 @@
 }
 
 /**
+ * intel_runtime_pm_get_if_in_use - grab a runtime pm reference if device in use
+ * @dev_priv: i915 device instance
+ *
+ * This function grabs a device-level runtime pm reference if the device is
+ * already in use and ensures that it is powered up.
+ *
+ * Any runtime pm reference obtained by this function must have a symmetric
+ * call to intel_runtime_pm_put() to release the reference again.
+ */
+bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *dev_priv)
+{
+	struct drm_device *dev = dev_priv->dev;
+	struct device *device = &dev->pdev->dev;
+
+	if (IS_ENABLED(CONFIG_PM)) {
+		int ret = pm_runtime_get_if_in_use(device);
+
+		/*
+		 * In cases runtime PM is disabled by the RPM core and we get
+		 * an -EINVAL return value we are not supposed to call this
+		 * function, since the power state is undefined. This applies
+		 * atm to the late/early system suspend/resume handlers.
+		 */
+		WARN_ON_ONCE(ret < 0);
+		if (ret <= 0)
+			return false;
+	}
+
+	atomic_inc(&dev_priv->pm.wakeref_count);
+	assert_rpm_wakelock_held(dev_priv);
+
+	return true;
+}
+
+/**
  * intel_runtime_pm_get_noresume - grab a runtime pm reference
  * @dev_priv: i915 device instance
  *
diff --git a/drivers/gpu/drm/imx/ipuv3-crtc.c b/drivers/gpu/drm/imx/ipuv3-crtc.c
index 30a5718..28722631 100644
--- a/drivers/gpu/drm/imx/ipuv3-crtc.c
+++ b/drivers/gpu/drm/imx/ipuv3-crtc.c
@@ -64,6 +64,7 @@
 	/* Start DC channel and DI after IDMAC */
 	ipu_dc_enable_channel(ipu_crtc->dc);
 	ipu_di_enable(ipu_crtc->di);
+	drm_crtc_vblank_on(&ipu_crtc->base);
 
 	ipu_crtc->enabled = 1;
 }
@@ -80,6 +81,7 @@
 	ipu_di_disable(ipu_crtc->di);
 	ipu_plane_disable(ipu_crtc->plane[0]);
 	ipu_dc_disable(ipu);
+	drm_crtc_vblank_off(&ipu_crtc->base);
 
 	ipu_crtc->enabled = 0;
 }
diff --git a/drivers/gpu/drm/imx/ipuv3-plane.c b/drivers/gpu/drm/imx/ipuv3-plane.c
index 591ba2f..26bb1b6 100644
--- a/drivers/gpu/drm/imx/ipuv3-plane.c
+++ b/drivers/gpu/drm/imx/ipuv3-plane.c
@@ -42,6 +42,7 @@
 	DRM_FORMAT_YVYU,
 	DRM_FORMAT_YUV420,
 	DRM_FORMAT_YVU420,
+	DRM_FORMAT_RGB565,
 };
 
 int ipu_plane_irq(struct ipu_plane *ipu_plane)
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 78f520d..e3acc35 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -1520,7 +1520,7 @@
 				    DMA_BIDIRECTIONAL);
 
 		if (dma_mapping_error(pdev, addr)) {
-			while (--i) {
+			while (i--) {
 				dma_unmap_page(pdev, ttm_dma->dma_address[i],
 					       PAGE_SIZE, DMA_BIDIRECTIONAL);
 				ttm_dma->dma_address[i] = 0;
diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c
index 24be27d..20935eb 100644
--- a/drivers/gpu/drm/nouveau/nouveau_display.c
+++ b/drivers/gpu/drm/nouveau/nouveau_display.c
@@ -635,10 +635,6 @@
 		nv_crtc->lut.depth = 0;
 	}
 
-	/* Make sure that drm and hw vblank irqs get resumed if needed. */
-	for (head = 0; head < dev->mode_config.num_crtc; head++)
-		drm_vblank_on(dev, head);
-
 	/* This should ensure we don't hit a locking problem when someone
 	 * wakes us up via a connector.  We should never go into suspend
 	 * while the display is on anyways.
@@ -648,6 +644,10 @@
 
 	drm_helper_resume_force_mode(dev);
 
+	/* Make sure that drm and hw vblank irqs get resumed if needed. */
+	for (head = 0; head < dev->mode_config.num_crtc; head++)
+		drm_vblank_on(dev, head);
+
 	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
 		struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_platform.c b/drivers/gpu/drm/nouveau/nouveau_platform.c
index 8a70cec..2dfe58a 100644
--- a/drivers/gpu/drm/nouveau/nouveau_platform.c
+++ b/drivers/gpu/drm/nouveau/nouveau_platform.c
@@ -24,7 +24,7 @@
 static int nouveau_platform_probe(struct platform_device *pdev)
 {
 	const struct nvkm_device_tegra_func *func;
-	struct nvkm_device *device;
+	struct nvkm_device *device = NULL;
 	struct drm_device *drm;
 	int ret;
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c
index 7f8a427..e7e581d 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c
@@ -252,32 +252,40 @@
 
 	if (!(tdev = kzalloc(sizeof(*tdev), GFP_KERNEL)))
 		return -ENOMEM;
-	*pdevice = &tdev->device;
+
 	tdev->func = func;
 	tdev->pdev = pdev;
 	tdev->irq = -1;
 
 	tdev->vdd = devm_regulator_get(&pdev->dev, "vdd");
-	if (IS_ERR(tdev->vdd))
-		return PTR_ERR(tdev->vdd);
+	if (IS_ERR(tdev->vdd)) {
+		ret = PTR_ERR(tdev->vdd);
+		goto free;
+	}
 
 	tdev->rst = devm_reset_control_get(&pdev->dev, "gpu");
-	if (IS_ERR(tdev->rst))
-		return PTR_ERR(tdev->rst);
+	if (IS_ERR(tdev->rst)) {
+		ret = PTR_ERR(tdev->rst);
+		goto free;
+	}
 
 	tdev->clk = devm_clk_get(&pdev->dev, "gpu");
-	if (IS_ERR(tdev->clk))
-		return PTR_ERR(tdev->clk);
+	if (IS_ERR(tdev->clk)) {
+		ret = PTR_ERR(tdev->clk);
+		goto free;
+	}
 
 	tdev->clk_pwr = devm_clk_get(&pdev->dev, "pwr");
-	if (IS_ERR(tdev->clk_pwr))
-		return PTR_ERR(tdev->clk_pwr);
+	if (IS_ERR(tdev->clk_pwr)) {
+		ret = PTR_ERR(tdev->clk_pwr);
+		goto free;
+	}
 
 	nvkm_device_tegra_probe_iommu(tdev);
 
 	ret = nvkm_device_tegra_power_up(tdev);
 	if (ret)
-		return ret;
+		goto remove;
 
 	tdev->gpu_speedo = tegra_sku_info.gpu_speedo_value;
 	ret = nvkm_device_ctor(&nvkm_device_tegra_func, NULL, &pdev->dev,
@@ -285,9 +293,19 @@
 			       cfg, dbg, detect, mmio, subdev_mask,
 			       &tdev->device);
 	if (ret)
-		return ret;
+		goto powerdown;
+
+	*pdevice = &tdev->device;
 
 	return 0;
+
+powerdown:
+	nvkm_device_tegra_power_down(tdev);
+remove:
+	nvkm_device_tegra_remove_iommu(tdev);
+free:
+	kfree(tdev);
+	return ret;
 }
 #else
 int
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dport.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dport.c
index 74e2f7c..9688970 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dport.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dport.c
@@ -328,6 +328,7 @@
 		.outp = outp,
 	}, *dp = &_dp;
 	u32 datarate = 0;
+	u8  pwr;
 	int ret;
 
 	if (!outp->base.info.location && disp->func->sor.magic)
@@ -355,6 +356,15 @@
 	/* disable link interrupt handling during link training */
 	nvkm_notify_put(&outp->irq);
 
+	/* ensure sink is not in a low-power state */
+	if (!nvkm_rdaux(outp->aux, DPCD_SC00, &pwr, 1)) {
+		if ((pwr & DPCD_SC00_SET_POWER) != DPCD_SC00_SET_POWER_D0) {
+			pwr &= ~DPCD_SC00_SET_POWER;
+			pwr |=  DPCD_SC00_SET_POWER_D0;
+			nvkm_wraux(outp->aux, DPCD_SC00, &pwr, 1);
+		}
+	}
+
 	/* enable down-spreading and execute pre-train script from vbios */
 	dp_link_train_init(dp, outp->dpcd[3] & 0x01);
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dport.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dport.h
index 9596290..6e10c5e 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dport.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dport.h
@@ -71,5 +71,11 @@
 #define DPCD_LS0C_LANE1_POST_CURSOR2                                       0x0c
 #define DPCD_LS0C_LANE0_POST_CURSOR2                                       0x03
 
+/* DPCD Sink Control */
+#define DPCD_SC00                                                       0x00600
+#define DPCD_SC00_SET_POWER                                                0x03
+#define DPCD_SC00_SET_POWER_D0                                             0x01
+#define DPCD_SC00_SET_POWER_D3                                             0x03
+
 void nvkm_dp_train(struct work_struct *);
 #endif
diff --git a/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c b/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c
index dfebdc4..85dfe36 100644
--- a/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c
+++ b/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c
@@ -573,10 +573,9 @@
 
 		kfree(omap_dmm->engines);
 		if (omap_dmm->refill_va)
-			dma_free_writecombine(omap_dmm->dev,
-				REFILL_BUFFER_SIZE * omap_dmm->num_engines,
-				omap_dmm->refill_va,
-				omap_dmm->refill_pa);
+			dma_free_wc(omap_dmm->dev,
+				    REFILL_BUFFER_SIZE * omap_dmm->num_engines,
+				    omap_dmm->refill_va, omap_dmm->refill_pa);
 		if (omap_dmm->dummy_page)
 			__free_page(omap_dmm->dummy_page);
 
@@ -701,9 +700,9 @@
 	omap_dmm->dummy_pa = page_to_phys(omap_dmm->dummy_page);
 
 	/* alloc refill memory */
-	omap_dmm->refill_va = dma_alloc_writecombine(&dev->dev,
-				REFILL_BUFFER_SIZE * omap_dmm->num_engines,
-				&omap_dmm->refill_pa, GFP_KERNEL);
+	omap_dmm->refill_va = dma_alloc_wc(&dev->dev,
+					   REFILL_BUFFER_SIZE * omap_dmm->num_engines,
+					   &omap_dmm->refill_pa, GFP_KERNEL);
 	if (!omap_dmm->refill_va) {
 		dev_err(&dev->dev, "could not allocate refill memory\n");
 		goto fail;
diff --git a/drivers/gpu/drm/omapdrm/omap_gem.c b/drivers/gpu/drm/omapdrm/omap_gem.c
index 8495a1a..359b0d7 100644
--- a/drivers/gpu/drm/omapdrm/omap_gem.c
+++ b/drivers/gpu/drm/omapdrm/omap_gem.c
@@ -1330,8 +1330,8 @@
 			omap_gem_detach_pages(obj);
 
 		if (!is_shmem(obj)) {
-			dma_free_writecombine(dev->dev, obj->size,
-					omap_obj->vaddr, omap_obj->paddr);
+			dma_free_wc(dev->dev, obj->size, omap_obj->vaddr,
+				    omap_obj->paddr);
 		} else if (omap_obj->vaddr) {
 			vunmap(omap_obj->vaddr);
 		}
@@ -1395,8 +1395,8 @@
 		/* attempt to allocate contiguous memory if we don't
 		 * have DMM for remappign discontiguous buffers
 		 */
-		omap_obj->vaddr =  dma_alloc_writecombine(dev->dev, size,
-				&omap_obj->paddr, GFP_KERNEL);
+		omap_obj->vaddr =  dma_alloc_wc(dev->dev, size,
+						&omap_obj->paddr, GFP_KERNEL);
 		if (!omap_obj->vaddr) {
 			kfree(omap_obj);
 
diff --git a/drivers/gpu/drm/qxl/qxl_ioctl.c b/drivers/gpu/drm/qxl/qxl_ioctl.c
index 2ae8577..7c2e782 100644
--- a/drivers/gpu/drm/qxl/qxl_ioctl.c
+++ b/drivers/gpu/drm/qxl/qxl_ioctl.c
@@ -168,7 +168,8 @@
 		       cmd->command_size))
 		return -EFAULT;
 
-	reloc_info = kmalloc(sizeof(struct qxl_reloc_info) * cmd->relocs_num, GFP_KERNEL);
+	reloc_info = kmalloc_array(cmd->relocs_num,
+				   sizeof(struct qxl_reloc_info), GFP_KERNEL);
 	if (!reloc_info)
 		return -ENOMEM;
 
diff --git a/drivers/gpu/drm/qxl/qxl_prime.c b/drivers/gpu/drm/qxl/qxl_prime.c
index 3d031b5..9f029dd 100644
--- a/drivers/gpu/drm/qxl/qxl_prime.c
+++ b/drivers/gpu/drm/qxl/qxl_prime.c
@@ -68,5 +68,5 @@
 		       struct vm_area_struct *area)
 {
 	WARN_ONCE(1, "not implemented");
-	return ENOSYS;
+	return -ENOSYS;
 }
diff --git a/drivers/gpu/drm/radeon/atombios_dp.c b/drivers/gpu/drm/radeon/atombios_dp.c
index 44ee72e..6af8325 100644
--- a/drivers/gpu/drm/radeon/atombios_dp.c
+++ b/drivers/gpu/drm/radeon/atombios_dp.c
@@ -315,15 +315,27 @@
 	unsigned max_lane_num = drm_dp_max_lane_count(dpcd);
 	unsigned lane_num, i, max_pix_clock;
 
-	for (lane_num = 1; lane_num <= max_lane_num; lane_num <<= 1) {
-		for (i = 0; i < ARRAY_SIZE(link_rates) && link_rates[i] <= max_link_rate; i++) {
-			max_pix_clock = (lane_num * link_rates[i] * 8) / bpp;
+	if (radeon_connector_encoder_get_dp_bridge_encoder_id(connector) ==
+	    ENCODER_OBJECT_ID_NUTMEG) {
+		for (lane_num = 1; lane_num <= max_lane_num; lane_num <<= 1) {
+			max_pix_clock = (lane_num * 270000 * 8) / bpp;
 			if (max_pix_clock >= pix_clock) {
 				*dp_lanes = lane_num;
-				*dp_rate = link_rates[i];
+				*dp_rate = 270000;
 				return 0;
 			}
 		}
+	} else {
+		for (lane_num = 1; lane_num <= max_lane_num; lane_num <<= 1) {
+			for (i = 0; i < ARRAY_SIZE(link_rates) && link_rates[i] <= max_link_rate; i++) {
+				max_pix_clock = (lane_num * link_rates[i] * 8) / bpp;
+				if (max_pix_clock >= pix_clock) {
+					*dp_lanes = lane_num;
+					*dp_rate = link_rates[i];
+					return 0;
+				}
+			}
+		}
 	}
 
 	return -EINVAL;
diff --git a/drivers/gpu/drm/radeon/dce6_afmt.c b/drivers/gpu/drm/radeon/dce6_afmt.c
index 6bfc463..367a916 100644
--- a/drivers/gpu/drm/radeon/dce6_afmt.c
+++ b/drivers/gpu/drm/radeon/dce6_afmt.c
@@ -304,18 +304,10 @@
 		unsigned int div = (RREG32(DENTIST_DISPCLK_CNTL) &
 			DENTIST_DPREFCLK_WDIVIDER_MASK) >>
 			DENTIST_DPREFCLK_WDIVIDER_SHIFT;
-
-		if (div < 128 && div >= 96)
-			div -= 64;
-		else if (div >= 64)
-			div = div / 2 - 16;
-		else if (div >= 8)
-			div /= 4;
-		else
-			div = 0;
+		div = radeon_audio_decode_dfs_div(div);
 
 		if (div)
-			clock = rdev->clock.gpupll_outputfreq * 10 / div;
+			clock = clock * 100 / div;
 
 		WREG32(DCE8_DCCG_AUDIO_DTO1_PHASE, 24000);
 		WREG32(DCE8_DCCG_AUDIO_DTO1_MODULE, clock);
diff --git a/drivers/gpu/drm/radeon/evergreen_hdmi.c b/drivers/gpu/drm/radeon/evergreen_hdmi.c
index 9953356..3cf04a2 100644
--- a/drivers/gpu/drm/radeon/evergreen_hdmi.c
+++ b/drivers/gpu/drm/radeon/evergreen_hdmi.c
@@ -289,6 +289,16 @@
 	 * number (coefficient of two integer numbers.  DCCG_AUDIO_DTOx_PHASE
 	 * is the numerator, DCCG_AUDIO_DTOx_MODULE is the denominator
 	 */
+	if (ASIC_IS_DCE41(rdev)) {
+		unsigned int div = (RREG32(DCE41_DENTIST_DISPCLK_CNTL) &
+			DENTIST_DPREFCLK_WDIVIDER_MASK) >>
+			DENTIST_DPREFCLK_WDIVIDER_SHIFT;
+		div = radeon_audio_decode_dfs_div(div);
+
+		if (div)
+			clock = 100 * clock / div;
+	}
+
 	WREG32(DCCG_AUDIO_DTO1_PHASE, 24000);
 	WREG32(DCCG_AUDIO_DTO1_MODULE, clock);
 }
diff --git a/drivers/gpu/drm/radeon/evergreend.h b/drivers/gpu/drm/radeon/evergreend.h
index 4aa5f75..13b6029 100644
--- a/drivers/gpu/drm/radeon/evergreend.h
+++ b/drivers/gpu/drm/radeon/evergreend.h
@@ -511,6 +511,11 @@
 #define DCCG_AUDIO_DTO1_CNTL              0x05cc
 #       define DCCG_AUDIO_DTO1_USE_512FBR_DTO (1 << 3)
 
+#define DCE41_DENTIST_DISPCLK_CNTL			0x049c
+#       define DENTIST_DPREFCLK_WDIVIDER(x)		(((x) & 0x7f) << 24)
+#       define DENTIST_DPREFCLK_WDIVIDER_MASK		(0x7f << 24)
+#       define DENTIST_DPREFCLK_WDIVIDER_SHIFT		24
+
 /* DCE 4.0 AFMT */
 #define HDMI_CONTROL                         0x7030
 #       define HDMI_KEEPOUT_MODE             (1 << 0)
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 5ae6db9..78a51b3 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -268,7 +268,7 @@
 	uint32_t current_dispclk;
 	uint32_t dp_extclk;
 	uint32_t max_pixel_clock;
-	uint32_t gpupll_outputfreq;
+	uint32_t vco_freq;
 };
 
 /*
diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c
index 08fc1b5..de9a2ff 100644
--- a/drivers/gpu/drm/radeon/radeon_atombios.c
+++ b/drivers/gpu/drm/radeon/radeon_atombios.c
@@ -1106,6 +1106,31 @@
 	ATOM_FIRMWARE_INFO_V2_2 info_22;
 };
 
+union igp_info {
+	struct _ATOM_INTEGRATED_SYSTEM_INFO info;
+	struct _ATOM_INTEGRATED_SYSTEM_INFO_V2 info_2;
+	struct _ATOM_INTEGRATED_SYSTEM_INFO_V6 info_6;
+	struct _ATOM_INTEGRATED_SYSTEM_INFO_V1_7 info_7;
+	struct _ATOM_INTEGRATED_SYSTEM_INFO_V1_8 info_8;
+};
+
+static void radeon_atombios_get_dentist_vco_freq(struct radeon_device *rdev)
+{
+	struct radeon_mode_info *mode_info = &rdev->mode_info;
+	int index = GetIndexIntoMasterTable(DATA, IntegratedSystemInfo);
+	union igp_info *igp_info;
+	u8 frev, crev;
+	u16 data_offset;
+
+	if (atom_parse_data_header(mode_info->atom_context, index, NULL,
+			&frev, &crev, &data_offset)) {
+		igp_info = (union igp_info *)(mode_info->atom_context->bios +
+			data_offset);
+		rdev->clock.vco_freq =
+			le32_to_cpu(igp_info->info_6.ulDentistVCOFreq);
+	}
+}
+
 bool radeon_atom_get_clock_info(struct drm_device *dev)
 {
 	struct radeon_device *rdev = dev->dev_private;
@@ -1257,12 +1282,18 @@
 		rdev->mode_info.firmware_flags =
 			le16_to_cpu(firmware_info->info.usFirmwareCapability.susAccess);
 
-		if (ASIC_IS_DCE8(rdev)) {
-			rdev->clock.gpupll_outputfreq =
+		if (ASIC_IS_DCE8(rdev))
+			rdev->clock.vco_freq =
 				le32_to_cpu(firmware_info->info_22.ulGPUPLL_OutputFreq);
-			if (rdev->clock.gpupll_outputfreq == 0)
-				rdev->clock.gpupll_outputfreq = 360000;	/* 3.6 GHz */
-		}
+		else if (ASIC_IS_DCE5(rdev))
+			rdev->clock.vco_freq = rdev->clock.current_dispclk;
+		else if (ASIC_IS_DCE41(rdev))
+			radeon_atombios_get_dentist_vco_freq(rdev);
+		else
+			rdev->clock.vco_freq = rdev->clock.current_dispclk;
+
+		if (rdev->clock.vco_freq == 0)
+			rdev->clock.vco_freq = 360000;	/* 3.6 GHz */
 
 		return true;
 	}
@@ -1270,14 +1301,6 @@
 	return false;
 }
 
-union igp_info {
-	struct _ATOM_INTEGRATED_SYSTEM_INFO info;
-	struct _ATOM_INTEGRATED_SYSTEM_INFO_V2 info_2;
-	struct _ATOM_INTEGRATED_SYSTEM_INFO_V6 info_6;
-	struct _ATOM_INTEGRATED_SYSTEM_INFO_V1_7 info_7;
-	struct _ATOM_INTEGRATED_SYSTEM_INFO_V1_8 info_8;
-};
-
 bool radeon_atombios_sideport_present(struct radeon_device *rdev)
 {
 	struct radeon_mode_info *mode_info = &rdev->mode_info;
diff --git a/drivers/gpu/drm/radeon/radeon_audio.c b/drivers/gpu/drm/radeon/radeon_audio.c
index 2c02e99..b214663 100644
--- a/drivers/gpu/drm/radeon/radeon_audio.c
+++ b/drivers/gpu/drm/radeon/radeon_audio.c
@@ -739,9 +739,6 @@
 	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
 	struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv;
 	struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
-	struct radeon_connector *radeon_connector = to_radeon_connector(connector);
-	struct radeon_connector_atom_dig *dig_connector =
-		radeon_connector->con_priv;
 
 	if (!dig || !dig->afmt)
 		return;
@@ -753,10 +750,7 @@
 		radeon_audio_write_speaker_allocation(encoder);
 		radeon_audio_write_sad_regs(encoder);
 		radeon_audio_write_latency_fields(encoder, mode);
-		if (rdev->clock.dp_extclk || ASIC_IS_DCE5(rdev))
-			radeon_audio_set_dto(encoder, rdev->clock.default_dispclk * 10);
-		else
-			radeon_audio_set_dto(encoder, dig_connector->dp_clock);
+		radeon_audio_set_dto(encoder, rdev->clock.vco_freq * 10);
 		radeon_audio_set_audio_packet(encoder);
 		radeon_audio_select_pin(encoder);
 
@@ -781,3 +775,15 @@
 	if (radeon_encoder->audio && radeon_encoder->audio->dpms)
 		radeon_encoder->audio->dpms(encoder, mode == DRM_MODE_DPMS_ON);
 }
+
+unsigned int radeon_audio_decode_dfs_div(unsigned int div)
+{
+	if (div >= 8 && div < 64)
+		return (div - 8) * 25 + 200;
+	else if (div >= 64 && div < 96)
+		return (div - 64) * 50 + 1600;
+	else if (div >= 96 && div < 128)
+		return (div - 96) * 100 + 3200;
+	else
+		return 0;
+}
diff --git a/drivers/gpu/drm/radeon/radeon_audio.h b/drivers/gpu/drm/radeon/radeon_audio.h
index 059cc30..5c70cce 100644
--- a/drivers/gpu/drm/radeon/radeon_audio.h
+++ b/drivers/gpu/drm/radeon/radeon_audio.h
@@ -79,5 +79,6 @@
 void radeon_audio_mode_set(struct drm_encoder *encoder,
 	struct drm_display_mode *mode);
 void radeon_audio_dpms(struct drm_encoder *encoder, int mode);
+unsigned int radeon_audio_decode_dfs_div(unsigned int div);
 
 #endif
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index 902b59c..4197ca1 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -1744,7 +1744,6 @@
 	}
 
 	drm_kms_helper_poll_enable(dev);
-	drm_helper_hpd_irq_event(dev);
 
 	/* set the power state here in case we are a PX system or headless */
 	if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled)
diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c
index b3bb923..2d9196a 100644
--- a/drivers/gpu/drm/radeon/radeon_display.c
+++ b/drivers/gpu/drm/radeon/radeon_display.c
@@ -403,7 +403,8 @@
 	struct drm_crtc *crtc = &radeon_crtc->base;
 	unsigned long flags;
 	int r;
-	int vpos, hpos, stat, min_udelay;
+	int vpos, hpos, stat, min_udelay = 0;
+	unsigned repcnt = 4;
 	struct drm_vblank_crtc *vblank = &crtc->dev->vblank[work->crtc_id];
 
         down_read(&rdev->exclusive_lock);
@@ -454,7 +455,7 @@
 	 * In practice this won't execute very often unless on very fast
 	 * machines because the time window for this to happen is very small.
 	 */
-	for (;;) {
+	while (radeon_crtc->enabled && --repcnt) {
 		/* GET_DISTANCE_TO_VBLANKSTART returns distance to real vblank
 		 * start in hpos, and to the "fudged earlier" vblank start in
 		 * vpos.
@@ -470,12 +471,24 @@
 			break;
 
 		/* Sleep at least until estimated real start of hw vblank */
-		spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
 		min_udelay = (-hpos + 1) * max(vblank->linedur_ns / 1000, 5);
+		if (min_udelay > vblank->framedur_ns / 2000) {
+			/* Don't wait ridiculously long - something is wrong */
+			repcnt = 0;
+			break;
+		}
+		spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
 		usleep_range(min_udelay, 2 * min_udelay);
 		spin_lock_irqsave(&crtc->dev->event_lock, flags);
 	};
 
+	if (!repcnt)
+		DRM_DEBUG_DRIVER("Delay problem on crtc %d: min_udelay %d, "
+				 "framedur %d, linedur %d, stat %d, vpos %d, "
+				 "hpos %d\n", work->crtc_id, min_udelay,
+				 vblank->framedur_ns / 1000,
+				 vblank->linedur_ns / 1000, stat, vpos, hpos);
+
 	/* do the flip (mmio) */
 	radeon_page_flip(rdev, radeon_crtc->crtc_id, work->base);
 
@@ -1670,8 +1683,10 @@
 	/* setup afmt */
 	radeon_afmt_init(rdev);
 
-	radeon_fbdev_init(rdev);
-	drm_kms_helper_poll_init(rdev->ddev);
+	if (!list_empty(&rdev->ddev->mode_config.connector_list)) {
+		radeon_fbdev_init(rdev);
+		drm_kms_helper_poll_init(rdev->ddev);
+	}
 
 	/* do pm late init */
 	ret = radeon_pm_late_init(rdev);
diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
index 3dcc573..e26c963 100644
--- a/drivers/gpu/drm/radeon/radeon_gem.c
+++ b/drivers/gpu/drm/radeon/radeon_gem.c
@@ -663,6 +663,7 @@
 	bo_va = radeon_vm_bo_find(&fpriv->vm, rbo);
 	if (!bo_va) {
 		args->operation = RADEON_VA_RESULT_ERROR;
+		radeon_bo_unreserve(rbo);
 		drm_gem_object_unreference_unlocked(gobj);
 		return -ENOENT;
 	}
diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
index 84d4563..fb6ad14 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -33,6 +33,7 @@
 #include <linux/slab.h>
 #include <drm/drmP.h>
 #include <drm/radeon_drm.h>
+#include <drm/drm_cache.h>
 #include "radeon.h"
 #include "radeon_trace.h"
 
@@ -245,6 +246,12 @@
 		DRM_INFO_ONCE("Please enable CONFIG_MTRR and CONFIG_X86_PAT for "
 			      "better performance thanks to write-combining\n");
 	bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC);
+#else
+	/* For architectures that don't support WC memory,
+	 * mask out the WC flag from the BO
+	 */
+	if (!drm_arch_can_wc_memory())
+		bo->flags &= ~RADEON_GEM_GTT_WC;
 #endif
 
 	radeon_ttm_placement_from_domain(bo, domain);
diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c
index 460c8f2..7a98823 100644
--- a/drivers/gpu/drm/radeon/radeon_pm.c
+++ b/drivers/gpu/drm/radeon/radeon_pm.c
@@ -276,8 +276,12 @@
 	if (rdev->irq.installed) {
 		for (i = 0; i < rdev->num_crtc; i++) {
 			if (rdev->pm.active_crtcs & (1 << i)) {
-				rdev->pm.req_vblank |= (1 << i);
-				drm_vblank_get(rdev->ddev, i);
+				/* This can fail if a modeset is in progress */
+				if (drm_vblank_get(rdev->ddev, i) == 0)
+					rdev->pm.req_vblank |= (1 << i);
+				else
+					DRM_DEBUG_DRIVER("crtc %d no vblank, can glitch\n",
+							 i);
 			}
 		}
 	}
@@ -1078,10 +1082,6 @@
 	/* update displays */
 	radeon_dpm_display_configuration_changed(rdev);
 
-	rdev->pm.dpm.current_active_crtcs = rdev->pm.dpm.new_active_crtcs;
-	rdev->pm.dpm.current_active_crtc_count = rdev->pm.dpm.new_active_crtc_count;
-	rdev->pm.dpm.single_display = single_display;
-
 	/* wait for the rings to drain */
 	for (i = 0; i < RADEON_NUM_RINGS; i++) {
 		struct radeon_ring *ring = &rdev->ring[i];
@@ -1097,6 +1097,10 @@
 
 	radeon_dpm_post_set_power_state(rdev);
 
+	rdev->pm.dpm.current_active_crtcs = rdev->pm.dpm.new_active_crtcs;
+	rdev->pm.dpm.current_active_crtc_count = rdev->pm.dpm.new_active_crtc_count;
+	rdev->pm.dpm.single_display = single_display;
+
 	if (rdev->asic->dpm.force_performance_level) {
 		if (rdev->pm.dpm.thermal_active) {
 			enum radeon_dpm_forced_level level = rdev->pm.dpm.forced_level;
diff --git a/drivers/gpu/drm/radeon/radeon_sa.c b/drivers/gpu/drm/radeon/radeon_sa.c
index c507896..197b157 100644
--- a/drivers/gpu/drm/radeon/radeon_sa.c
+++ b/drivers/gpu/drm/radeon/radeon_sa.c
@@ -349,8 +349,13 @@
 			/* see if we can skip over some allocations */
 		} while (radeon_sa_bo_next_hole(sa_manager, fences, tries));
 
+		for (i = 0; i < RADEON_NUM_RINGS; ++i)
+			radeon_fence_ref(fences[i]);
+
 		spin_unlock(&sa_manager->wq.lock);
 		r = radeon_fence_wait_any(rdev, fences, false);
+		for (i = 0; i < RADEON_NUM_RINGS; ++i)
+			radeon_fence_unref(&fences[i]);
 		spin_lock(&sa_manager->wq.lock);
 		/* if we have nothing to wait for block */
 		if (r == -ENOENT) {
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index e343074..e06ac54 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -758,7 +758,7 @@
 						       0, PAGE_SIZE,
 						       PCI_DMA_BIDIRECTIONAL);
 		if (pci_dma_mapping_error(rdev->pdev, gtt->ttm.dma_address[i])) {
-			while (--i) {
+			while (i--) {
 				pci_unmap_page(rdev->pdev, gtt->ttm.dma_address[i],
 					       PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
 				gtt->ttm.dma_address[i] = 0;
diff --git a/drivers/gpu/drm/radeon/vce_v1_0.c b/drivers/gpu/drm/radeon/vce_v1_0.c
index 07a0d37..a01efe3 100644
--- a/drivers/gpu/drm/radeon/vce_v1_0.c
+++ b/drivers/gpu/drm/radeon/vce_v1_0.c
@@ -178,12 +178,12 @@
 		return -EINVAL;
 	}
 
-	for (i = 0; i < sign->num; ++i) {
-		if (sign->val[i].chip_id == chip_id)
+	for (i = 0; i < le32_to_cpu(sign->num); ++i) {
+		if (le32_to_cpu(sign->val[i].chip_id) == chip_id)
 			break;
 	}
 
-	if (i == sign->num)
+	if (i == le32_to_cpu(sign->num))
 		return -EINVAL;
 
 	data += (256 - 64) / 4;
@@ -191,18 +191,18 @@
 	data[1] = sign->val[i].nonce[1];
 	data[2] = sign->val[i].nonce[2];
 	data[3] = sign->val[i].nonce[3];
-	data[4] = sign->len + 64;
+	data[4] = cpu_to_le32(le32_to_cpu(sign->len) + 64);
 
 	memset(&data[5], 0, 44);
 	memcpy(&data[16], &sign[1], rdev->vce_fw->size - sizeof(*sign));
 
-	data += data[4] / 4;
+	data += le32_to_cpu(data[4]) / 4;
 	data[0] = sign->val[i].sigval[0];
 	data[1] = sign->val[i].sigval[1];
 	data[2] = sign->val[i].sigval[2];
 	data[3] = sign->val[i].sigval[3];
 
-	rdev->vce.keyselect = sign->val[i].keyselect;
+	rdev->vce.keyselect = le32_to_cpu(sign->val[i].keyselect);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/rockchip/Makefile b/drivers/gpu/drm/rockchip/Makefile
index d1dc0f7..f6a809a 100644
--- a/drivers/gpu/drm/rockchip/Makefile
+++ b/drivers/gpu/drm/rockchip/Makefile
@@ -2,11 +2,11 @@
 # Makefile for the drm device driver.  This driver provides support for the
 # Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher.
 
-rockchipdrm-y := rockchip_drm_drv.o rockchip_drm_fb.o rockchip_drm_fbdev.o \
-		rockchip_drm_gem.o
+rockchipdrm-y := rockchip_drm_drv.o rockchip_drm_fb.o \
+		rockchip_drm_gem.o rockchip_drm_vop.o
+rockchipdrm-$(CONFIG_DRM_FBDEV_EMULATION) += rockchip_drm_fbdev.o
 
 obj-$(CONFIG_ROCKCHIP_DW_HDMI) += dw_hdmi-rockchip.o
 obj-$(CONFIG_ROCKCHIP_DW_MIPI_DSI) += dw-mipi-dsi.o
 
-obj-$(CONFIG_DRM_ROCKCHIP) += rockchipdrm.o rockchip_drm_vop.o \
-				rockchip_vop_reg.o
+obj-$(CONFIG_DRM_ROCKCHIP) += rockchipdrm.o rockchip_vop_reg.o
diff --git a/drivers/gpu/drm/rockchip/dw-mipi-dsi.c b/drivers/gpu/drm/rockchip/dw-mipi-dsi.c
index 7bfe243..f8f8f29 100644
--- a/drivers/gpu/drm/rockchip/dw-mipi-dsi.c
+++ b/drivers/gpu/drm/rockchip/dw-mipi-dsi.c
@@ -461,10 +461,11 @@
 
 static int dw_mipi_dsi_get_lane_bps(struct dw_mipi_dsi *dsi)
 {
-	unsigned int bpp, i, pre;
+	unsigned int i, pre;
 	unsigned long mpclk, pllref, tmp;
 	unsigned int m = 1, n = 1, target_mbps = 1000;
 	unsigned int max_mbps = dptdin_map[ARRAY_SIZE(dptdin_map) - 1].max_mbps;
+	int bpp;
 
 	bpp = mipi_dsi_pixel_format_to_bpp(dsi->format);
 	if (bpp < 0) {
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c
index 8397d1b..a0d51cc 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c
@@ -55,14 +55,12 @@
 
 	return arm_iommu_attach_device(dev, mapping);
 }
-EXPORT_SYMBOL_GPL(rockchip_drm_dma_attach_device);
 
 void rockchip_drm_dma_detach_device(struct drm_device *drm_dev,
 				    struct device *dev)
 {
 	arm_iommu_detach_device(dev);
 }
-EXPORT_SYMBOL_GPL(rockchip_drm_dma_detach_device);
 
 int rockchip_register_crtc_funcs(struct drm_crtc *crtc,
 				 const struct rockchip_crtc_funcs *crtc_funcs)
@@ -77,7 +75,6 @@
 
 	return 0;
 }
-EXPORT_SYMBOL_GPL(rockchip_register_crtc_funcs);
 
 void rockchip_unregister_crtc_funcs(struct drm_crtc *crtc)
 {
@@ -89,7 +86,6 @@
 
 	priv->crtc_funcs[pipe] = NULL;
 }
-EXPORT_SYMBOL_GPL(rockchip_unregister_crtc_funcs);
 
 static struct drm_crtc *rockchip_crtc_from_pipe(struct drm_device *drm,
 						int pipe)
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_fb.c b/drivers/gpu/drm/rockchip/rockchip_drm_fb.c
index f784488..3b8f652 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_fb.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_fb.c
@@ -39,7 +39,6 @@
 
 	return rk_fb->obj[plane];
 }
-EXPORT_SYMBOL_GPL(rockchip_fb_get_gem_obj);
 
 static void rockchip_drm_fb_destroy(struct drm_framebuffer *fb)
 {
@@ -177,8 +176,23 @@
 		crtc_funcs->wait_for_update(crtc);
 }
 
+/*
+ * We can't use drm_atomic_helper_wait_for_vblanks() because rk3288 and rk3066
+ * have hardware counters for neither vblanks nor scanlines, which results in
+ * a race where:
+ *				| <-- HW vsync irq and reg take effect
+ *	       plane_commit --> |
+ *	get_vblank and wait --> |
+ *				| <-- handle_vblank, vblank->count + 1
+ *		 cleanup_fb --> |
+ *		iommu crash --> |
+ *				| <-- HW vsync irq and reg take effect
+ *
+ * This function is equivalent but uses rockchip_crtc_wait_for_update() instead
+ * of waiting for vblank_count to change.
+ */
 static void
-rockchip_atomic_wait_for_complete(struct drm_atomic_state *old_state)
+rockchip_atomic_wait_for_complete(struct drm_device *dev, struct drm_atomic_state *old_state)
 {
 	struct drm_crtc_state *old_crtc_state;
 	struct drm_crtc *crtc;
@@ -194,6 +208,10 @@
 		if (!crtc->state->active)
 			continue;
 
+		if (!drm_atomic_helper_framebuffer_changed(dev,
+				old_state, crtc))
+			continue;
+
 		ret = drm_crtc_vblank_get(crtc);
 		if (ret != 0)
 			continue;
@@ -241,7 +259,7 @@
 
 	drm_atomic_helper_commit_planes(dev, state, true);
 
-	rockchip_atomic_wait_for_complete(state);
+	rockchip_atomic_wait_for_complete(dev, state);
 
 	drm_atomic_helper_cleanup_planes(dev, state);
 
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_fbdev.h b/drivers/gpu/drm/rockchip/rockchip_drm_fbdev.h
index 50432e9..73718c5 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_fbdev.h
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_fbdev.h
@@ -15,7 +15,18 @@
 #ifndef _ROCKCHIP_DRM_FBDEV_H
 #define _ROCKCHIP_DRM_FBDEV_H
 
+#ifdef CONFIG_DRM_FBDEV_EMULATION
 int rockchip_drm_fbdev_init(struct drm_device *dev);
 void rockchip_drm_fbdev_fini(struct drm_device *dev);
+#else
+static inline int rockchip_drm_fbdev_init(struct drm_device *dev)
+{
+	return 0;
+}
+
+static inline void rockchip_drm_fbdev_fini(struct drm_device *dev)
+{
+}
+#endif
 
 #endif /* _ROCKCHIP_DRM_FBDEV_H */
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_gem.c b/drivers/gpu/drm/rockchip/rockchip_drm_gem.c
index d908321..18e0733 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_gem.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_gem.c
@@ -234,13 +234,8 @@
 	/*
 	 * align to 64 bytes since Mali requires it.
 	 */
-	min_pitch = ALIGN(min_pitch, 64);
-
-	if (args->pitch < min_pitch)
-		args->pitch = min_pitch;
-
-	if (args->size < args->pitch * args->height)
-		args->size = args->pitch * args->height;
+	args->pitch = ALIGN(min_pitch, 64);
+	args->size = args->pitch * args->height;
 
 	rk_obj = rockchip_gem_create_with_handle(file_priv, dev, args->size,
 						 &args->handle);
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
index 46c2a8d..fd37054 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
@@ -43,8 +43,8 @@
 
 #define REG_SET(x, base, reg, v, mode) \
 		__REG_SET_##mode(x, base + reg.offset, reg.mask, reg.shift, v)
-#define REG_SET_MASK(x, base, reg, v, mode) \
-		__REG_SET_##mode(x, base + reg.offset, reg.mask, reg.shift, v)
+#define REG_SET_MASK(x, base, reg, mask, v, mode) \
+		__REG_SET_##mode(x, base + reg.offset, mask, reg.shift, v)
 
 #define VOP_WIN_SET(x, win, name, v) \
 		REG_SET(x, win->base, win->phy->name, v, RELAXED)
@@ -58,16 +58,18 @@
 #define VOP_INTR_GET(vop, name) \
 		vop_read_reg(vop, 0, &vop->data->ctrl->name)
 
-#define VOP_INTR_SET(vop, name, v) \
-		REG_SET(vop, 0, vop->data->intr->name, v, NORMAL)
+#define VOP_INTR_SET(vop, name, mask, v) \
+		REG_SET_MASK(vop, 0, vop->data->intr->name, mask, v, NORMAL)
 #define VOP_INTR_SET_TYPE(vop, name, type, v) \
 	do { \
-		int i, reg = 0; \
+		int i, reg = 0, mask = 0; \
 		for (i = 0; i < vop->data->intr->nintrs; i++) { \
-			if (vop->data->intr->intrs[i] & type) \
+			if (vop->data->intr->intrs[i] & type) { \
 				reg |= (v) << i; \
+				mask |= 1 << i; \
+			} \
 		} \
-		VOP_INTR_SET(vop, name, reg); \
+		VOP_INTR_SET(vop, name, mask, reg); \
 	} while (0)
 #define VOP_INTR_GET_TYPE(vop, name, type) \
 		vop_get_intr_type(vop, &vop->data->intr->name, type)
diff --git a/drivers/gpu/drm/sti/sti_cursor.c b/drivers/gpu/drm/sti/sti_cursor.c
index 8078631..bd736ac 100644
--- a/drivers/gpu/drm/sti/sti_cursor.c
+++ b/drivers/gpu/drm/sti/sti_cursor.c
@@ -157,17 +157,15 @@
 		cursor->height = src_h;
 
 		if (cursor->pixmap.base)
-			dma_free_writecombine(cursor->dev,
-					      cursor->pixmap.size,
-					      cursor->pixmap.base,
-					      cursor->pixmap.paddr);
+			dma_free_wc(cursor->dev, cursor->pixmap.size,
+				    cursor->pixmap.base, cursor->pixmap.paddr);
 
 		cursor->pixmap.size = cursor->width * cursor->height;
 
-		cursor->pixmap.base = dma_alloc_writecombine(cursor->dev,
-							cursor->pixmap.size,
-							&cursor->pixmap.paddr,
-							GFP_KERNEL | GFP_DMA);
+		cursor->pixmap.base = dma_alloc_wc(cursor->dev,
+						   cursor->pixmap.size,
+						   &cursor->pixmap.paddr,
+						   GFP_KERNEL | GFP_DMA);
 		if (!cursor->pixmap.base) {
 			DRM_ERROR("Failed to allocate memory for pixmap\n");
 			return;
@@ -252,8 +250,8 @@
 
 	/* Allocate clut buffer */
 	size = 0x100 * sizeof(unsigned short);
-	cursor->clut = dma_alloc_writecombine(dev, size, &cursor->clut_paddr,
-					      GFP_KERNEL | GFP_DMA);
+	cursor->clut = dma_alloc_wc(dev, size, &cursor->clut_paddr,
+				    GFP_KERNEL | GFP_DMA);
 
 	if (!cursor->clut) {
 		DRM_ERROR("Failed to allocate memory for cursor clut\n");
@@ -286,7 +284,7 @@
 	return &cursor->plane.drm_plane;
 
 err_plane:
-	dma_free_writecombine(dev, size, cursor->clut, cursor->clut_paddr);
+	dma_free_wc(dev, size, cursor->clut, cursor->clut_paddr);
 err_clut:
 	devm_kfree(dev, cursor);
 	return NULL;
diff --git a/drivers/gpu/drm/sti/sti_gdp.c b/drivers/gpu/drm/sti/sti_gdp.c
index f9a1d92..514551c 100644
--- a/drivers/gpu/drm/sti/sti_gdp.c
+++ b/drivers/gpu/drm/sti/sti_gdp.c
@@ -312,8 +312,7 @@
 	/* Allocate all the nodes within a single memory page */
 	size = sizeof(struct sti_gdp_node) *
 	    GDP_NODE_PER_FIELD * GDP_NODE_NB_BANK;
-	base = dma_alloc_writecombine(gdp->dev,
-				      size, &dma_addr, GFP_KERNEL | GFP_DMA);
+	base = dma_alloc_wc(gdp->dev, size, &dma_addr, GFP_KERNEL | GFP_DMA);
 
 	if (!base) {
 		DRM_ERROR("Failed to allocate memory for GDP node\n");
diff --git a/drivers/gpu/drm/sti/sti_hqvdp.c b/drivers/gpu/drm/sti/sti_hqvdp.c
index 43861b5..1d3c3d0 100644
--- a/drivers/gpu/drm/sti/sti_hqvdp.c
+++ b/drivers/gpu/drm/sti/sti_hqvdp.c
@@ -617,9 +617,9 @@
 
 	/* Allocate memory for the VDP commands */
 	size = NB_VDP_CMD * sizeof(struct sti_hqvdp_cmd);
-	hqvdp->hqvdp_cmd = dma_alloc_writecombine(hqvdp->dev, size,
-					 &hqvdp->hqvdp_cmd_paddr,
-					 GFP_KERNEL | GFP_DMA);
+	hqvdp->hqvdp_cmd = dma_alloc_wc(hqvdp->dev, size,
+					&hqvdp->hqvdp_cmd_paddr,
+					GFP_KERNEL | GFP_DMA);
 	if (!hqvdp->hqvdp_cmd) {
 		DRM_ERROR("Failed to allocate memory for VDP cmd\n");
 		return;
diff --git a/drivers/gpu/drm/tegra/gem.c b/drivers/gpu/drm/tegra/gem.c
index 33add93..3b0d8c3 100644
--- a/drivers/gpu/drm/tegra/gem.c
+++ b/drivers/gpu/drm/tegra/gem.c
@@ -175,8 +175,7 @@
 		sg_free_table(bo->sgt);
 		kfree(bo->sgt);
 	} else if (bo->vaddr) {
-		dma_free_writecombine(drm->dev, bo->gem.size, bo->vaddr,
-				      bo->paddr);
+		dma_free_wc(drm->dev, bo->gem.size, bo->vaddr, bo->paddr);
 	}
 }
 
@@ -233,8 +232,8 @@
 	} else {
 		size_t size = bo->gem.size;
 
-		bo->vaddr = dma_alloc_writecombine(drm->dev, size, &bo->paddr,
-						   GFP_KERNEL | __GFP_NOWARN);
+		bo->vaddr = dma_alloc_wc(drm->dev, size, &bo->paddr,
+					 GFP_KERNEL | __GFP_NOWARN);
 		if (!bo->vaddr) {
 			dev_err(drm->dev,
 				"failed to allocate buffer of size %zu\n",
@@ -472,8 +471,8 @@
 		vma->vm_flags &= ~VM_PFNMAP;
 		vma->vm_pgoff = 0;
 
-		ret = dma_mmap_writecombine(gem->dev->dev, vma, bo->vaddr,
-					    bo->paddr, gem->size);
+		ret = dma_mmap_wc(gem->dev->dev, vma, bo->vaddr, bo->paddr,
+				  gem->size);
 		if (ret) {
 			drm_gem_vm_close(vma);
 			return ret;
diff --git a/drivers/gpu/drm/vc4/vc4_bo.c b/drivers/gpu/drm/vc4/vc4_bo.c
index 18dfe3e..034ef2d 100644
--- a/drivers/gpu/drm/vc4/vc4_bo.c
+++ b/drivers/gpu/drm/vc4/vc4_bo.c
@@ -215,7 +215,7 @@
 	struct drm_gem_cma_object *cma_obj;
 
 	if (size == 0)
-		return NULL;
+		return ERR_PTR(-EINVAL);
 
 	/* First, try to get a vc4_bo from the kernel BO cache. */
 	if (from_cache) {
@@ -237,7 +237,7 @@
 		if (IS_ERR(cma_obj)) {
 			DRM_ERROR("Failed to allocate from CMA:\n");
 			vc4_bo_stats_dump(vc4);
-			return NULL;
+			return ERR_PTR(-ENOMEM);
 		}
 	}
 
@@ -259,8 +259,8 @@
 		args->size = args->pitch * args->height;
 
 	bo = vc4_bo_create(dev, args->size, false);
-	if (!bo)
-		return -ENOMEM;
+	if (IS_ERR(bo))
+		return PTR_ERR(bo);
 
 	ret = drm_gem_handle_create(file_priv, &bo->base.base, &args->handle);
 	drm_gem_object_unreference_unlocked(&bo->base.base);
@@ -398,9 +398,8 @@
 	vma->vm_flags &= ~VM_PFNMAP;
 	vma->vm_pgoff = 0;
 
-	ret = dma_mmap_writecombine(bo->base.base.dev->dev, vma,
-				    bo->base.vaddr, bo->base.paddr,
-				    vma->vm_end - vma->vm_start);
+	ret = dma_mmap_wc(bo->base.base.dev->dev, vma, bo->base.vaddr,
+			  bo->base.paddr, vma->vm_end - vma->vm_start);
 	if (ret)
 		drm_gem_vm_close(vma);
 
@@ -443,8 +442,8 @@
 	 * get zeroed, and that might leak data between users.
 	 */
 	bo = vc4_bo_create(dev, args->size, false);
-	if (!bo)
-		return -ENOMEM;
+	if (IS_ERR(bo))
+		return PTR_ERR(bo);
 
 	ret = drm_gem_handle_create(file_priv, &bo->base.base, &args->handle);
 	drm_gem_object_unreference_unlocked(&bo->base.base);
@@ -496,8 +495,8 @@
 	}
 
 	bo = vc4_bo_create(dev, args->size, true);
-	if (!bo)
-		return -ENOMEM;
+	if (IS_ERR(bo))
+		return PTR_ERR(bo);
 
 	ret = copy_from_user(bo->base.vaddr,
 			     (void __user *)(uintptr_t)args->data,
diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h
index 080865e..51a6333 100644
--- a/drivers/gpu/drm/vc4/vc4_drv.h
+++ b/drivers/gpu/drm/vc4/vc4_drv.h
@@ -91,8 +91,12 @@
 	struct vc4_bo *overflow_mem;
 	struct work_struct overflow_mem_work;
 
+	int power_refcount;
+
+	/* Mutex controlling the power refcount. */
+	struct mutex power_lock;
+
 	struct {
-		uint32_t last_ct0ca, last_ct1ca;
 		struct timer_list timer;
 		struct work_struct reset_work;
 	} hangcheck;
@@ -142,6 +146,7 @@
 };
 
 struct vc4_v3d {
+	struct vc4_dev *vc4;
 	struct platform_device *pdev;
 	void __iomem *regs;
 };
@@ -192,6 +197,11 @@
 	/* Sequence number for this bin/render job. */
 	uint64_t seqno;
 
+	/* Last current addresses the hardware was processing when the
+	 * hangcheck timer checked on us.
+	 */
+	uint32_t last_ct0ca, last_ct1ca;
+
 	/* Kernel-space copy of the ioctl arguments */
 	struct drm_vc4_submit_cl *args;
 
@@ -434,7 +444,6 @@
 extern struct platform_driver vc4_v3d_driver;
 int vc4_v3d_debugfs_ident(struct seq_file *m, void *unused);
 int vc4_v3d_debugfs_regs(struct seq_file *m, void *unused);
-int vc4_v3d_set_power(struct vc4_dev *vc4, bool on);
 
 /* vc4_validate.c */
 int
diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c
index 48ce30a..202aa15 100644
--- a/drivers/gpu/drm/vc4/vc4_gem.c
+++ b/drivers/gpu/drm/vc4/vc4_gem.c
@@ -23,6 +23,7 @@
 
 #include <linux/module.h>
 #include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
 #include <linux/device.h>
 #include <linux/io.h>
 
@@ -228,8 +229,16 @@
 	struct vc4_dev *vc4 = to_vc4_dev(dev);
 
 	DRM_INFO("Resetting GPU.\n");
-	vc4_v3d_set_power(vc4, false);
-	vc4_v3d_set_power(vc4, true);
+
+	mutex_lock(&vc4->power_lock);
+	if (vc4->power_refcount) {
+		/* Power the device off and back on the by dropping the
+		 * reference on runtime PM.
+		 */
+		pm_runtime_put_sync_suspend(&vc4->v3d->pdev->dev);
+		pm_runtime_get_sync(&vc4->v3d->pdev->dev);
+	}
+	mutex_unlock(&vc4->power_lock);
 
 	vc4_irq_reset(dev);
 
@@ -257,10 +266,17 @@
 	struct drm_device *dev = (struct drm_device *)data;
 	struct vc4_dev *vc4 = to_vc4_dev(dev);
 	uint32_t ct0ca, ct1ca;
+	unsigned long irqflags;
+	struct vc4_exec_info *exec;
+
+	spin_lock_irqsave(&vc4->job_lock, irqflags);
+	exec = vc4_first_job(vc4);
 
 	/* If idle, we can stop watching for hangs. */
-	if (list_empty(&vc4->job_list))
+	if (!exec) {
+		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
 		return;
+	}
 
 	ct0ca = V3D_READ(V3D_CTNCA(0));
 	ct1ca = V3D_READ(V3D_CTNCA(1));
@@ -268,14 +284,16 @@
 	/* If we've made any progress in execution, rearm the timer
 	 * and wait.
 	 */
-	if (ct0ca != vc4->hangcheck.last_ct0ca ||
-	    ct1ca != vc4->hangcheck.last_ct1ca) {
-		vc4->hangcheck.last_ct0ca = ct0ca;
-		vc4->hangcheck.last_ct1ca = ct1ca;
+	if (ct0ca != exec->last_ct0ca || ct1ca != exec->last_ct1ca) {
+		exec->last_ct0ca = ct0ca;
+		exec->last_ct1ca = ct1ca;
+		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
 		vc4_queue_hangcheck(dev);
 		return;
 	}
 
+	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+
 	/* We've gone too long with no progress, reset.  This has to
 	 * be done from a work struct, since resetting can sleep and
 	 * this timer hook isn't allowed to.
@@ -340,12 +358,7 @@
 	finish_wait(&vc4->job_wait_queue, &wait);
 	trace_vc4_wait_for_seqno_end(dev, seqno);
 
-	if (ret && ret != -ERESTARTSYS) {
-		DRM_ERROR("timeout waiting for render thread idle\n");
-		return ret;
-	}
-
-	return 0;
+	return ret;
 }
 
 static void
@@ -578,9 +591,9 @@
 	}
 
 	bo = vc4_bo_create(dev, exec_size, true);
-	if (!bo) {
+	if (IS_ERR(bo)) {
 		DRM_ERROR("Couldn't allocate BO for binning\n");
-		ret = -ENOMEM;
+		ret = PTR_ERR(bo);
 		goto fail;
 	}
 	exec->exec_bo = &bo->base;
@@ -617,6 +630,7 @@
 static void
 vc4_complete_exec(struct drm_device *dev, struct vc4_exec_info *exec)
 {
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
 	unsigned i;
 
 	/* Need the struct lock for drm_gem_object_unreference(). */
@@ -635,6 +649,11 @@
 	}
 	mutex_unlock(&dev->struct_mutex);
 
+	mutex_lock(&vc4->power_lock);
+	if (--vc4->power_refcount == 0)
+		pm_runtime_put(&vc4->v3d->pdev->dev);
+	mutex_unlock(&vc4->power_lock);
+
 	kfree(exec);
 }
 
@@ -746,6 +765,9 @@
 	struct drm_gem_object *gem_obj;
 	struct vc4_bo *bo;
 
+	if (args->pad != 0)
+		return -EINVAL;
+
 	gem_obj = drm_gem_object_lookup(dev, file_priv, args->handle);
 	if (!gem_obj) {
 		DRM_ERROR("Failed to look up GEM BO %d\n", args->handle);
@@ -772,7 +794,7 @@
 	struct vc4_dev *vc4 = to_vc4_dev(dev);
 	struct drm_vc4_submit_cl *args = data;
 	struct vc4_exec_info *exec;
-	int ret;
+	int ret = 0;
 
 	if ((args->flags & ~VC4_SUBMIT_CL_USE_CLEAR_COLOR) != 0) {
 		DRM_ERROR("Unknown flags: 0x%02x\n", args->flags);
@@ -785,6 +807,15 @@
 		return -ENOMEM;
 	}
 
+	mutex_lock(&vc4->power_lock);
+	if (vc4->power_refcount++ == 0)
+		ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev);
+	mutex_unlock(&vc4->power_lock);
+	if (ret < 0) {
+		kfree(exec);
+		return ret;
+	}
+
 	exec->args = args;
 	INIT_LIST_HEAD(&exec->unref_list);
 
@@ -839,6 +870,8 @@
 		    (unsigned long)dev);
 
 	INIT_WORK(&vc4->job_done_work, vc4_job_done_work);
+
+	mutex_init(&vc4->power_lock);
 }
 
 void
diff --git a/drivers/gpu/drm/vc4/vc4_irq.c b/drivers/gpu/drm/vc4/vc4_irq.c
index b68060e..78a2135 100644
--- a/drivers/gpu/drm/vc4/vc4_irq.c
+++ b/drivers/gpu/drm/vc4/vc4_irq.c
@@ -57,7 +57,7 @@
 	struct vc4_bo *bo;
 
 	bo = vc4_bo_create(dev, 256 * 1024, true);
-	if (!bo) {
+	if (IS_ERR(bo)) {
 		DRM_ERROR("Couldn't allocate binner overflow mem\n");
 		return;
 	}
diff --git a/drivers/gpu/drm/vc4/vc4_render_cl.c b/drivers/gpu/drm/vc4/vc4_render_cl.c
index 8a2a312..0f12418 100644
--- a/drivers/gpu/drm/vc4/vc4_render_cl.c
+++ b/drivers/gpu/drm/vc4/vc4_render_cl.c
@@ -316,20 +316,11 @@
 	size += xtiles * ytiles * loop_body_size;
 
 	setup->rcl = &vc4_bo_create(dev, size, true)->base;
-	if (!setup->rcl)
-		return -ENOMEM;
+	if (IS_ERR(setup->rcl))
+		return PTR_ERR(setup->rcl);
 	list_add_tail(&to_vc4_bo(&setup->rcl->base)->unref_head,
 		      &exec->unref_list);
 
-	rcl_u8(setup, VC4_PACKET_TILE_RENDERING_MODE_CONFIG);
-	rcl_u32(setup,
-		(setup->color_write ? (setup->color_write->paddr +
-				       args->color_write.offset) :
-		 0));
-	rcl_u16(setup, args->width);
-	rcl_u16(setup, args->height);
-	rcl_u16(setup, args->color_write.bits);
-
 	/* The tile buffer gets cleared when the previous tile is stored.  If
 	 * the clear values changed between frames, then the tile buffer has
 	 * stale clear values in it, so we have to do a store in None mode (no
@@ -349,6 +340,15 @@
 		rcl_u32(setup, 0); /* no address, since we're in None mode */
 	}
 
+	rcl_u8(setup, VC4_PACKET_TILE_RENDERING_MODE_CONFIG);
+	rcl_u32(setup,
+		(setup->color_write ? (setup->color_write->paddr +
+				       args->color_write.offset) :
+		 0));
+	rcl_u16(setup, args->width);
+	rcl_u16(setup, args->height);
+	rcl_u16(setup, args->color_write.bits);
+
 	for (y = min_y_tile; y <= max_y_tile; y++) {
 		for (x = min_x_tile; x <= max_x_tile; x++) {
 			bool first = (x == min_x_tile && y == min_y_tile);
diff --git a/drivers/gpu/drm/vc4/vc4_v3d.c b/drivers/gpu/drm/vc4/vc4_v3d.c
index 424d515..31de5d1 100644
--- a/drivers/gpu/drm/vc4/vc4_v3d.c
+++ b/drivers/gpu/drm/vc4/vc4_v3d.c
@@ -17,6 +17,7 @@
  */
 
 #include "linux/component.h"
+#include "linux/pm_runtime.h"
 #include "vc4_drv.h"
 #include "vc4_regs.h"
 
@@ -144,21 +145,6 @@
 }
 #endif /* CONFIG_DEBUG_FS */
 
-/*
- * Asks the firmware to turn on power to the V3D engine.
- *
- * This may be doable with just the clocks interface, though this
- * packet does some other register setup from the firmware, too.
- */
-int
-vc4_v3d_set_power(struct vc4_dev *vc4, bool on)
-{
-	if (on)
-		return pm_generic_poweroff(&vc4->v3d->pdev->dev);
-	else
-		return pm_generic_resume(&vc4->v3d->pdev->dev);
-}
-
 static void vc4_v3d_init_hw(struct drm_device *dev)
 {
 	struct vc4_dev *vc4 = to_vc4_dev(dev);
@@ -170,6 +156,29 @@
 	V3D_WRITE(V3D_VPMBASE, 0);
 }
 
+#ifdef CONFIG_PM
+static int vc4_v3d_runtime_suspend(struct device *dev)
+{
+	struct vc4_v3d *v3d = dev_get_drvdata(dev);
+	struct vc4_dev *vc4 = v3d->vc4;
+
+	vc4_irq_uninstall(vc4->dev);
+
+	return 0;
+}
+
+static int vc4_v3d_runtime_resume(struct device *dev)
+{
+	struct vc4_v3d *v3d = dev_get_drvdata(dev);
+	struct vc4_dev *vc4 = v3d->vc4;
+
+	vc4_v3d_init_hw(vc4->dev);
+	vc4_irq_postinstall(vc4->dev);
+
+	return 0;
+}
+#endif
+
 static int vc4_v3d_bind(struct device *dev, struct device *master, void *data)
 {
 	struct platform_device *pdev = to_platform_device(dev);
@@ -182,6 +191,8 @@
 	if (!v3d)
 		return -ENOMEM;
 
+	dev_set_drvdata(dev, v3d);
+
 	v3d->pdev = pdev;
 
 	v3d->regs = vc4_ioremap_regs(pdev, 0);
@@ -189,6 +200,7 @@
 		return PTR_ERR(v3d->regs);
 
 	vc4->v3d = v3d;
+	v3d->vc4 = vc4;
 
 	if (V3D_READ(V3D_IDENT0) != V3D_EXPECTED_IDENT0) {
 		DRM_ERROR("V3D_IDENT0 read 0x%08x instead of 0x%08x\n",
@@ -210,6 +222,8 @@
 		return ret;
 	}
 
+	pm_runtime_enable(dev);
+
 	return 0;
 }
 
@@ -219,6 +233,8 @@
 	struct drm_device *drm = dev_get_drvdata(master);
 	struct vc4_dev *vc4 = to_vc4_dev(drm);
 
+	pm_runtime_disable(dev);
+
 	drm_irq_uninstall(drm);
 
 	/* Disable the binner's overflow memory address, so the next
@@ -231,6 +247,10 @@
 	vc4->v3d = NULL;
 }
 
+static const struct dev_pm_ops vc4_v3d_pm_ops = {
+	SET_RUNTIME_PM_OPS(vc4_v3d_runtime_suspend, vc4_v3d_runtime_resume, NULL)
+};
+
 static const struct component_ops vc4_v3d_ops = {
 	.bind   = vc4_v3d_bind,
 	.unbind = vc4_v3d_unbind,
@@ -258,5 +278,6 @@
 	.driver = {
 		.name = "vc4_v3d",
 		.of_match_table = vc4_v3d_dt_match,
+		.pm = &vc4_v3d_pm_ops,
 	},
 };
diff --git a/drivers/gpu/drm/vc4/vc4_validate.c b/drivers/gpu/drm/vc4/vc4_validate.c
index e26d9f6..24c2c74 100644
--- a/drivers/gpu/drm/vc4/vc4_validate.c
+++ b/drivers/gpu/drm/vc4/vc4_validate.c
@@ -401,8 +401,8 @@
 	tile_bo = vc4_bo_create(dev, exec->tile_alloc_offset + tile_alloc_size,
 				true);
 	exec->tile_bo = &tile_bo->base;
-	if (!exec->tile_bo)
-		return -ENOMEM;
+	if (IS_ERR(exec->tile_bo))
+		return PTR_ERR(exec->tile_bo);
 	list_add_tail(&tile_bo->unref_head, &exec->unref_list);
 
 	/* tile alloc address. */
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index c49812b..24fb348 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -25,6 +25,7 @@
  *
  **************************************************************************/
 #include <linux/module.h>
+#include <linux/console.h>
 
 #include <drm/drmP.h>
 #include "vmwgfx_drv.h"
@@ -1538,6 +1539,12 @@
 static int __init vmwgfx_init(void)
 {
 	int ret;
+
+#ifdef CONFIG_VGA_CONSOLE
+	if (vgacon_text_force())
+		return -EINVAL;
+#endif
+
 	ret = drm_pci_init(&driver, &vmw_pci_driver);
 	if (ret)
 		DRM_ERROR("Failed initializing DRM.\n");
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c
index db082be..c5a1a08 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c
@@ -563,6 +563,8 @@
 
 static const struct drm_connector_funcs vmw_sou_connector_funcs = {
 	.dpms = vmw_du_connector_dpms,
+	.detect = vmw_du_connector_detect,
+	.fill_modes = vmw_du_connector_fill_modes,
 	.set_property = vmw_du_connector_set_property,
 	.destroy = vmw_sou_connector_destroy,
 };
diff --git a/drivers/gpu/host1x/bus.c b/drivers/gpu/host1x/bus.c
index da462af..dd2dbb9 100644
--- a/drivers/gpu/host1x/bus.c
+++ b/drivers/gpu/host1x/bus.c
@@ -18,6 +18,7 @@
 #include <linux/host1x.h>
 #include <linux/of.h>
 #include <linux/slab.h>
+#include <linux/of_device.h>
 
 #include "bus.h"
 #include "dev.h"
@@ -394,6 +395,7 @@
 	device->dev.coherent_dma_mask = host1x->dev->coherent_dma_mask;
 	device->dev.dma_mask = &device->dev.coherent_dma_mask;
 	dev_set_name(&device->dev, "%s", driver->driver.name);
+	of_dma_configure(&device->dev, host1x->dev->of_node);
 	device->dev.release = host1x_device_release;
 	device->dev.bus = &host1x_bus_type;
 	device->dev.parent = host1x->dev;
diff --git a/drivers/gpu/host1x/cdma.c b/drivers/gpu/host1x/cdma.c
index 5a8c8d5..a18db4d 100644
--- a/drivers/gpu/host1x/cdma.c
+++ b/drivers/gpu/host1x/cdma.c
@@ -52,8 +52,8 @@
 	struct host1x *host1x = cdma_to_host1x(cdma);
 
 	if (pb->phys != 0)
-		dma_free_writecombine(host1x->dev, pb->size_bytes + 4,
-				      pb->mapped, pb->phys);
+		dma_free_wc(host1x->dev, pb->size_bytes + 4, pb->mapped,
+			    pb->phys);
 
 	pb->mapped = NULL;
 	pb->phys = 0;
@@ -76,8 +76,8 @@
 	pb->pos = 0;
 
 	/* allocate and map pushbuffer memory */
-	pb->mapped = dma_alloc_writecombine(host1x->dev, pb->size_bytes + 4,
-					    &pb->phys, GFP_KERNEL);
+	pb->mapped = dma_alloc_wc(host1x->dev, pb->size_bytes + 4, &pb->phys,
+				  GFP_KERNEL);
 	if (!pb->mapped)
 		goto fail;
 
diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
index 314bf37..ff34869 100644
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c
@@ -23,6 +23,7 @@
 #include <linux/of_device.h>
 #include <linux/clk.h>
 #include <linux/io.h>
+#include <linux/dma-mapping.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/host1x.h>
@@ -68,6 +69,7 @@
 	.nb_bases	= 8,
 	.init		= host1x01_init,
 	.sync_offset	= 0x3000,
+	.dma_mask	= DMA_BIT_MASK(32),
 };
 
 static const struct host1x_info host1x02_info = {
@@ -77,6 +79,7 @@
 	.nb_bases = 12,
 	.init = host1x02_init,
 	.sync_offset = 0x3000,
+	.dma_mask = DMA_BIT_MASK(32),
 };
 
 static const struct host1x_info host1x04_info = {
@@ -86,6 +89,7 @@
 	.nb_bases = 64,
 	.init = host1x04_init,
 	.sync_offset = 0x2100,
+	.dma_mask = DMA_BIT_MASK(34),
 };
 
 static const struct host1x_info host1x05_info = {
@@ -95,6 +99,7 @@
 	.nb_bases = 64,
 	.init = host1x05_init,
 	.sync_offset = 0x2100,
+	.dma_mask = DMA_BIT_MASK(34),
 };
 
 static struct of_device_id host1x_of_match[] = {
@@ -148,6 +153,8 @@
 	if (IS_ERR(host->regs))
 		return PTR_ERR(host->regs);
 
+	dma_set_mask_and_coherent(host->dev, host->info->dma_mask);
+
 	if (host->info->init) {
 		err = host->info->init(host);
 		if (err)
diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h
index 0b6e8e9..dace124 100644
--- a/drivers/gpu/host1x/dev.h
+++ b/drivers/gpu/host1x/dev.h
@@ -96,6 +96,7 @@
 	int	nb_mlocks;		/* host1x: number of mlocks */
 	int	(*init)(struct host1x *); /* initialize per SoC ops */
 	int	sync_offset;
+	u64	dma_mask;		/* mask of addressable memory */
 };
 
 struct host1x {
diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c
index 63bd63f..defa799 100644
--- a/drivers/gpu/host1x/job.c
+++ b/drivers/gpu/host1x/job.c
@@ -467,9 +467,8 @@
 		size += g->words * sizeof(u32);
 	}
 
-	job->gather_copy_mapped = dma_alloc_writecombine(dev, size,
-							 &job->gather_copy,
-							 GFP_KERNEL);
+	job->gather_copy_mapped = dma_alloc_wc(dev, size, &job->gather_copy,
+					       GFP_KERNEL);
 	if (!job->gather_copy_mapped) {
 		job->gather_copy_mapped = NULL;
 		return -ENOMEM;
@@ -578,9 +577,8 @@
 	job->num_unpins = 0;
 
 	if (job->gather_copy_size)
-		dma_free_writecombine(job->channel->dev, job->gather_copy_size,
-				      job->gather_copy_mapped,
-				      job->gather_copy);
+		dma_free_wc(job->channel->dev, job->gather_copy_size,
+		            job->gather_copy_mapped, job->gather_copy);
 }
 EXPORT_SYMBOL(host1x_job_unpin);
 
diff --git a/drivers/gpu/ipu-v3/ipu-common.c b/drivers/gpu/ipu-v3/ipu-common.c
index f2e13eb..e00db3f 100644
--- a/drivers/gpu/ipu-v3/ipu-common.c
+++ b/drivers/gpu/ipu-v3/ipu-common.c
@@ -1050,6 +1050,17 @@
 	for (i = 0; i < ARRAY_SIZE(client_reg); i++) {
 		const struct ipu_platform_reg *reg = &client_reg[i];
 		struct platform_device *pdev;
+		struct device_node *of_node;
+
+		/* Associate subdevice with the corresponding port node */
+		of_node = of_graph_get_port_by_id(dev->of_node, i);
+		if (!of_node) {
+			dev_info(dev,
+				 "no port@%d node in %s, not using %s%d\n",
+				 i, dev->of_node->full_name,
+				 (i / 2) ? "DI" : "CSI", i % 2);
+			continue;
+		}
 
 		pdev = platform_device_alloc(reg->name, id++);
 		if (!pdev) {
@@ -1057,17 +1068,9 @@
 			goto err_register;
 		}
 
+		pdev->dev.of_node = of_node;
 		pdev->dev.parent = dev;
 
-		/* Associate subdevice with the corresponding port node */
-		pdev->dev.of_node = of_graph_get_port_by_id(dev->of_node, i);
-		if (!pdev->dev.of_node) {
-			dev_err(dev, "missing port@%d node in %s\n", i,
-				dev->of_node->full_name);
-			ret = -ENODEV;
-			goto err_register;
-		}
-
 		ret = platform_device_add_data(pdev, &reg->pdata,
 					       sizeof(reg->pdata));
 		if (!ret)
@@ -1289,10 +1292,6 @@
 	ipu->irq_sync = irq_sync;
 	ipu->irq_err = irq_err;
 
-	ret = ipu_irq_init(ipu);
-	if (ret)
-		goto out_failed_irq;
-
 	ret = device_reset(&pdev->dev);
 	if (ret) {
 		dev_err(&pdev->dev, "failed to reset: %d\n", ret);
@@ -1302,6 +1301,10 @@
 	if (ret)
 		goto out_failed_reset;
 
+	ret = ipu_irq_init(ipu);
+	if (ret)
+		goto out_failed_irq;
+
 	/* Set MCU_T to divide MCU access window into 2 */
 	ipu_cm_write(ipu, 0x00400000L | (IPU_MCU_T_DEFAULT << 18),
 			IPU_DISP_GEN);
@@ -1324,9 +1327,9 @@
 failed_add_clients:
 	ipu_submodules_exit(ipu);
 failed_submodules_init:
-out_failed_reset:
 	ipu_irq_exit(ipu);
 out_failed_irq:
+out_failed_reset:
 	clk_disable_unprepare(ipu->clk);
 	return ret;
 }
diff --git a/drivers/hwmon/ads1015.c b/drivers/hwmon/ads1015.c
index f155b83..2b3105c 100644
--- a/drivers/hwmon/ads1015.c
+++ b/drivers/hwmon/ads1015.c
@@ -126,7 +126,7 @@
 	struct ads1015_data *data = i2c_get_clientdata(client);
 	unsigned int pga = data->channel_data[channel].pga;
 	int fullscale = fullscale_table[pga];
-	const unsigned mask = data->id == ads1115 ? 0x7fff : 0x7ff0;
+	const int mask = data->id == ads1115 ? 0x7fff : 0x7ff0;
 
 	return DIV_ROUND_CLOSEST(reg * fullscale, mask);
 }
diff --git a/drivers/hwmon/dell-smm-hwmon.c b/drivers/hwmon/dell-smm-hwmon.c
index c848789..c43318d 100644
--- a/drivers/hwmon/dell-smm-hwmon.c
+++ b/drivers/hwmon/dell-smm-hwmon.c
@@ -932,6 +932,17 @@
 static struct dmi_system_id i8k_blacklist_dmi_table[] __initdata = {
 	{
 		/*
+		 * CPU fan speed going up and down on Dell Studio XPS 8000
+		 * for unknown reasons.
+		 */
+		.ident = "Dell Studio XPS 8000",
+		.matches = {
+			DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+			DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Studio XPS 8000"),
+		},
+	},
+	{
+		/*
 		 * CPU fan speed going up and down on Dell Studio XPS 8100
 		 * for unknown reasons.
 		 */
diff --git a/drivers/hwmon/fam15h_power.c b/drivers/hwmon/fam15h_power.c
index f77eb97..4f695d8 100644
--- a/drivers/hwmon/fam15h_power.c
+++ b/drivers/hwmon/fam15h_power.c
@@ -90,7 +90,15 @@
 	pci_bus_read_config_dword(f4->bus, PCI_DEVFN(PCI_SLOT(f4->devfn), 5),
 				  REG_TDP_LIMIT3, &val);
 
-	tdp_limit = val >> 16;
+	/*
+	 * On Carrizo and later platforms, ApmTdpLimit bit field
+	 * is extended to 16:31 from 16:28.
+	 */
+	if (boot_cpu_data.x86 == 0x15 && boot_cpu_data.x86_model >= 0x60)
+		tdp_limit = val >> 16;
+	else
+		tdp_limit = (val >> 16) & 0x1fff;
+
 	curr_pwr_watts = ((u64)(tdp_limit +
 				data->base_tdp)) << running_avg_range;
 	curr_pwr_watts -= running_avg_capture;
diff --git a/drivers/hwmon/gpio-fan.c b/drivers/hwmon/gpio-fan.c
index 82de3de..685568b 100644
--- a/drivers/hwmon/gpio-fan.c
+++ b/drivers/hwmon/gpio-fan.c
@@ -406,16 +406,11 @@
 				  unsigned long *state)
 {
 	struct gpio_fan_data *fan_data = cdev->devdata;
-	int r;
 
 	if (!fan_data)
 		return -EINVAL;
 
-	r = get_fan_speed_index(fan_data);
-	if (r < 0)
-		return r;
-
-	*state = r;
+	*state = fan_data->speed_index;
 	return 0;
 }
 
diff --git a/drivers/hwspinlock/hwspinlock_core.c b/drivers/hwspinlock/hwspinlock_core.c
index 52f708b..d50c701 100644
--- a/drivers/hwspinlock/hwspinlock_core.c
+++ b/drivers/hwspinlock/hwspinlock_core.c
@@ -313,6 +313,10 @@
 		hwlock = radix_tree_deref_slot(slot);
 		if (unlikely(!hwlock))
 			continue;
+		if (radix_tree_is_indirect_ptr(hwlock)) {
+			slot = radix_tree_iter_retry(&iter);
+			continue;
+		}
 
 		if (hwlock->bank->dev->of_node == args.np) {
 			ret = 0;
diff --git a/drivers/i2c/busses/i2c-brcmstb.c b/drivers/i2c/busses/i2c-brcmstb.c
index 3711df1..4a45408 100644
--- a/drivers/i2c/busses/i2c-brcmstb.c
+++ b/drivers/i2c/busses/i2c-brcmstb.c
@@ -586,8 +586,7 @@
 	if (!dev)
 		return -ENOMEM;
 
-	dev->bsc_regmap = devm_kzalloc(&pdev->dev, sizeof(struct bsc_regs *),
-				       GFP_KERNEL);
+	dev->bsc_regmap = devm_kzalloc(&pdev->dev, sizeof(*dev->bsc_regmap), GFP_KERNEL);
 	if (!dev->bsc_regmap)
 		return -ENOMEM;
 
diff --git a/drivers/i2c/busses/i2c-designware-core.c b/drivers/i2c/busses/i2c-designware-core.c
index ba9732c..10fbd6d 100644
--- a/drivers/i2c/busses/i2c-designware-core.c
+++ b/drivers/i2c/busses/i2c-designware-core.c
@@ -874,7 +874,8 @@
 	i2c_set_adapdata(adap, dev);
 
 	i2c_dw_disable_int(dev);
-	r = devm_request_irq(dev->dev, dev->irq, i2c_dw_isr, IRQF_SHARED,
+	r = devm_request_irq(dev->dev, dev->irq, i2c_dw_isr,
+			     IRQF_SHARED | IRQF_COND_SUSPEND,
 			     dev_name(dev->dev), dev);
 	if (r) {
 		dev_err(dev->dev, "failure requesting irq %i: %d\n",
diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c
index f62d697..27fa0cb 100644
--- a/drivers/i2c/busses/i2c-i801.c
+++ b/drivers/i2c/busses/i2c-i801.c
@@ -1271,6 +1271,8 @@
 	switch (dev->device) {
 	case PCI_DEVICE_ID_INTEL_SUNRISEPOINT_H_SMBUS:
 	case PCI_DEVICE_ID_INTEL_SUNRISEPOINT_LP_SMBUS:
+	case PCI_DEVICE_ID_INTEL_LEWISBURG_SMBUS:
+	case PCI_DEVICE_ID_INTEL_LEWISBURG_SSKU_SMBUS:
 	case PCI_DEVICE_ID_INTEL_DNV_SMBUS:
 		priv->features |= FEATURE_I2C_BLOCK_READ;
 		priv->features |= FEATURE_IRQ;
diff --git a/drivers/i2c/busses/i2c-omap.c b/drivers/i2c/busses/i2c-omap.c
index 08d26ba..13c4529 100644
--- a/drivers/i2c/busses/i2c-omap.c
+++ b/drivers/i2c/busses/i2c-omap.c
@@ -1450,7 +1450,8 @@
 
 err_unuse_clocks:
 	omap_i2c_write_reg(omap, OMAP_I2C_CON_REG, 0);
-	pm_runtime_put(omap->dev);
+	pm_runtime_dont_use_autosuspend(omap->dev);
+	pm_runtime_put_sync(omap->dev);
 	pm_runtime_disable(&pdev->dev);
 err_free_mem:
 
@@ -1468,6 +1469,7 @@
 		return ret;
 
 	omap_i2c_write_reg(omap, OMAP_I2C_CON_REG, 0);
+	pm_runtime_dont_use_autosuspend(&pdev->dev);
 	pm_runtime_put_sync(&pdev->dev);
 	pm_runtime_disable(&pdev->dev);
 	return 0;
diff --git a/drivers/i2c/busses/i2c-piix4.c b/drivers/i2c/busses/i2c-piix4.c
index e045985..93f2895 100644
--- a/drivers/i2c/busses/i2c-piix4.c
+++ b/drivers/i2c/busses/i2c-piix4.c
@@ -137,10 +137,11 @@
 };
 
 /* SB800 globals */
+static DEFINE_MUTEX(piix4_mutex_sb800);
 static const char *piix4_main_port_names_sb800[PIIX4_MAX_ADAPTERS] = {
-	"SDA0", "SDA2", "SDA3", "SDA4"
+	" port 0", " port 2", " port 3", " port 4"
 };
-static const char *piix4_aux_port_name_sb800 = "SDA1";
+static const char *piix4_aux_port_name_sb800 = " port 1";
 
 struct i2c_piix4_adapdata {
 	unsigned short smba;
@@ -148,7 +149,6 @@
 	/* SB800 */
 	bool sb800_main;
 	unsigned short port;
-	struct mutex *mutex;
 };
 
 static int piix4_setup(struct pci_dev *PIIX4_dev,
@@ -275,10 +275,12 @@
 	else
 		smb_en = (aux) ? 0x28 : 0x2c;
 
+	mutex_lock(&piix4_mutex_sb800);
 	outb_p(smb_en, SB800_PIIX4_SMB_IDX);
 	smba_en_lo = inb_p(SB800_PIIX4_SMB_IDX + 1);
 	outb_p(smb_en + 1, SB800_PIIX4_SMB_IDX);
 	smba_en_hi = inb_p(SB800_PIIX4_SMB_IDX + 1);
+	mutex_unlock(&piix4_mutex_sb800);
 
 	if (!smb_en) {
 		smb_en_status = smba_en_lo & 0x10;
@@ -559,7 +561,7 @@
 	u8 port;
 	int retval;
 
-	mutex_lock(adapdata->mutex);
+	mutex_lock(&piix4_mutex_sb800);
 
 	outb_p(SB800_PIIX4_PORT_IDX, SB800_PIIX4_SMB_IDX);
 	smba_en_lo = inb_p(SB800_PIIX4_SMB_IDX + 1);
@@ -574,7 +576,7 @@
 
 	outb_p(smba_en_lo, SB800_PIIX4_SMB_IDX + 1);
 
-	mutex_unlock(adapdata->mutex);
+	mutex_unlock(&piix4_mutex_sb800);
 
 	return retval;
 }
@@ -625,6 +627,7 @@
 static struct i2c_adapter *piix4_aux_adapter;
 
 static int piix4_add_adapter(struct pci_dev *dev, unsigned short smba,
+			     bool sb800_main, unsigned short port,
 			     const char *name, struct i2c_adapter **padap)
 {
 	struct i2c_adapter *adap;
@@ -639,7 +642,8 @@
 
 	adap->owner = THIS_MODULE;
 	adap->class = I2C_CLASS_HWMON | I2C_CLASS_SPD;
-	adap->algo = &smbus_algorithm;
+	adap->algo = sb800_main ? &piix4_smbus_algorithm_sb800
+				: &smbus_algorithm;
 
 	adapdata = kzalloc(sizeof(*adapdata), GFP_KERNEL);
 	if (adapdata == NULL) {
@@ -649,12 +653,14 @@
 	}
 
 	adapdata->smba = smba;
+	adapdata->sb800_main = sb800_main;
+	adapdata->port = port;
 
 	/* set up the sysfs linkage to our parent device */
 	adap->dev.parent = &dev->dev;
 
 	snprintf(adap->name, sizeof(adap->name),
-		"SMBus PIIX4 adapter %s at %04x", name, smba);
+		"SMBus PIIX4 adapter%s at %04x", name, smba);
 
 	i2c_set_adapdata(adap, adapdata);
 
@@ -673,30 +679,16 @@
 
 static int piix4_add_adapters_sb800(struct pci_dev *dev, unsigned short smba)
 {
-	struct mutex *mutex;
 	struct i2c_piix4_adapdata *adapdata;
 	int port;
 	int retval;
 
-	mutex = kzalloc(sizeof(*mutex), GFP_KERNEL);
-	if (mutex == NULL)
-		return -ENOMEM;
-
-	mutex_init(mutex);
-
 	for (port = 0; port < PIIX4_MAX_ADAPTERS; port++) {
-		retval = piix4_add_adapter(dev, smba,
+		retval = piix4_add_adapter(dev, smba, true, port,
 					   piix4_main_port_names_sb800[port],
 					   &piix4_main_adapters[port]);
 		if (retval < 0)
 			goto error;
-
-		piix4_main_adapters[port]->algo = &piix4_smbus_algorithm_sb800;
-
-		adapdata = i2c_get_adapdata(piix4_main_adapters[port]);
-		adapdata->sb800_main = true;
-		adapdata->port = port;
-		adapdata->mutex = mutex;
 	}
 
 	return retval;
@@ -714,19 +706,20 @@
 		}
 	}
 
-	kfree(mutex);
-
 	return retval;
 }
 
 static int piix4_probe(struct pci_dev *dev, const struct pci_device_id *id)
 {
 	int retval;
+	bool is_sb800 = false;
 
 	if ((dev->vendor == PCI_VENDOR_ID_ATI &&
 	     dev->device == PCI_DEVICE_ID_ATI_SBX00_SMBUS &&
 	     dev->revision >= 0x40) ||
 	    dev->vendor == PCI_VENDOR_ID_AMD) {
+		is_sb800 = true;
+
 		if (!request_region(SB800_PIIX4_SMB_IDX, 2, "smba_idx")) {
 			dev_err(&dev->dev,
 			"SMBus base address index region 0x%x already in use!\n",
@@ -756,7 +749,7 @@
 			return retval;
 
 		/* Try to register main SMBus adapter, give up if we can't */
-		retval = piix4_add_adapter(dev, retval, "main",
+		retval = piix4_add_adapter(dev, retval, false, 0, "",
 					   &piix4_main_adapters[0]);
 		if (retval < 0)
 			return retval;
@@ -783,7 +776,8 @@
 	if (retval > 0) {
 		/* Try to add the aux adapter if it exists,
 		 * piix4_add_adapter will clean up if this fails */
-		piix4_add_adapter(dev, retval, piix4_aux_port_name_sb800,
+		piix4_add_adapter(dev, retval, false, 0,
+				  is_sb800 ? piix4_aux_port_name_sb800 : "",
 				  &piix4_aux_adapter);
 	}
 
@@ -798,10 +792,8 @@
 		i2c_del_adapter(adap);
 		if (adapdata->port == 0) {
 			release_region(adapdata->smba, SMBIOSIZE);
-			if (adapdata->sb800_main) {
-				kfree(adapdata->mutex);
+			if (adapdata->sb800_main)
 				release_region(SB800_PIIX4_SMB_IDX, 2);
-			}
 		}
 		kfree(adapdata);
 		kfree(adap);
diff --git a/drivers/i2c/busses/i2c-uniphier-f.c b/drivers/i2c/busses/i2c-uniphier-f.c
index f3e5ff8..213ba55 100644
--- a/drivers/i2c/busses/i2c-uniphier-f.c
+++ b/drivers/i2c/busses/i2c-uniphier-f.c
@@ -467,7 +467,7 @@
 		bus_speed = UNIPHIER_FI2C_DEFAULT_SPEED;
 
 	if (!bus_speed) {
-		dev_err(dev, "clock-freqyency should not be zero\n");
+		dev_err(dev, "clock-frequency should not be zero\n");
 		return -EINVAL;
 	}
 
diff --git a/drivers/i2c/busses/i2c-uniphier.c b/drivers/i2c/busses/i2c-uniphier.c
index 1f4f3f5..89eaa8a 100644
--- a/drivers/i2c/busses/i2c-uniphier.c
+++ b/drivers/i2c/busses/i2c-uniphier.c
@@ -328,7 +328,7 @@
 		bus_speed = UNIPHIER_I2C_DEFAULT_SPEED;
 
 	if (!bus_speed) {
-		dev_err(dev, "clock-freqyency should not be zero\n");
+		dev_err(dev, "clock-frequency should not be zero\n");
 		return -EINVAL;
 	}
 
diff --git a/drivers/iio/accel/Kconfig b/drivers/iio/accel/Kconfig
index edc29b1..833ea9d 100644
--- a/drivers/iio/accel/Kconfig
+++ b/drivers/iio/accel/Kconfig
@@ -213,6 +213,7 @@
 config STK8BA50
 	tristate "Sensortek STK8BA50 3-Axis Accelerometer Driver"
 	depends on I2C
+	depends on IIO_TRIGGER
 	help
 	  Say yes here to get support for the Sensortek STK8BA50 3-axis
 	  accelerometer.
diff --git a/drivers/iio/adc/Kconfig b/drivers/iio/adc/Kconfig
index 605ff42..283ded7 100644
--- a/drivers/iio/adc/Kconfig
+++ b/drivers/iio/adc/Kconfig
@@ -175,6 +175,7 @@
 config EXYNOS_ADC
 	tristate "Exynos ADC driver support"
 	depends on ARCH_EXYNOS || ARCH_S3C24XX || ARCH_S3C64XX || (OF && COMPILE_TEST)
+	depends on HAS_IOMEM
 	help
 	  Core support for the ADC block found in the Samsung EXYNOS series
 	  of SoCs for drivers such as the touchscreen and hwmon to use to share
@@ -207,6 +208,7 @@
 config IMX7D_ADC
 	tristate "IMX7D ADC driver"
 	depends on ARCH_MXC || COMPILE_TEST
+	depends on HAS_IOMEM
 	help
 	  Say yes here to build support for IMX7D ADC.
 
@@ -409,6 +411,7 @@
 config VF610_ADC
 	tristate "Freescale vf610 ADC driver"
 	depends on OF
+	depends on HAS_IOMEM
 	select IIO_BUFFER
 	select IIO_TRIGGERED_BUFFER
 	help
diff --git a/drivers/iio/adc/ti_am335x_adc.c b/drivers/iio/adc/ti_am335x_adc.c
index 942320e..c1e0553 100644
--- a/drivers/iio/adc/ti_am335x_adc.c
+++ b/drivers/iio/adc/ti_am335x_adc.c
@@ -289,7 +289,7 @@
 		goto error_kfifo_free;
 
 	indio_dev->setup_ops = setup_ops;
-	indio_dev->modes |= INDIO_BUFFER_HARDWARE;
+	indio_dev->modes |= INDIO_BUFFER_SOFTWARE;
 
 	return 0;
 
diff --git a/drivers/iio/dac/mcp4725.c b/drivers/iio/dac/mcp4725.c
index 43d1458..b4dde83 100644
--- a/drivers/iio/dac/mcp4725.c
+++ b/drivers/iio/dac/mcp4725.c
@@ -300,6 +300,7 @@
 	data->client = client;
 
 	indio_dev->dev.parent = &client->dev;
+	indio_dev->name = id->name;
 	indio_dev->info = &mcp4725_info;
 	indio_dev->channels = &mcp4725_channel;
 	indio_dev->num_channels = 1;
diff --git a/drivers/iio/humidity/dht11.c b/drivers/iio/humidity/dht11.c
index 1165b1c..cfc5a05 100644
--- a/drivers/iio/humidity/dht11.c
+++ b/drivers/iio/humidity/dht11.c
@@ -117,7 +117,7 @@
 	if (((hum_int + hum_dec + temp_int + temp_dec) & 0xff) != checksum)
 		return -EIO;
 
-	dht11->timestamp = ktime_get_real_ns();
+	dht11->timestamp = ktime_get_boot_ns();
 	if (hum_int < 20) {  /* DHT22 */
 		dht11->temperature = (((temp_int & 0x7f) << 8) + temp_dec) *
 					((temp_int & 0x80) ? -100 : 100);
@@ -145,7 +145,7 @@
 
 	/* TODO: Consider making the handler safe for IRQ sharing */
 	if (dht11->num_edges < DHT11_EDGES_PER_READ && dht11->num_edges >= 0) {
-		dht11->edges[dht11->num_edges].ts = ktime_get_real_ns();
+		dht11->edges[dht11->num_edges].ts = ktime_get_boot_ns();
 		dht11->edges[dht11->num_edges++].value =
 						gpio_get_value(dht11->gpio);
 
@@ -164,7 +164,7 @@
 	int ret, timeres;
 
 	mutex_lock(&dht11->lock);
-	if (dht11->timestamp + DHT11_DATA_VALID_TIME < ktime_get_real_ns()) {
+	if (dht11->timestamp + DHT11_DATA_VALID_TIME < ktime_get_boot_ns()) {
 		timeres = ktime_get_resolution_ns();
 		if (DHT11_DATA_BIT_HIGH < 2 * timeres) {
 			dev_err(dht11->dev, "timeresolution %dns too low\n",
@@ -279,7 +279,7 @@
 		return -EINVAL;
 	}
 
-	dht11->timestamp = ktime_get_real_ns() - DHT11_DATA_VALID_TIME - 1;
+	dht11->timestamp = ktime_get_boot_ns() - DHT11_DATA_VALID_TIME - 1;
 	dht11->num_edges = -1;
 
 	platform_set_drvdata(pdev, iio);
diff --git a/drivers/iio/imu/adis_buffer.c b/drivers/iio/imu/adis_buffer.c
index cb32b59..36607d5 100644
--- a/drivers/iio/imu/adis_buffer.c
+++ b/drivers/iio/imu/adis_buffer.c
@@ -43,7 +43,7 @@
 		return -ENOMEM;
 
 	rx = adis->buffer;
-	tx = rx + indio_dev->scan_bytes;
+	tx = rx + scan_count;
 
 	spi_message_init(&adis->msg);
 
diff --git a/drivers/iio/imu/inv_mpu6050/Kconfig b/drivers/iio/imu/inv_mpu6050/Kconfig
index 48fbc0b..8f8d137 100644
--- a/drivers/iio/imu/inv_mpu6050/Kconfig
+++ b/drivers/iio/imu/inv_mpu6050/Kconfig
@@ -5,9 +5,9 @@
 config INV_MPU6050_IIO
 	tristate "Invensense MPU6050 devices"
 	depends on I2C && SYSFS
+	depends on I2C_MUX
 	select IIO_BUFFER
 	select IIO_TRIGGERED_BUFFER
-	select I2C_MUX
 	help
 	  This driver supports the Invensense MPU6050 devices.
 	  This driver can also support MPU6500 in MPU6050 compatibility mode
diff --git a/drivers/iio/inkern.c b/drivers/iio/inkern.c
index 80fbbfd..734a004 100644
--- a/drivers/iio/inkern.c
+++ b/drivers/iio/inkern.c
@@ -349,6 +349,8 @@
 
 void iio_channel_release(struct iio_channel *channel)
 {
+	if (!channel)
+		return;
 	iio_device_put(channel->indio_dev);
 	kfree(channel);
 }
diff --git a/drivers/iio/light/acpi-als.c b/drivers/iio/light/acpi-als.c
index 60537ec..53201d9 100644
--- a/drivers/iio/light/acpi-als.c
+++ b/drivers/iio/light/acpi-als.c
@@ -54,7 +54,9 @@
 			.realbits	= 32,
 			.storagebits	= 32,
 		},
-		.info_mask_separate	= BIT(IIO_CHAN_INFO_RAW),
+		/* _RAW is here for backward ABI compatibility */
+		.info_mask_separate	= BIT(IIO_CHAN_INFO_RAW) |
+					  BIT(IIO_CHAN_INFO_PROCESSED),
 	},
 };
 
@@ -152,7 +154,7 @@
 	s32 temp_val;
 	int ret;
 
-	if (mask != IIO_CHAN_INFO_RAW)
+	if ((mask != IIO_CHAN_INFO_PROCESSED) && (mask != IIO_CHAN_INFO_RAW))
 		return -EINVAL;
 
 	/* we support only illumination (_ALI) so far. */
diff --git a/drivers/iio/light/ltr501.c b/drivers/iio/light/ltr501.c
index 809a961..6bf89d8 100644
--- a/drivers/iio/light/ltr501.c
+++ b/drivers/iio/light/ltr501.c
@@ -180,7 +180,7 @@
 			{500000, 2000000}
 };
 
-static unsigned int ltr501_match_samp_freq(const struct ltr501_samp_table *tab,
+static int ltr501_match_samp_freq(const struct ltr501_samp_table *tab,
 					   int len, int val, int val2)
 {
 	int i, freq;
diff --git a/drivers/iio/pressure/mpl115.c b/drivers/iio/pressure/mpl115.c
index f5ecd6e..a0d7dee 100644
--- a/drivers/iio/pressure/mpl115.c
+++ b/drivers/iio/pressure/mpl115.c
@@ -117,7 +117,7 @@
 		*val = ret >> 6;
 		return IIO_VAL_INT;
 	case IIO_CHAN_INFO_OFFSET:
-		*val = 605;
+		*val = -605;
 		*val2 = 750000;
 		return IIO_VAL_INT_PLUS_MICRO;
 	case IIO_CHAN_INFO_SCALE:
diff --git a/drivers/iio/proximity/pulsedlight-lidar-lite-v2.c b/drivers/iio/proximity/pulsedlight-lidar-lite-v2.c
index 93e29fb..db35e04 100644
--- a/drivers/iio/proximity/pulsedlight-lidar-lite-v2.c
+++ b/drivers/iio/proximity/pulsedlight-lidar-lite-v2.c
@@ -87,7 +87,7 @@
 
 	ret = i2c_transfer(client->adapter, msg, 2);
 
-	return (ret == 2) ? 0 : ret;
+	return (ret == 2) ? 0 : -EIO;
 }
 
 static int lidar_smbus_xfer(struct lidar_data *data, u8 reg, u8 *val, int len)
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 00da80e..94b80a5 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -358,6 +358,7 @@
 	ret = device->query_device(device, &device->attrs, &uhw);
 	if (ret) {
 		printk(KERN_WARNING "Couldn't query the device attributes\n");
+		ib_cache_cleanup_one(device);
 		goto out;
 	}
 
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index f334090..1e37f35 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -1071,7 +1071,7 @@
 		}
 	}
 
-	if (rec->hop_limit > 1 || use_roce) {
+	if (rec->hop_limit > 0 || use_roce) {
 		ah_attr->ah_flags = IB_AH_GRH;
 		ah_attr->grh.dgid = rec->dgid;
 
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index 3de9351..14606af 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -336,7 +336,6 @@
 	union ib_gid gid;
 	struct ib_gid_attr gid_attr = {};
 	ssize_t ret;
-	va_list args;
 
 	ret = ib_query_gid(p->ibdev, p->port_num, tab_attr->index, &gid,
 			   &gid_attr);
@@ -348,7 +347,6 @@
 err:
 	if (gid_attr.ndev)
 		dev_put(gid_attr.ndev);
-	va_end(args);
 	return ret;
 }
 
@@ -722,12 +720,11 @@
 
 	if (get_perf_mad(dev, port_num, IB_PMA_CLASS_PORT_INFO,
 				&cpi, 40, sizeof(cpi)) >= 0) {
-
-		if (cpi.capability_mask && IB_PMA_CLASS_CAP_EXT_WIDTH)
+		if (cpi.capability_mask & IB_PMA_CLASS_CAP_EXT_WIDTH)
 			/* We have extended counters */
 			return &pma_group_ext;
 
-		if (cpi.capability_mask && IB_PMA_CLASS_CAP_EXT_WIDTH_NOIETF)
+		if (cpi.capability_mask & IB_PMA_CLASS_CAP_EXT_WIDTH_NOIETF)
 			/* But not the IETF ones */
 			return &pma_group_noietf;
 	}
diff --git a/drivers/infiniband/core/ud_header.c b/drivers/infiniband/core/ud_header.c
index 19837d2..2116132 100644
--- a/drivers/infiniband/core/ud_header.c
+++ b/drivers/infiniband/core/ud_header.c
@@ -322,6 +322,8 @@
 		      int    immediate_present,
 		      struct ib_ud_header *header)
 {
+	size_t udp_bytes = udp_present ? IB_UDP_BYTES : 0;
+
 	grh_present = grh_present && !ip_version;
 	memset(header, 0, sizeof *header);
 
@@ -353,7 +355,8 @@
 	if (ip_version == 6 || grh_present) {
 		header->grh.ip_version      = 6;
 		header->grh.payload_length  =
-			cpu_to_be16((IB_BTH_BYTES     +
+			cpu_to_be16((udp_bytes        +
+				     IB_BTH_BYTES     +
 				     IB_DETH_BYTES    +
 				     payload_bytes    +
 				     4                + /* ICRC     */
@@ -362,8 +365,6 @@
 	}
 
 	if (ip_version == 4) {
-		int udp_bytes = udp_present ? IB_UDP_BYTES : 0;
-
 		header->ip4.ver = 4; /* version 4 */
 		header->ip4.hdr_len = 5; /* 5 words */
 		header->ip4.tot_len =
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 6ffc9c4..6c6fbff 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -1970,7 +1970,8 @@
 		   resp_size);
 	INIT_UDATA(&uhw, buf + sizeof(cmd),
 		   (unsigned long)cmd.response + resp_size,
-		   in_len - sizeof(cmd), out_len - resp_size);
+		   in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
+		   out_len - resp_size);
 
 	memset(&cmd_ex, 0, sizeof(cmd_ex));
 	cmd_ex.user_handle = cmd.user_handle;
@@ -3413,7 +3414,8 @@
 
 	INIT_UDATA(&udata, buf + sizeof cmd,
 		   (unsigned long) cmd.response + sizeof resp,
-		   in_len - sizeof cmd, out_len - sizeof resp);
+		   in_len - sizeof cmd - sizeof(struct ib_uverbs_cmd_hdr),
+		   out_len - sizeof resp);
 
 	ret = __uverbs_create_xsrq(file, ib_dev, &xcmd, &udata);
 	if (ret)
@@ -3439,7 +3441,8 @@
 
 	INIT_UDATA(&udata, buf + sizeof cmd,
 		   (unsigned long) cmd.response + sizeof resp,
-		   in_len - sizeof cmd, out_len - sizeof resp);
+		   in_len - sizeof cmd - sizeof(struct ib_uverbs_cmd_hdr),
+		   out_len - sizeof resp);
 
 	ret = __uverbs_create_xsrq(file, ib_dev, &cmd, &udata);
 	if (ret)
diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index 26833bf..d68f506 100644
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -817,17 +817,48 @@
 	return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
 }
 
-static void edit_counter(struct mlx4_counter *cnt,
-					struct ib_pma_portcounters *pma_cnt)
+static void edit_counter(struct mlx4_counter *cnt, void *counters,
+			 __be16 attr_id)
 {
-	ASSIGN_32BIT_COUNTER(pma_cnt->port_xmit_data,
-			     (be64_to_cpu(cnt->tx_bytes) >> 2));
-	ASSIGN_32BIT_COUNTER(pma_cnt->port_rcv_data,
-			     (be64_to_cpu(cnt->rx_bytes) >> 2));
-	ASSIGN_32BIT_COUNTER(pma_cnt->port_xmit_packets,
-			     be64_to_cpu(cnt->tx_frames));
-	ASSIGN_32BIT_COUNTER(pma_cnt->port_rcv_packets,
-			     be64_to_cpu(cnt->rx_frames));
+	switch (attr_id) {
+	case IB_PMA_PORT_COUNTERS:
+	{
+		struct ib_pma_portcounters *pma_cnt =
+			(struct ib_pma_portcounters *)counters;
+
+		ASSIGN_32BIT_COUNTER(pma_cnt->port_xmit_data,
+				     (be64_to_cpu(cnt->tx_bytes) >> 2));
+		ASSIGN_32BIT_COUNTER(pma_cnt->port_rcv_data,
+				     (be64_to_cpu(cnt->rx_bytes) >> 2));
+		ASSIGN_32BIT_COUNTER(pma_cnt->port_xmit_packets,
+				     be64_to_cpu(cnt->tx_frames));
+		ASSIGN_32BIT_COUNTER(pma_cnt->port_rcv_packets,
+				     be64_to_cpu(cnt->rx_frames));
+		break;
+	}
+	case IB_PMA_PORT_COUNTERS_EXT:
+	{
+		struct ib_pma_portcounters_ext *pma_cnt_ext =
+			(struct ib_pma_portcounters_ext *)counters;
+
+		pma_cnt_ext->port_xmit_data =
+			cpu_to_be64(be64_to_cpu(cnt->tx_bytes) >> 2);
+		pma_cnt_ext->port_rcv_data =
+			cpu_to_be64(be64_to_cpu(cnt->rx_bytes) >> 2);
+		pma_cnt_ext->port_xmit_packets = cnt->tx_frames;
+		pma_cnt_ext->port_rcv_packets = cnt->rx_frames;
+		break;
+	}
+	}
+}
+
+static int iboe_process_mad_port_info(void *out_mad)
+{
+	struct ib_class_port_info cpi = {};
+
+	cpi.capability_mask = IB_PMA_CLASS_CAP_EXT_WIDTH;
+	memcpy(out_mad, &cpi, sizeof(cpi));
+	return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
 }
 
 static int iboe_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
@@ -842,6 +873,9 @@
 	if (in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_PERF_MGMT)
 		return -EINVAL;
 
+	if (in_mad->mad_hdr.attr_id == IB_PMA_CLASS_PORT_INFO)
+		return iboe_process_mad_port_info((void *)(out_mad->data + 40));
+
 	memset(&counter_stats, 0, sizeof(counter_stats));
 	mutex_lock(&dev->counters_table[port_num - 1].mutex);
 	list_for_each_entry(tmp_counter,
@@ -863,7 +897,8 @@
 		switch (counter_stats.counter_mode & 0xf) {
 		case 0:
 			edit_counter(&counter_stats,
-				     (void *)(out_mad->data + 40));
+				     (void *)(out_mad->data + 40),
+				     in_mad->mad_hdr.attr_id);
 			err = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
 			break;
 		default:
@@ -894,8 +929,10 @@
 	 */
 	if (link == IB_LINK_LAYER_INFINIBAND) {
 		if (mlx4_is_slave(dev->dev) &&
-		    in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT &&
-		    in_mad->mad_hdr.attr_id == IB_PMA_PORT_COUNTERS)
+		    (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT &&
+		     (in_mad->mad_hdr.attr_id == IB_PMA_PORT_COUNTERS ||
+		      in_mad->mad_hdr.attr_id == IB_PMA_PORT_COUNTERS_EXT ||
+		      in_mad->mad_hdr.attr_id == IB_PMA_CLASS_PORT_INFO)))
 			return iboe_process_mad(ibdev, mad_flags, port_num, in_wc,
 						in_grh, in_mad, out_mad);
 
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index bc5536f..fd97534 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -1681,9 +1681,12 @@
 	}
 
 	if (qp->ibqp.uobject)
-		context->usr_page = cpu_to_be32(to_mucontext(ibqp->uobject->context)->uar.index);
+		context->usr_page = cpu_to_be32(
+			mlx4_to_hw_uar_index(dev->dev,
+					     to_mucontext(ibqp->uobject->context)->uar.index));
 	else
-		context->usr_page = cpu_to_be32(dev->priv_uar.index);
+		context->usr_page = cpu_to_be32(
+			mlx4_to_hw_uar_index(dev->dev, dev->priv_uar.index));
 
 	if (attr_mask & IB_QP_DEST_QPN)
 		context->remote_qpn = cpu_to_be32(attr->dest_qp_num);
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index ec737e2..03c418c 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -844,6 +844,8 @@
 	int err;
 	int i;
 	size_t reqlen;
+	size_t min_req_v2 = offsetof(struct mlx5_ib_alloc_ucontext_req_v2,
+				     max_cqe_version);
 
 	if (!dev->ib_active)
 		return ERR_PTR(-EAGAIN);
@@ -854,7 +856,7 @@
 	reqlen = udata->inlen - sizeof(struct ib_uverbs_cmd_hdr);
 	if (reqlen == sizeof(struct mlx5_ib_alloc_ucontext_req))
 		ver = 0;
-	else if (reqlen >= sizeof(struct mlx5_ib_alloc_ucontext_req_v2))
+	else if (reqlen >= min_req_v2)
 		ver = 2;
 	else
 		return ERR_PTR(-EINVAL);
@@ -2214,7 +2216,9 @@
 		(1ull << IB_USER_VERBS_CMD_CREATE_XSRQ)		|
 		(1ull << IB_USER_VERBS_CMD_OPEN_QP);
 	dev->ib_dev.uverbs_ex_cmd_mask =
-		(1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE);
+		(1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE)	|
+		(1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ)	|
+		(1ull << IB_USER_VERBS_EX_CMD_CREATE_QP);
 
 	dev->ib_dev.query_device	= mlx5_ib_query_device;
 	dev->ib_dev.query_port		= mlx5_ib_query_port;
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 8fb9c27..34cb8e8 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -270,8 +270,10 @@
 		/* fall through */
 	case IB_QPT_RC:
 		size += sizeof(struct mlx5_wqe_ctrl_seg) +
-			sizeof(struct mlx5_wqe_atomic_seg) +
-			sizeof(struct mlx5_wqe_raddr_seg);
+			max(sizeof(struct mlx5_wqe_atomic_seg) +
+			    sizeof(struct mlx5_wqe_raddr_seg),
+			    sizeof(struct mlx5_wqe_umr_ctrl_seg) +
+			    sizeof(struct mlx5_mkey_seg));
 		break;
 
 	case IB_QPT_XRC_TGT:
@@ -279,9 +281,9 @@
 
 	case IB_QPT_UC:
 		size += sizeof(struct mlx5_wqe_ctrl_seg) +
-			sizeof(struct mlx5_wqe_raddr_seg) +
-			sizeof(struct mlx5_wqe_umr_ctrl_seg) +
-			sizeof(struct mlx5_mkey_seg);
+			max(sizeof(struct mlx5_wqe_raddr_seg),
+			    sizeof(struct mlx5_wqe_umr_ctrl_seg) +
+			    sizeof(struct mlx5_mkey_seg));
 		break;
 
 	case IB_QPT_UD:
@@ -1036,7 +1038,7 @@
 	wq = MLX5_ADDR_OF(rqc, rqc, wq);
 	MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
 	MLX5_SET(wq, wq, end_padding_mode,
-		 MLX5_GET64(qpc, qpc, end_padding_mode));
+		 MLX5_GET(qpc, qpc, end_padding_mode));
 	MLX5_SET(wq, wq, page_offset, MLX5_GET(qpc, qpc, page_offset));
 	MLX5_SET(wq, wq, pd, MLX5_GET(qpc, qpc, pd));
 	MLX5_SET64(wq, wq, dbr_addr, MLX5_GET64(qpc, qpc, dbr_addr));
@@ -1615,15 +1617,6 @@
 
 	if (pd) {
 		dev = to_mdev(pd->device);
-	} else {
-		/* being cautious here */
-		if (init_attr->qp_type != IB_QPT_XRC_TGT &&
-		    init_attr->qp_type != MLX5_IB_QPT_REG_UMR) {
-			pr_warn("%s: no PD for transport %s\n", __func__,
-				ib_qp_type_str(init_attr->qp_type));
-			return ERR_PTR(-EINVAL);
-		}
-		dev = to_mdev(to_mxrcd(init_attr->xrcd)->ibxrcd.device);
 
 		if (init_attr->qp_type == IB_QPT_RAW_PACKET) {
 			if (!pd->uobject) {
@@ -1634,6 +1627,15 @@
 				return ERR_PTR(-EINVAL);
 			}
 		}
+	} else {
+		/* being cautious here */
+		if (init_attr->qp_type != IB_QPT_XRC_TGT &&
+		    init_attr->qp_type != MLX5_IB_QPT_REG_UMR) {
+			pr_warn("%s: no PD for transport %s\n", __func__,
+				ib_qp_type_str(init_attr->qp_type));
+			return ERR_PTR(-EINVAL);
+		}
+		dev = to_mdev(to_mxrcd(init_attr->xrcd)->ibxrcd.device);
 	}
 
 	switch (init_attr->qp_type) {
diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c
index 4659256..3b2ddd6 100644
--- a/drivers/infiniband/hw/mlx5/srq.c
+++ b/drivers/infiniband/hw/mlx5/srq.c
@@ -75,7 +75,8 @@
 
 static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
 			   struct mlx5_create_srq_mbox_in **in,
-			   struct ib_udata *udata, int buf_size, int *inlen)
+			   struct ib_udata *udata, int buf_size, int *inlen,
+			   int is_xrc)
 {
 	struct mlx5_ib_dev *dev = to_mdev(pd->device);
 	struct mlx5_ib_create_srq ucmd = {};
@@ -87,13 +88,8 @@
 	int ncont;
 	u32 offset;
 	u32 uidx = MLX5_IB_DEFAULT_UIDX;
-	int drv_data = udata->inlen - sizeof(struct ib_uverbs_cmd_hdr);
 
-	if (drv_data < 0)
-		return -EINVAL;
-
-	ucmdlen = (drv_data < sizeof(ucmd)) ?
-		  drv_data : sizeof(ucmd);
+	ucmdlen = min(udata->inlen, sizeof(ucmd));
 
 	if (ib_copy_from_udata(&ucmd, udata, ucmdlen)) {
 		mlx5_ib_dbg(dev, "failed copy udata\n");
@@ -103,15 +99,17 @@
 	if (ucmd.reserved0 || ucmd.reserved1)
 		return -EINVAL;
 
-	if (drv_data > sizeof(ucmd) &&
+	if (udata->inlen > sizeof(ucmd) &&
 	    !ib_is_udata_cleared(udata, sizeof(ucmd),
-				 drv_data - sizeof(ucmd)))
+				 udata->inlen - sizeof(ucmd)))
 		return -EINVAL;
 
-	err = get_srq_user_index(to_mucontext(pd->uobject->context),
-				 &ucmd, udata->inlen, &uidx);
-	if (err)
-		return err;
+	if (is_xrc) {
+		err = get_srq_user_index(to_mucontext(pd->uobject->context),
+					 &ucmd, udata->inlen, &uidx);
+		if (err)
+			return err;
+	}
 
 	srq->wq_sig = !!(ucmd.flags & MLX5_SRQ_FLAG_SIGNATURE);
 
@@ -151,7 +149,8 @@
 	(*in)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
 	(*in)->ctx.pgoff_cqn = cpu_to_be32(offset << 26);
 
-	if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1) {
+	if ((MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1) &&
+	     is_xrc){
 		xsrqc = MLX5_ADDR_OF(create_xrc_srq_in, *in,
 				     xrc_srq_context_entry);
 		MLX5_SET(xrc_srqc, xsrqc, user_index, uidx);
@@ -170,7 +169,7 @@
 
 static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
 			     struct mlx5_create_srq_mbox_in **in, int buf_size,
-			     int *inlen)
+			     int *inlen, int is_xrc)
 {
 	int err;
 	int i;
@@ -224,7 +223,8 @@
 
 	(*in)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
 
-	if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1) {
+	if ((MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1) &&
+	     is_xrc){
 		xsrqc = MLX5_ADDR_OF(create_xrc_srq_in, *in,
 				     xrc_srq_context_entry);
 		/* 0xffffff means we ask to work with cqe version 0 */
@@ -302,10 +302,14 @@
 		    desc_size, init_attr->attr.max_wr, srq->msrq.max, srq->msrq.max_gs,
 		    srq->msrq.max_avail_gather);
 
+	is_xrc = (init_attr->srq_type == IB_SRQT_XRC);
+
 	if (pd->uobject)
-		err = create_srq_user(pd, srq, &in, udata, buf_size, &inlen);
+		err = create_srq_user(pd, srq, &in, udata, buf_size, &inlen,
+				      is_xrc);
 	else
-		err = create_srq_kernel(dev, srq, &in, buf_size, &inlen);
+		err = create_srq_kernel(dev, srq, &in, buf_size, &inlen,
+					is_xrc);
 
 	if (err) {
 		mlx5_ib_warn(dev, "create srq %s failed, err %d\n",
@@ -313,7 +317,6 @@
 		goto err_srq;
 	}
 
-	is_xrc = (init_attr->srq_type == IB_SRQT_XRC);
 	in->ctx.state_log_sz = ilog2(srq->msrq.max);
 	flgs = ((srq->msrq.wqe_shift - 4) | (is_xrc << 5) | (srq->wq_sig << 7)) << 24;
 	xrcdn = 0;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h b/drivers/infiniband/hw/ocrdma/ocrdma.h
index 040bb8b..12503f1 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma.h
@@ -323,9 +323,6 @@
 			 */
 	u32 max_hw_cqe;
 	bool phase_change;
-	bool deferred_arm, deferred_sol;
-	bool first_arm;
-
 	spinlock_t cq_lock ____cacheline_aligned; /* provide synchronization
 						   * to cq polling
 						   */
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
index 5738493..f387430 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
@@ -228,6 +228,11 @@
 
 	ocrdma_alloc_pd_pool(dev);
 
+	if (!ocrdma_alloc_stats_resources(dev)) {
+		pr_err("%s: stats resource allocation failed\n", __func__);
+		goto alloc_err;
+	}
+
 	spin_lock_init(&dev->av_tbl.lock);
 	spin_lock_init(&dev->flush_q_lock);
 	return 0;
@@ -238,6 +243,7 @@
 
 static void ocrdma_free_resources(struct ocrdma_dev *dev)
 {
+	ocrdma_release_stats_resources(dev);
 	kfree(dev->stag_arr);
 	kfree(dev->qp_tbl);
 	kfree(dev->cq_tbl);
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
index 86c303a..255f774 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
@@ -64,10 +64,11 @@
 	return cpy_len;
 }
 
-static bool ocrdma_alloc_stats_mem(struct ocrdma_dev *dev)
+bool ocrdma_alloc_stats_resources(struct ocrdma_dev *dev)
 {
 	struct stats_mem *mem = &dev->stats_mem;
 
+	mutex_init(&dev->stats_lock);
 	/* Alloc mbox command mem*/
 	mem->size = max_t(u32, sizeof(struct ocrdma_rdma_stats_req),
 			sizeof(struct ocrdma_rdma_stats_resp));
@@ -91,13 +92,14 @@
 	return true;
 }
 
-static void ocrdma_release_stats_mem(struct ocrdma_dev *dev)
+void ocrdma_release_stats_resources(struct ocrdma_dev *dev)
 {
 	struct stats_mem *mem = &dev->stats_mem;
 
 	if (mem->va)
 		dma_free_coherent(&dev->nic_info.pdev->dev, mem->size,
 				  mem->va, mem->pa);
+	mem->va = NULL;
 	kfree(mem->debugfs_mem);
 }
 
@@ -838,15 +840,9 @@
 				&dev->reset_stats, &ocrdma_dbg_ops))
 		goto err;
 
-	/* Now create dma_mem for stats mbx command */
-	if (!ocrdma_alloc_stats_mem(dev))
-		goto err;
-
-	mutex_init(&dev->stats_lock);
 
 	return;
 err:
-	ocrdma_release_stats_mem(dev);
 	debugfs_remove_recursive(dev->dir);
 	dev->dir = NULL;
 }
@@ -855,9 +851,7 @@
 {
 	if (!dev->dir)
 		return;
-	debugfs_remove(dev->dir);
-	mutex_destroy(&dev->stats_lock);
-	ocrdma_release_stats_mem(dev);
+	debugfs_remove_recursive(dev->dir);
 }
 
 void ocrdma_init_debugfs(void)
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_stats.h b/drivers/infiniband/hw/ocrdma/ocrdma_stats.h
index c9e58d0..bba1fec 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_stats.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_stats.h
@@ -65,6 +65,8 @@
 
 void ocrdma_rem_debugfs(void);
 void ocrdma_init_debugfs(void);
+bool ocrdma_alloc_stats_resources(struct ocrdma_dev *dev);
+void ocrdma_release_stats_resources(struct ocrdma_dev *dev);
 void ocrdma_rem_port_stats(struct ocrdma_dev *dev);
 void ocrdma_add_port_stats(struct ocrdma_dev *dev);
 int ocrdma_pma_counters(struct ocrdma_dev *dev,
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index d4c687b..12420e4 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -125,8 +125,8 @@
 					IB_DEVICE_SYS_IMAGE_GUID |
 					IB_DEVICE_LOCAL_DMA_LKEY |
 					IB_DEVICE_MEM_MGT_EXTENSIONS;
-	attr->max_sge = min(dev->attr.max_send_sge, dev->attr.max_srq_sge);
-	attr->max_sge_rd = 0;
+	attr->max_sge = dev->attr.max_send_sge;
+	attr->max_sge_rd = attr->max_sge;
 	attr->max_cq = dev->attr.max_cq;
 	attr->max_cqe = dev->attr.max_cqe;
 	attr->max_mr = dev->attr.max_mr;
@@ -1094,7 +1094,6 @@
 	spin_lock_init(&cq->comp_handler_lock);
 	INIT_LIST_HEAD(&cq->sq_head);
 	INIT_LIST_HEAD(&cq->rq_head);
-	cq->first_arm = true;
 
 	if (ib_ctx) {
 		uctx = get_ocrdma_ucontext(ib_ctx);
@@ -2726,8 +2725,7 @@
 		OCRDMA_CQE_UD_STATUS_MASK) >> OCRDMA_CQE_UD_STATUS_SHIFT;
 	ibwc->src_qp = le32_to_cpu(cqe->flags_status_srcqpn) &
 						OCRDMA_CQE_SRCQP_MASK;
-	ibwc->pkey_index = le32_to_cpu(cqe->ud.rxlen_pkey) &
-						OCRDMA_CQE_PKEY_MASK;
+	ibwc->pkey_index = 0;
 	ibwc->wc_flags = IB_WC_GRH;
 	ibwc->byte_len = (le32_to_cpu(cqe->ud.rxlen_pkey) >>
 					OCRDMA_CQE_UD_XFER_LEN_SHIFT);
@@ -2911,12 +2909,9 @@
 	}
 stop_cqe:
 	cq->getp = cur_getp;
-	if (cq->deferred_arm || polled_hw_cqes) {
-		ocrdma_ring_cq_db(dev, cq->id, cq->deferred_arm,
-				  cq->deferred_sol, polled_hw_cqes);
-		cq->deferred_arm = false;
-		cq->deferred_sol = false;
-	}
+
+	if (polled_hw_cqes)
+		ocrdma_ring_cq_db(dev, cq->id, false, false, polled_hw_cqes);
 
 	return i;
 }
@@ -3000,13 +2995,7 @@
 	if (cq_flags & IB_CQ_SOLICITED)
 		sol_needed = true;
 
-	if (cq->first_arm) {
-		ocrdma_ring_cq_db(dev, cq_id, arm_needed, sol_needed, 0);
-		cq->first_arm = false;
-	}
-
-	cq->deferred_arm = true;
-	cq->deferred_sol = sol_needed;
+	ocrdma_ring_cq_db(dev, cq_id, arm_needed, sol_needed, 0);
 	spin_unlock_irqrestore(&cq->cq_lock, flags);
 
 	return 0;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 5ea0c14..fa9c42f 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -245,8 +245,6 @@
 	skb_reset_mac_header(skb);
 	skb_pull(skb, IPOIB_ENCAP_LEN);
 
-	skb->truesize = SKB_TRUESIZE(skb->len);
-
 	++dev->stats.rx_packets;
 	dev->stats.rx_bytes += skb->len;
 
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index 050dfa1..2588931 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -456,7 +456,10 @@
 	return status;
 }
 
-static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast)
+/*
+ * Caller must hold 'priv->lock'
+ */
+static int ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	struct ib_sa_multicast *multicast;
@@ -466,6 +469,10 @@
 	ib_sa_comp_mask comp_mask;
 	int ret = 0;
 
+	if (!priv->broadcast ||
+	    !test_bit(IPOIB_FLAG_OPER_UP, &priv->flags))
+		return -EINVAL;
+
 	ipoib_dbg_mcast(priv, "joining MGID %pI6\n", mcast->mcmember.mgid.raw);
 
 	rec.mgid     = mcast->mcmember.mgid;
@@ -525,20 +532,23 @@
 			rec.join_state = 4;
 #endif
 	}
+	spin_unlock_irq(&priv->lock);
 
 	multicast = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, priv->port,
 					 &rec, comp_mask, GFP_KERNEL,
 					 ipoib_mcast_join_complete, mcast);
+	spin_lock_irq(&priv->lock);
 	if (IS_ERR(multicast)) {
 		ret = PTR_ERR(multicast);
 		ipoib_warn(priv, "ib_sa_join_multicast failed, status %d\n", ret);
-		spin_lock_irq(&priv->lock);
 		/* Requeue this join task with a backoff delay */
 		__ipoib_mcast_schedule_join_thread(priv, mcast, 1);
 		clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
 		spin_unlock_irq(&priv->lock);
 		complete(&mcast->done);
+		spin_lock_irq(&priv->lock);
 	}
+	return 0;
 }
 
 void ipoib_mcast_join_task(struct work_struct *work)
@@ -620,9 +630,10 @@
 				/* Found the next unjoined group */
 				init_completion(&mcast->done);
 				set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
-				spin_unlock_irq(&priv->lock);
-				ipoib_mcast_join(dev, mcast);
-				spin_lock_irq(&priv->lock);
+				if (ipoib_mcast_join(dev, mcast)) {
+					spin_unlock_irq(&priv->lock);
+					return;
+				}
 			} else if (!delay_until ||
 				 time_before(mcast->delay_until, delay_until))
 				delay_until = mcast->delay_until;
@@ -641,10 +652,9 @@
 	if (mcast) {
 		init_completion(&mcast->done);
 		set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
+		ipoib_mcast_join(dev, mcast);
 	}
 	spin_unlock_irq(&priv->lock);
-	if (mcast)
-		ipoib_mcast_join(dev, mcast);
 }
 
 int ipoib_mcast_start_thread(struct net_device *dev)
diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c
index 6727954..e8a84d1 100644
--- a/drivers/input/joystick/xpad.c
+++ b/drivers/input/joystick/xpad.c
@@ -1207,7 +1207,6 @@
 #else
 static int xpad_led_probe(struct usb_xpad *xpad) { return 0; }
 static void xpad_led_disconnect(struct usb_xpad *xpad) { }
-static void xpad_identify_controller(struct usb_xpad *xpad) { }
 #endif
 
 static int xpad_start_input(struct usb_xpad *xpad)
diff --git a/drivers/input/keyboard/adp5589-keys.c b/drivers/input/keyboard/adp5589-keys.c
index 4d446d5..c01a1d6 100644
--- a/drivers/input/keyboard/adp5589-keys.c
+++ b/drivers/input/keyboard/adp5589-keys.c
@@ -235,7 +235,7 @@
 	unsigned short gpimapsize;
 	unsigned extend_cfg;
 	bool is_adp5585;
-	bool adp5585_support_row5;
+	bool support_row5;
 #ifdef CONFIG_GPIOLIB
 	unsigned char gpiomap[ADP5589_MAXGPIO];
 	bool export_gpio;
@@ -485,7 +485,7 @@
 	if (kpad->extend_cfg & C4_EXTEND_CFG)
 		pin_used[kpad->var->c4_extend_cfg] = true;
 
-	if (!kpad->adp5585_support_row5)
+	if (!kpad->support_row5)
 		pin_used[5] = true;
 
 	for (i = 0; i < kpad->var->maxgpio; i++)
@@ -884,12 +884,13 @@
 
 	switch (id->driver_data) {
 	case ADP5585_02:
-		kpad->adp5585_support_row5 = true;
+		kpad->support_row5 = true;
 	case ADP5585_01:
 		kpad->is_adp5585 = true;
 		kpad->var = &const_adp5585;
 		break;
 	case ADP5589:
+		kpad->support_row5 = true;
 		kpad->var = &const_adp5589;
 		break;
 	}
diff --git a/drivers/input/keyboard/cap11xx.c b/drivers/input/keyboard/cap11xx.c
index 378db10..4401be2 100644
--- a/drivers/input/keyboard/cap11xx.c
+++ b/drivers/input/keyboard/cap11xx.c
@@ -304,8 +304,10 @@
 		led->cdev.brightness = LED_OFF;
 
 		error = of_property_read_u32(child, "reg", &reg);
-		if (error != 0 || reg >= num_leds)
+		if (error != 0 || reg >= num_leds) {
+			of_node_put(child);
 			return -EINVAL;
+		}
 
 		led->reg = reg;
 		led->priv = priv;
@@ -313,8 +315,10 @@
 		INIT_WORK(&led->work, cap11xx_led_work);
 
 		error = devm_led_classdev_register(dev, &led->cdev);
-		if (error)
+		if (error) {
+			of_node_put(child);
 			return error;
+		}
 
 		priv->num_leds++;
 		led++;
diff --git a/drivers/input/misc/Kconfig b/drivers/input/misc/Kconfig
index d6d16fa..1f2337a 100644
--- a/drivers/input/misc/Kconfig
+++ b/drivers/input/misc/Kconfig
@@ -733,7 +733,7 @@
 	  module will be called xen-kbdfront.
 
 config INPUT_SIRFSOC_ONKEY
-	bool "CSR SiRFSoC power on/off/suspend key support"
+	tristate "CSR SiRFSoC power on/off/suspend key support"
 	depends on ARCH_SIRF && OF
 	default y
 	help
diff --git a/drivers/input/misc/sirfsoc-onkey.c b/drivers/input/misc/sirfsoc-onkey.c
index 9d5b89b..ed7237f 100644
--- a/drivers/input/misc/sirfsoc-onkey.c
+++ b/drivers/input/misc/sirfsoc-onkey.c
@@ -101,7 +101,7 @@
 static const struct of_device_id sirfsoc_pwrc_of_match[] = {
 	{ .compatible = "sirf,prima2-pwrc" },
 	{},
-}
+};
 MODULE_DEVICE_TABLE(of, sirfsoc_pwrc_of_match);
 
 static int sirfsoc_pwrc_probe(struct platform_device *pdev)
diff --git a/drivers/input/mouse/vmmouse.c b/drivers/input/mouse/vmmouse.c
index e272f06..a3f0f5a 100644
--- a/drivers/input/mouse/vmmouse.c
+++ b/drivers/input/mouse/vmmouse.c
@@ -458,8 +458,6 @@
 	priv->abs_dev = abs_dev;
 	psmouse->private = priv;
 
-	input_set_capability(rel_dev, EV_REL, REL_WHEEL);
-
 	/* Set up and register absolute device */
 	snprintf(priv->phys, sizeof(priv->phys), "%s/input1",
 		 psmouse->ps2dev.serio->phys);
@@ -475,10 +473,6 @@
 	abs_dev->id.version = psmouse->model;
 	abs_dev->dev.parent = &psmouse->ps2dev.serio->dev;
 
-	error = input_register_device(priv->abs_dev);
-	if (error)
-		goto init_fail;
-
 	/* Set absolute device capabilities */
 	input_set_capability(abs_dev, EV_KEY, BTN_LEFT);
 	input_set_capability(abs_dev, EV_KEY, BTN_RIGHT);
@@ -488,6 +482,13 @@
 	input_set_abs_params(abs_dev, ABS_X, 0, VMMOUSE_MAX_X, 0, 0);
 	input_set_abs_params(abs_dev, ABS_Y, 0, VMMOUSE_MAX_Y, 0, 0);
 
+	error = input_register_device(priv->abs_dev);
+	if (error)
+		goto init_fail;
+
+	/* Add wheel capability to the relative device */
+	input_set_capability(rel_dev, EV_REL, REL_WHEEL);
+
 	psmouse->protocol_handler = vmmouse_process_byte;
 	psmouse->disconnect = vmmouse_disconnect;
 	psmouse->reconnect = vmmouse_reconnect;
diff --git a/drivers/input/serio/serio.c b/drivers/input/serio/serio.c
index 8f82897..1ca7f55 100644
--- a/drivers/input/serio/serio.c
+++ b/drivers/input/serio/serio.c
@@ -134,7 +134,7 @@
 	int error;
 
 	error = device_attach(&serio->dev);
-	if (error < 0)
+	if (error < 0 && error != -EPROBE_DEFER)
 		dev_warn(&serio->dev,
 			 "device_attach() failed for %s (%s), error: %d\n",
 			 serio->phys, serio->name, error);
diff --git a/drivers/input/touchscreen/colibri-vf50-ts.c b/drivers/input/touchscreen/colibri-vf50-ts.c
index 5d4903a..69828d0 100644
--- a/drivers/input/touchscreen/colibri-vf50-ts.c
+++ b/drivers/input/touchscreen/colibri-vf50-ts.c
@@ -21,6 +21,7 @@
 #include <linux/interrupt.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/of.h>
 #include <linux/pinctrl/consumer.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
diff --git a/drivers/input/touchscreen/edt-ft5x06.c b/drivers/input/touchscreen/edt-ft5x06.c
index 0b0f8c1..23fbe38 100644
--- a/drivers/input/touchscreen/edt-ft5x06.c
+++ b/drivers/input/touchscreen/edt-ft5x06.c
@@ -822,16 +822,22 @@
 	int error;
 
 	error = device_property_read_u32(dev, "threshold", &val);
-	if (!error)
-		reg_addr->reg_threshold = val;
+	if (!error) {
+		edt_ft5x06_register_write(tsdata, reg_addr->reg_threshold, val);
+		tsdata->threshold = val;
+	}
 
 	error = device_property_read_u32(dev, "gain", &val);
-	if (!error)
-		reg_addr->reg_gain = val;
+	if (!error) {
+		edt_ft5x06_register_write(tsdata, reg_addr->reg_gain, val);
+		tsdata->gain = val;
+	}
 
 	error = device_property_read_u32(dev, "offset", &val);
-	if (!error)
-		reg_addr->reg_offset = val;
+	if (!error) {
+		edt_ft5x06_register_write(tsdata, reg_addr->reg_offset, val);
+		tsdata->offset = val;
+	}
 }
 
 static void
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 539b0de..374c129 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -114,6 +114,7 @@
 
 static void update_domain(struct protection_domain *domain);
 static int protection_domain_init(struct protection_domain *domain);
+static void detach_device(struct device *dev);
 
 /*
  * For dynamic growth the aperture size is split into ranges of 128MB of
@@ -384,6 +385,9 @@
 	if (!dev_data)
 		return;
 
+	if (dev_data->domain)
+		detach_device(dev);
+
 	iommu_device_unlink(amd_iommu_rlookup_table[dev_data->devid]->iommu_dev,
 			    dev);
 
@@ -2049,7 +2053,7 @@
 	/* Update device table */
 	set_dte_entry(dev_data->devid, domain, ats);
 	if (alias != dev_data->devid)
-		set_dte_entry(dev_data->devid, domain, ats);
+		set_dte_entry(alias, domain, ats);
 
 	device_flush_dte(dev_data);
 }
diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 013bdff..bf4959f 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -228,6 +228,10 @@
 static int __init iommu_go_to_state(enum iommu_init_state state);
 static void init_device_table_dma(void);
 
+static int iommu_pc_get_set_reg_val(struct amd_iommu *iommu,
+				    u8 bank, u8 cntr, u8 fxn,
+				    u64 *value, bool is_write);
+
 static inline void update_last_devid(u16 devid)
 {
 	if (devid > amd_iommu_last_bdf)
@@ -1016,6 +1020,34 @@
 }
 
 /*
+ * Family15h Model 30h-3fh (IOMMU Mishandles ATS Write Permission)
+ * Workaround:
+ *     BIOS should enable ATS write permission check by setting
+ *     L2_DEBUG_3[AtsIgnoreIWDis](D0F2xF4_x47[0]) = 1b
+ */
+static void amd_iommu_ats_write_check_workaround(struct amd_iommu *iommu)
+{
+	u32 value;
+
+	if ((boot_cpu_data.x86 != 0x15) ||
+	    (boot_cpu_data.x86_model < 0x30) ||
+	    (boot_cpu_data.x86_model > 0x3f))
+		return;
+
+	/* Test L2_DEBUG_3[AtsIgnoreIWDis] == 1 */
+	value = iommu_read_l2(iommu, 0x47);
+
+	if (value & BIT(0))
+		return;
+
+	/* Set L2_DEBUG_3[AtsIgnoreIWDis] = 1 */
+	iommu_write_l2(iommu, 0x47, value | BIT(0));
+
+	pr_info("AMD-Vi: Applying ATS write check workaround for IOMMU at %s\n",
+		dev_name(&iommu->dev->dev));
+}
+
+/*
  * This function clues the initialization function for one IOMMU
  * together and also allocates the command buffer and programs the
  * hardware. It does NOT enable the IOMMU. This is done afterwards.
@@ -1142,8 +1174,8 @@
 	amd_iommu_pc_present = true;
 
 	/* Check if the performance counters can be written to */
-	if ((0 != amd_iommu_pc_get_set_reg_val(0, 0, 0, 0, &val, true)) ||
-	    (0 != amd_iommu_pc_get_set_reg_val(0, 0, 0, 0, &val2, false)) ||
+	if ((0 != iommu_pc_get_set_reg_val(iommu, 0, 0, 0, &val, true)) ||
+	    (0 != iommu_pc_get_set_reg_val(iommu, 0, 0, 0, &val2, false)) ||
 	    (val != val2)) {
 		pr_err("AMD-Vi: Unable to write to IOMMU perf counter.\n");
 		amd_iommu_pc_present = false;
@@ -1284,6 +1316,7 @@
 	}
 
 	amd_iommu_erratum_746_workaround(iommu);
+	amd_iommu_ats_write_check_workaround(iommu);
 
 	iommu->iommu_dev = iommu_device_create(&iommu->dev->dev, iommu,
 					       amd_iommu_groups, "ivhd%d",
@@ -2283,22 +2316,15 @@
 }
 EXPORT_SYMBOL(amd_iommu_pc_get_max_counters);
 
-int amd_iommu_pc_get_set_reg_val(u16 devid, u8 bank, u8 cntr, u8 fxn,
+static int iommu_pc_get_set_reg_val(struct amd_iommu *iommu,
+				    u8 bank, u8 cntr, u8 fxn,
 				    u64 *value, bool is_write)
 {
-	struct amd_iommu *iommu;
 	u32 offset;
 	u32 max_offset_lim;
 
-	/* Make sure the IOMMU PC resource is available */
-	if (!amd_iommu_pc_present)
-		return -ENODEV;
-
-	/* Locate the iommu associated with the device ID */
-	iommu = amd_iommu_rlookup_table[devid];
-
 	/* Check for valid iommu and pc register indexing */
-	if (WARN_ON((iommu == NULL) || (fxn > 0x28) || (fxn & 7)))
+	if (WARN_ON((fxn > 0x28) || (fxn & 7)))
 		return -ENODEV;
 
 	offset = (u32)(((0x40|bank) << 12) | (cntr << 8) | fxn);
@@ -2322,3 +2348,16 @@
 	return 0;
 }
 EXPORT_SYMBOL(amd_iommu_pc_get_set_reg_val);
+
+int amd_iommu_pc_get_set_reg_val(u16 devid, u8 bank, u8 cntr, u8 fxn,
+				    u64 *value, bool is_write)
+{
+	struct amd_iommu *iommu = amd_iommu_rlookup_table[devid];
+
+	/* Make sure the IOMMU PC resource is available */
+	if (!amd_iommu_pc_present || iommu == NULL)
+		return -ENODEV;
+
+	return iommu_pc_get_set_reg_val(iommu, bank, cntr, fxn,
+					value, is_write);
+}
diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c
index 62a400c..8ffd756 100644
--- a/drivers/iommu/dmar.c
+++ b/drivers/iommu/dmar.c
@@ -329,7 +329,8 @@
 	/* Only care about add/remove events for physical functions */
 	if (pdev->is_virtfn)
 		return NOTIFY_DONE;
-	if (action != BUS_NOTIFY_ADD_DEVICE && action != BUS_NOTIFY_DEL_DEVICE)
+	if (action != BUS_NOTIFY_ADD_DEVICE &&
+	    action != BUS_NOTIFY_REMOVED_DEVICE)
 		return NOTIFY_DONE;
 
 	info = dmar_alloc_pci_notify_info(pdev, action);
@@ -339,7 +340,7 @@
 	down_write(&dmar_global_lock);
 	if (action == BUS_NOTIFY_ADD_DEVICE)
 		dmar_pci_bus_add_dev(info);
-	else if (action == BUS_NOTIFY_DEL_DEVICE)
+	else if (action == BUS_NOTIFY_REMOVED_DEVICE)
 		dmar_pci_bus_del_dev(info);
 	up_write(&dmar_global_lock);
 
@@ -1353,7 +1354,7 @@
 
 	raw_spin_lock_irqsave(&iommu->register_lock, flags);
 
-	sts =  dmar_readq(iommu->reg + DMAR_GSTS_REG);
+	sts =  readl(iommu->reg + DMAR_GSTS_REG);
 	if (!(sts & DMA_GSTS_QIES))
 		goto end;
 
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index ac73876..a2e1b7f 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -1489,7 +1489,7 @@
 {
 	struct pci_dev *pdev;
 
-	if (dev_is_pci(info->dev))
+	if (!dev_is_pci(info->dev))
 		return;
 
 	pdev = to_pci_dev(info->dev);
@@ -4367,7 +4367,7 @@
 				rmrru->devices_cnt);
 			if(ret < 0)
 				return ret;
-		} else if (info->event == BUS_NOTIFY_DEL_DEVICE) {
+		} else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
 			dmar_remove_dev_scope(info, rmrr->segment,
 				rmrru->devices, rmrru->devices_cnt);
 		}
@@ -4387,7 +4387,7 @@
 				break;
 			else if(ret < 0)
 				return ret;
-		} else if (info->event == BUS_NOTIFY_DEL_DEVICE) {
+		} else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
 			if (dmar_remove_dev_scope(info, atsr->segment,
 					atsru->devices, atsru->devices_cnt))
 				break;
diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c
index 5046483..d9939fa 100644
--- a/drivers/iommu/intel-svm.c
+++ b/drivers/iommu/intel-svm.c
@@ -249,12 +249,30 @@
 static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
 {
 	struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);
+	struct intel_svm_dev *sdev;
 
+	/* This might end up being called from exit_mmap(), *before* the page
+	 * tables are cleared. And __mmu_notifier_release() will delete us from
+	 * the list of notifiers so that our invalidate_range() callback doesn't
+	 * get called when the page tables are cleared. So we need to protect
+	 * against hardware accessing those page tables.
+	 *
+	 * We do it by clearing the entry in the PASID table and then flushing
+	 * the IOTLB and the PASID table caches. This might upset hardware;
+	 * perhaps we'll want to point the PASID to a dummy PGD (like the zero
+	 * page) so that we end up taking a fault that the hardware really
+	 * *has* to handle gracefully without affecting other processes.
+	 */
 	svm->iommu->pasid_table[svm->pasid].val = 0;
+	wmb();
 
-	/* There's no need to do any flush because we can't get here if there
-	 * are any devices left anyway. */
-	WARN_ON(!list_empty(&svm->devs));
+	rcu_read_lock();
+	list_for_each_entry_rcu(sdev, &svm->devs, list) {
+		intel_flush_pasid_dev(svm, sdev, svm->pasid);
+		intel_flush_svm_range_dev(svm, sdev, 0, -1, 0, !svm->mm);
+	}
+	rcu_read_unlock();
+
 }
 
 static const struct mmu_notifier_ops intel_mmuops = {
@@ -379,7 +397,6 @@
 				goto out;
 			}
 			iommu->pasid_table[svm->pasid].val = (u64)__pa(mm->pgd) | 1;
-			mm = NULL;
 		} else
 			iommu->pasid_table[svm->pasid].val = (u64)__pa(init_mm.pgd) | 1 | (1ULL << 11);
 		wmb();
@@ -442,11 +459,11 @@
 				kfree_rcu(sdev, rcu);
 
 				if (list_empty(&svm->devs)) {
-					mmu_notifier_unregister(&svm->notifier, svm->mm);
 
 					idr_remove(&svm->iommu->pasid_idr, svm->pasid);
 					if (svm->mm)
-						mmput(svm->mm);
+						mmu_notifier_unregister(&svm->notifier, svm->mm);
+
 					/* We mandate that no page faults may be outstanding
 					 * for the PASID when intel_svm_unbind_mm() is called.
 					 * If that is not obeyed, subtle errors will happen.
@@ -507,6 +524,10 @@
 	struct intel_svm *svm = NULL;
 	int head, tail, handled = 0;
 
+	/* Clear PPR bit before reading head/tail registers, to
+	 * ensure that we get a new interrupt if needed. */
+	writel(DMA_PRS_PPR, iommu->reg + DMAR_PRS_REG);
+
 	tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
 	head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
 	while (head != tail) {
@@ -551,6 +572,9 @@
 		 * any faults on kernel addresses. */
 		if (!svm->mm)
 			goto bad_req;
+		/* If the mm is already defunct, don't handle faults. */
+		if (!atomic_inc_not_zero(&svm->mm->mm_users))
+			goto bad_req;
 		down_read(&svm->mm->mmap_sem);
 		vma = find_extend_vma(svm->mm, address);
 		if (!vma || address < vma->vm_start)
@@ -567,6 +591,7 @@
 		result = QI_RESP_SUCCESS;
 	invalid:
 		up_read(&svm->mm->mmap_sem);
+		mmput(svm->mm);
 	bad_req:
 		/* Accounting for major/minor faults? */
 		rcu_read_lock();
diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c
index c12ba45..ac59692 100644
--- a/drivers/iommu/intel_irq_remapping.c
+++ b/drivers/iommu/intel_irq_remapping.c
@@ -629,7 +629,7 @@
 
 	raw_spin_lock_irqsave(&iommu->register_lock, flags);
 
-	sts = dmar_readq(iommu->reg + DMAR_GSTS_REG);
+	sts = readl(iommu->reg + DMAR_GSTS_REG);
 	if (!(sts & DMA_GSTS_IRES))
 		goto end;
 
diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index 8bbcbfe..381ca5a 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -25,6 +25,7 @@
 #include <linux/sizes.h>
 #include <linux/slab.h>
 #include <linux/types.h>
+#include <linux/dma-mapping.h>
 
 #include <asm/barrier.h>
 
diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig
index 715923d..fb50911 100644
--- a/drivers/irqchip/Kconfig
+++ b/drivers/irqchip/Kconfig
@@ -159,6 +159,7 @@
 config TS4800_IRQ
 	tristate "TS-4800 IRQ controller"
 	select IRQ_DOMAIN
+	depends on HAS_IOMEM
 	help
 	  Support for the TS-4800 FPGA IRQ controller
 
diff --git a/drivers/irqchip/irq-atmel-aic-common.c b/drivers/irqchip/irq-atmel-aic-common.c
index b12a5d5..37199b9 100644
--- a/drivers/irqchip/irq-atmel-aic-common.c
+++ b/drivers/irqchip/irq-atmel-aic-common.c
@@ -86,7 +86,7 @@
 	    priority > AT91_AIC_IRQ_MAX_PRIORITY)
 		return -EINVAL;
 
-	*val &= AT91_AIC_PRIOR;
+	*val &= ~AT91_AIC_PRIOR;
 	*val |= priority;
 
 	return 0;
diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index e23d1d1..43dfd15 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -66,7 +66,10 @@
 	unsigned long		phys_base;
 	struct its_cmd_block	*cmd_base;
 	struct its_cmd_block	*cmd_write;
-	void			*tables[GITS_BASER_NR_REGS];
+	struct {
+		void		*base;
+		u32		order;
+	} tables[GITS_BASER_NR_REGS];
 	struct its_collection	*collections;
 	struct list_head	its_device_list;
 	u64			flags;
@@ -75,6 +78,9 @@
 
 #define ITS_ITT_ALIGN		SZ_256
 
+/* Convert page order to size in bytes */
+#define PAGE_ORDER_TO_SIZE(o)	(PAGE_SIZE << (o))
+
 struct event_lpi_map {
 	unsigned long		*lpi_map;
 	u16			*col_map;
@@ -597,11 +603,6 @@
 	lpi_set_config(d, true);
 }
 
-static void its_eoi_irq(struct irq_data *d)
-{
-	gic_write_eoir(d->hwirq);
-}
-
 static int its_set_affinity(struct irq_data *d, const struct cpumask *mask_val,
 			    bool force)
 {
@@ -638,7 +639,7 @@
 	.name			= "ITS",
 	.irq_mask		= its_mask_irq,
 	.irq_unmask		= its_unmask_irq,
-	.irq_eoi		= its_eoi_irq,
+	.irq_eoi		= irq_chip_eoi_parent,
 	.irq_set_affinity	= its_set_affinity,
 	.irq_compose_msi_msg	= its_irq_compose_msi_msg,
 };
@@ -807,9 +808,10 @@
 	int i;
 
 	for (i = 0; i < GITS_BASER_NR_REGS; i++) {
-		if (its->tables[i]) {
-			free_page((unsigned long)its->tables[i]);
-			its->tables[i] = NULL;
+		if (its->tables[i].base) {
+			free_pages((unsigned long)its->tables[i].base,
+				   its->tables[i].order);
+			its->tables[i].base = NULL;
 		}
 	}
 }
@@ -842,7 +844,6 @@
 		u64 type = GITS_BASER_TYPE(val);
 		u64 entry_size = GITS_BASER_ENTRY_SIZE(val);
 		int order = get_order(psz);
-		int alloc_size;
 		int alloc_pages;
 		u64 tmp;
 		void *base;
@@ -874,8 +875,8 @@
 			}
 		}
 
-		alloc_size = (1 << order) * PAGE_SIZE;
-		alloc_pages = (alloc_size / psz);
+retry_alloc_baser:
+		alloc_pages = (PAGE_ORDER_TO_SIZE(order) / psz);
 		if (alloc_pages > GITS_BASER_PAGES_MAX) {
 			alloc_pages = GITS_BASER_PAGES_MAX;
 			order = get_order(GITS_BASER_PAGES_MAX * psz);
@@ -889,7 +890,8 @@
 			goto out_free;
 		}
 
-		its->tables[i] = base;
+		its->tables[i].base = base;
+		its->tables[i].order = order;
 
 retry_baser:
 		val = (virt_to_phys(base) 				 |
@@ -927,7 +929,7 @@
 			shr = tmp & GITS_BASER_SHAREABILITY_MASK;
 			if (!shr) {
 				cache = GITS_BASER_nC;
-				__flush_dcache_area(base, alloc_size);
+				__flush_dcache_area(base, PAGE_ORDER_TO_SIZE(order));
 			}
 			goto retry_baser;
 		}
@@ -938,13 +940,16 @@
 			 * size and retry. If we reach 4K, then
 			 * something is horribly wrong...
 			 */
+			free_pages((unsigned long)base, order);
+			its->tables[i].base = NULL;
+
 			switch (psz) {
 			case SZ_16K:
 				psz = SZ_4K;
-				goto retry_baser;
+				goto retry_alloc_baser;
 			case SZ_64K:
 				psz = SZ_16K;
-				goto retry_baser;
+				goto retry_alloc_baser;
 			}
 		}
 
@@ -957,7 +962,7 @@
 		}
 
 		pr_info("ITS: allocated %d %s @%lx (psz %dK, shr %d)\n",
-			(int)(alloc_size / entry_size),
+			(int)(PAGE_ORDER_TO_SIZE(order) / entry_size),
 			its_base_type_string[type],
 			(unsigned long)virt_to_phys(base),
 			psz / SZ_1K, (int)shr >> GITS_BASER_SHAREABILITY_SHIFT);
diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index 911758c..8f9ebf7 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -384,9 +384,6 @@
 	.irq_unmask		= gic_unmask_irq,
 	.irq_eoi		= gic_eoi_irq,
 	.irq_set_type		= gic_set_type,
-#ifdef CONFIG_SMP
-	.irq_set_affinity	= gic_set_affinity,
-#endif
 	.irq_get_irqchip_state	= gic_irq_get_irqchip_state,
 	.irq_set_irqchip_state	= gic_irq_set_irqchip_state,
 	.flags			= IRQCHIP_SET_TYPE_MASKED |
@@ -400,9 +397,6 @@
 	.irq_unmask		= gic_unmask_irq,
 	.irq_eoi		= gic_eoimode1_eoi_irq,
 	.irq_set_type		= gic_set_type,
-#ifdef CONFIG_SMP
-	.irq_set_affinity	= gic_set_affinity,
-#endif
 	.irq_get_irqchip_state	= gic_irq_get_irqchip_state,
 	.irq_set_irqchip_state	= gic_irq_set_irqchip_state,
 	.irq_set_vcpu_affinity	= gic_irq_set_vcpu_affinity,
@@ -443,7 +437,7 @@
 	u32 bypass = 0;
 	u32 mode = 0;
 
-	if (static_key_true(&supports_deactivate))
+	if (gic == &gic_data[0] && static_key_true(&supports_deactivate))
 		mode = GIC_CPU_CTRL_EOImodeNS;
 
 	/*
@@ -1039,6 +1033,11 @@
 		gic->chip.name = kasprintf(GFP_KERNEL, "GIC-%d", gic_nr);
 	}
 
+#ifdef CONFIG_SMP
+	if (gic_nr == 0)
+		gic->chip.irq_set_affinity = gic_set_affinity;
+#endif
+
 #ifdef CONFIG_GIC_NON_BANKED
 	if (percpu_offset) { /* Frankein-GIC without banked registers... */
 		unsigned int cpu;
diff --git a/drivers/irqchip/irq-mxs.c b/drivers/irqchip/irq-mxs.c
index c22e2d4..efe5084 100644
--- a/drivers/irqchip/irq-mxs.c
+++ b/drivers/irqchip/irq-mxs.c
@@ -241,6 +241,7 @@
 		writel(0, icoll_priv.intr + i);
 
 	icoll_add_domain(np, ASM9260_NUM_IRQS);
+	set_handle_irq(icoll_handle_irq);
 
 	return 0;
 }
diff --git a/drivers/irqchip/irq-s3c24xx.c b/drivers/irqchip/irq-s3c24xx.c
index c71914e..5dc5a76 100644
--- a/drivers/irqchip/irq-s3c24xx.c
+++ b/drivers/irqchip/irq-s3c24xx.c
@@ -605,7 +605,7 @@
 	return ERR_PTR(ret);
 }
 
-static struct s3c_irq_data init_eint[32] = {
+static struct s3c_irq_data __maybe_unused init_eint[32] = {
 	{ .type = S3C_IRQTYPE_NONE, }, /* reserved */
 	{ .type = S3C_IRQTYPE_NONE, }, /* reserved */
 	{ .type = S3C_IRQTYPE_NONE, }, /* reserved */
diff --git a/drivers/irqchip/irq-sun4i.c b/drivers/irqchip/irq-sun4i.c
index 0704362..376b280 100644
--- a/drivers/irqchip/irq-sun4i.c
+++ b/drivers/irqchip/irq-sun4i.c
@@ -22,7 +22,6 @@
 #include <linux/of_irq.h>
 
 #include <asm/exception.h>
-#include <asm/mach/irq.h>
 
 #define SUN4I_IRQ_VECTOR_REG		0x00
 #define SUN4I_IRQ_PROTECTION_REG	0x08
diff --git a/drivers/isdn/gigaset/ser-gigaset.c b/drivers/isdn/gigaset/ser-gigaset.c
index 2a506fe..d1f8ab9 100644
--- a/drivers/isdn/gigaset/ser-gigaset.c
+++ b/drivers/isdn/gigaset/ser-gigaset.c
@@ -373,13 +373,7 @@
 
 static void gigaset_device_release(struct device *dev)
 {
-	struct cardstate *cs = dev_get_drvdata(dev);
-
-	if (!cs)
-		return;
-	dev_set_drvdata(dev, NULL);
-	kfree(cs->hw.ser);
-	cs->hw.ser = NULL;
+	kfree(container_of(dev, struct ser_cardstate, dev.dev));
 }
 
 /*
@@ -408,7 +402,6 @@
 		cs->hw.ser = NULL;
 		return rc;
 	}
-	dev_set_drvdata(&cs->hw.ser->dev.dev, cs);
 
 	tasklet_init(&cs->write_tasklet,
 		     gigaset_modem_fill, (unsigned long) cs);
diff --git a/drivers/isdn/hardware/mISDN/netjet.c b/drivers/isdn/hardware/mISDN/netjet.c
index 8e29447..afde4ed 100644
--- a/drivers/isdn/hardware/mISDN/netjet.c
+++ b/drivers/isdn/hardware/mISDN/netjet.c
@@ -392,7 +392,7 @@
 	}
 	stat = bchannel_get_rxbuf(&bc->bch, cnt);
 	/* only transparent use the count here, HDLC overun is detected later */
-	if (stat == ENOMEM) {
+	if (stat == -ENOMEM) {
 		pr_warning("%s.B%d: No memory for %d bytes\n",
 			   card->name, bc->bch.nr, cnt);
 		return;
diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c
index 33224cb..9f6acd5 100644
--- a/drivers/lightnvm/core.c
+++ b/drivers/lightnvm/core.c
@@ -572,11 +572,13 @@
 		}
 	}
 
-	ret = nvm_get_sysblock(dev, &dev->sb);
-	if (!ret)
-		pr_err("nvm: device not initialized.\n");
-	else if (ret < 0)
-		pr_err("nvm: err (%d) on device initialization\n", ret);
+	if (dev->identity.cap & NVM_ID_DCAP_BBLKMGMT) {
+		ret = nvm_get_sysblock(dev, &dev->sb);
+		if (!ret)
+			pr_err("nvm: device not initialized.\n");
+		else if (ret < 0)
+			pr_err("nvm: err (%d) on device initialization\n", ret);
+	}
 
 	/* register device with a supported media manager */
 	down_write(&nvm_lock);
@@ -1055,9 +1057,11 @@
 	strncpy(info.mmtype, init->mmtype, NVM_MMTYPE_LEN);
 	info.fs_ppa.ppa = -1;
 
-	ret = nvm_init_sysblock(dev, &info);
-	if (ret)
-		return ret;
+	if (dev->identity.cap & NVM_ID_DCAP_BBLKMGMT) {
+		ret = nvm_init_sysblock(dev, &info);
+		if (ret)
+			return ret;
+	}
 
 	memcpy(&dev->sb, &info, sizeof(struct nvm_sb_info));
 
@@ -1117,7 +1121,10 @@
 		dev->mt = NULL;
 	}
 
-	return nvm_dev_factory(dev, fact.flags);
+	if (dev->identity.cap & NVM_ID_DCAP_BBLKMGMT)
+		return nvm_dev_factory(dev, fact.flags);
+
+	return 0;
 }
 
 static long nvm_ctl_ioctl(struct file *file, uint cmd, unsigned long arg)
diff --git a/drivers/lightnvm/rrpc.c b/drivers/lightnvm/rrpc.c
index d8c7595..307db1e 100644
--- a/drivers/lightnvm/rrpc.c
+++ b/drivers/lightnvm/rrpc.c
@@ -300,8 +300,10 @@
 	}
 
 	page = mempool_alloc(rrpc->page_pool, GFP_NOIO);
-	if (!page)
+	if (!page) {
+		bio_put(bio);
 		return -ENOMEM;
+	}
 
 	while ((slot = find_first_zero_bit(rblk->invalid_pages,
 					    nr_pgs_per_blk)) < nr_pgs_per_blk) {
diff --git a/drivers/lightnvm/rrpc.h b/drivers/lightnvm/rrpc.h
index ef13ac7..f7b3733 100644
--- a/drivers/lightnvm/rrpc.h
+++ b/drivers/lightnvm/rrpc.h
@@ -174,8 +174,7 @@
 static inline int request_intersects(struct rrpc_inflight_rq *r,
 				sector_t laddr_start, sector_t laddr_end)
 {
-	return (laddr_end >= r->l_start && laddr_end <= r->l_end) &&
-		(laddr_start >= r->l_start && laddr_start <= r->l_end);
+	return (laddr_end >= r->l_start) && (laddr_start <= r->l_end);
 }
 
 static int __rrpc_lock_laddr(struct rrpc *rrpc, sector_t laddr,
@@ -184,6 +183,8 @@
 	sector_t laddr_end = laddr + pages - 1;
 	struct rrpc_inflight_rq *rtmp;
 
+	WARN_ON(irqs_disabled());
+
 	spin_lock_irq(&rrpc->inflights.lock);
 	list_for_each_entry(rtmp, &rrpc->inflights.reqs, list) {
 		if (unlikely(request_intersects(rtmp, laddr, laddr_end))) {
diff --git a/drivers/mailbox/Kconfig b/drivers/mailbox/Kconfig
index 546d05f..b2bbe86 100644
--- a/drivers/mailbox/Kconfig
+++ b/drivers/mailbox/Kconfig
@@ -81,6 +81,7 @@
 config MAILBOX_TEST
 	tristate "Mailbox Test Client"
 	depends on OF
+	depends on HAS_IOMEM
 	help
 	  Test client to help with testing new Controller driver
 	  implementations.
diff --git a/drivers/mailbox/pcc.c b/drivers/mailbox/pcc.c
index 45d85ae..8f779a1 100644
--- a/drivers/mailbox/pcc.c
+++ b/drivers/mailbox/pcc.c
@@ -81,16 +81,10 @@
  */
 static struct mbox_chan *get_pcc_channel(int id)
 {
-	struct mbox_chan *pcc_chan;
-
 	if (id < 0 || id > pcc_mbox_ctrl.num_chans)
 		return ERR_PTR(-ENOENT);
 
-	pcc_chan = (struct mbox_chan *)
-		(unsigned long) pcc_mbox_channels +
-		(id * sizeof(*pcc_chan));
-
-	return pcc_chan;
+	return &pcc_mbox_channels[id];
 }
 
 /**
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index 4f22e91..d80cce4 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -210,10 +210,6 @@
 	struct block_device *bdev;
 	struct mddev *mddev = bitmap->mddev;
 	struct bitmap_storage *store = &bitmap->storage;
-	int node_offset = 0;
-
-	if (mddev_is_clustered(bitmap->mddev))
-		node_offset = bitmap->cluster_slot * store->file_pages;
 
 	while ((rdev = next_active_rdev(rdev, mddev)) != NULL) {
 		int size = PAGE_SIZE;
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 5df4048..dd83492 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1191,6 +1191,8 @@
 
 	if (clone)
 		free_rq_clone(clone);
+	else if (!tio->md->queue->mq_ops)
+		free_rq_tio(tio);
 }
 
 /*
diff --git a/drivers/md/faulty.c b/drivers/md/faulty.c
index 4a8e150..685aa2d 100644
--- a/drivers/md/faulty.c
+++ b/drivers/md/faulty.c
@@ -170,7 +170,7 @@
 		conf->nfaults = n+1;
 }
 
-static void make_request(struct mddev *mddev, struct bio *bio)
+static void faulty_make_request(struct mddev *mddev, struct bio *bio)
 {
 	struct faulty_conf *conf = mddev->private;
 	int failit = 0;
@@ -226,7 +226,7 @@
 	generic_make_request(bio);
 }
 
-static void status(struct seq_file *seq, struct mddev *mddev)
+static void faulty_status(struct seq_file *seq, struct mddev *mddev)
 {
 	struct faulty_conf *conf = mddev->private;
 	int n;
@@ -259,7 +259,7 @@
 }
 
 
-static int reshape(struct mddev *mddev)
+static int faulty_reshape(struct mddev *mddev)
 {
 	int mode = mddev->new_layout & ModeMask;
 	int count = mddev->new_layout >> ModeShift;
@@ -299,7 +299,7 @@
 	return sectors;
 }
 
-static int run(struct mddev *mddev)
+static int faulty_run(struct mddev *mddev)
 {
 	struct md_rdev *rdev;
 	int i;
@@ -327,7 +327,7 @@
 	md_set_array_sectors(mddev, faulty_size(mddev, 0, 0));
 	mddev->private = conf;
 
-	reshape(mddev);
+	faulty_reshape(mddev);
 
 	return 0;
 }
@@ -344,11 +344,11 @@
 	.name		= "faulty",
 	.level		= LEVEL_FAULTY,
 	.owner		= THIS_MODULE,
-	.make_request	= make_request,
-	.run		= run,
+	.make_request	= faulty_make_request,
+	.run		= faulty_run,
 	.free		= faulty_free,
-	.status		= status,
-	.check_reshape	= reshape,
+	.status		= faulty_status,
+	.check_reshape	= faulty_reshape,
 	.size		= faulty_size,
 };
 
diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
index 0ded8e9..dd97d42 100644
--- a/drivers/md/md-cluster.c
+++ b/drivers/md/md-cluster.c
@@ -293,6 +293,7 @@
 dlm_unlock:
 		dlm_unlock_sync(bm_lockres);
 clear_bit:
+		lockres_free(bm_lockres);
 		clear_bit(slot, &cinfo->recovery_map);
 	}
 }
@@ -682,8 +683,10 @@
 		bm_lockres = lockres_init(mddev, str, NULL, 1);
 		if (!bm_lockres)
 			return -ENOMEM;
-		if (i == (cinfo->slot_number - 1))
+		if (i == (cinfo->slot_number - 1)) {
+			lockres_free(bm_lockres);
 			continue;
+		}
 
 		bm_lockres->flags |= DLM_LKF_NOQUEUE;
 		ret = dlm_lock_sync(bm_lockres, DLM_LOCK_PW);
@@ -858,6 +861,7 @@
 	lockres_free(cinfo->token_lockres);
 	lockres_free(cinfo->ack_lockres);
 	lockres_free(cinfo->no_new_dev_lockres);
+	lockres_free(cinfo->resync_lockres);
 	lockres_free(cinfo->bitmap_lockres);
 	unlock_all_bitmaps(mddev);
 	dlm_release_lockspace(cinfo->lockspace, 2);
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index c4b9134..4e3843f 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1044,7 +1044,7 @@
 	kfree(plug);
 }
 
-static void make_request(struct mddev *mddev, struct bio * bio)
+static void raid1_make_request(struct mddev *mddev, struct bio * bio)
 {
 	struct r1conf *conf = mddev->private;
 	struct raid1_info *mirror;
@@ -1422,7 +1422,7 @@
 	wake_up(&conf->wait_barrier);
 }
 
-static void status(struct seq_file *seq, struct mddev *mddev)
+static void raid1_status(struct seq_file *seq, struct mddev *mddev)
 {
 	struct r1conf *conf = mddev->private;
 	int i;
@@ -1439,7 +1439,7 @@
 	seq_printf(seq, "]");
 }
 
-static void error(struct mddev *mddev, struct md_rdev *rdev)
+static void raid1_error(struct mddev *mddev, struct md_rdev *rdev)
 {
 	char b[BDEVNAME_SIZE];
 	struct r1conf *conf = mddev->private;
@@ -2472,7 +2472,8 @@
  * that can be installed to exclude normal IO requests.
  */
 
-static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipped)
+static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
+				   int *skipped)
 {
 	struct r1conf *conf = mddev->private;
 	struct r1bio *r1_bio;
@@ -2890,7 +2891,7 @@
 }
 
 static void raid1_free(struct mddev *mddev, void *priv);
-static int run(struct mddev *mddev)
+static int raid1_run(struct mddev *mddev)
 {
 	struct r1conf *conf;
 	int i;
@@ -3170,15 +3171,15 @@
 	.name		= "raid1",
 	.level		= 1,
 	.owner		= THIS_MODULE,
-	.make_request	= make_request,
-	.run		= run,
+	.make_request	= raid1_make_request,
+	.run		= raid1_run,
 	.free		= raid1_free,
-	.status		= status,
-	.error_handler	= error,
+	.status		= raid1_status,
+	.error_handler	= raid1_error,
 	.hot_add_disk	= raid1_add_disk,
 	.hot_remove_disk= raid1_remove_disk,
 	.spare_active	= raid1_spare_active,
-	.sync_request	= sync_request,
+	.sync_request	= raid1_sync_request,
 	.resize		= raid1_resize,
 	.size		= raid1_size,
 	.check_reshape	= raid1_reshape,
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index ce959b4..1c1447d 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1442,7 +1442,7 @@
 	one_write_done(r10_bio);
 }
 
-static void make_request(struct mddev *mddev, struct bio *bio)
+static void raid10_make_request(struct mddev *mddev, struct bio *bio)
 {
 	struct r10conf *conf = mddev->private;
 	sector_t chunk_mask = (conf->geo.chunk_mask & conf->prev.chunk_mask);
@@ -1484,7 +1484,7 @@
 	wake_up(&conf->wait_barrier);
 }
 
-static void status(struct seq_file *seq, struct mddev *mddev)
+static void raid10_status(struct seq_file *seq, struct mddev *mddev)
 {
 	struct r10conf *conf = mddev->private;
 	int i;
@@ -1562,7 +1562,7 @@
 		_enough(conf, 1, ignore);
 }
 
-static void error(struct mddev *mddev, struct md_rdev *rdev)
+static void raid10_error(struct mddev *mddev, struct md_rdev *rdev)
 {
 	char b[BDEVNAME_SIZE];
 	struct r10conf *conf = mddev->private;
@@ -2802,7 +2802,7 @@
  *
  */
 
-static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
+static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
 			     int *skipped)
 {
 	struct r10conf *conf = mddev->private;
@@ -3523,7 +3523,7 @@
 	return ERR_PTR(err);
 }
 
-static int run(struct mddev *mddev)
+static int raid10_run(struct mddev *mddev)
 {
 	struct r10conf *conf;
 	int i, disk_idx, chunk_size;
@@ -4617,15 +4617,15 @@
 	.name		= "raid10",
 	.level		= 10,
 	.owner		= THIS_MODULE,
-	.make_request	= make_request,
-	.run		= run,
+	.make_request	= raid10_make_request,
+	.run		= raid10_run,
 	.free		= raid10_free,
-	.status		= status,
-	.error_handler	= error,
+	.status		= raid10_status,
+	.error_handler	= raid10_error,
 	.hot_add_disk	= raid10_add_disk,
 	.hot_remove_disk= raid10_remove_disk,
 	.spare_active	= raid10_spare_active,
-	.sync_request	= sync_request,
+	.sync_request	= raid10_sync_request,
 	.quiesce	= raid10_quiesce,
 	.size		= raid10_size,
 	.resize		= raid10_resize,
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index a086014..b4f02c9 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2496,7 +2496,7 @@
 	dev->sector = raid5_compute_blocknr(sh, i, previous);
 }
 
-static void error(struct mddev *mddev, struct md_rdev *rdev)
+static void raid5_error(struct mddev *mddev, struct md_rdev *rdev)
 {
 	char b[BDEVNAME_SIZE];
 	struct r5conf *conf = mddev->private;
@@ -2958,7 +2958,7 @@
 	 * If several bio share a stripe. The bio bi_phys_segments acts as a
 	 * reference count to avoid race. The reference count should already be
 	 * increased before this function is called (for example, in
-	 * make_request()), so other bio sharing this stripe will not free the
+	 * raid5_make_request()), so other bio sharing this stripe will not free the
 	 * stripe. If a stripe is owned by one stripe, the stripe lock will
 	 * protect it.
 	 */
@@ -5135,7 +5135,7 @@
 	}
 }
 
-static void make_request(struct mddev *mddev, struct bio * bi)
+static void raid5_make_request(struct mddev *mddev, struct bio * bi)
 {
 	struct r5conf *conf = mddev->private;
 	int dd_idx;
@@ -5225,7 +5225,7 @@
 		new_sector = raid5_compute_sector(conf, logical_sector,
 						  previous,
 						  &dd_idx, NULL);
-		pr_debug("raid456: make_request, sector %llu logical %llu\n",
+		pr_debug("raid456: raid5_make_request, sector %llu logical %llu\n",
 			(unsigned long long)new_sector,
 			(unsigned long long)logical_sector);
 
@@ -5575,7 +5575,8 @@
 	return retn;
 }
 
-static inline sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipped)
+static inline sector_t raid5_sync_request(struct mddev *mddev, sector_t sector_nr,
+					  int *skipped)
 {
 	struct r5conf *conf = mddev->private;
 	struct stripe_head *sh;
@@ -6674,7 +6675,7 @@
 	return 0;
 }
 
-static int run(struct mddev *mddev)
+static int raid5_run(struct mddev *mddev)
 {
 	struct r5conf *conf;
 	int working_disks = 0;
@@ -7048,7 +7049,7 @@
 	mddev->to_remove = &raid5_attrs_group;
 }
 
-static void status(struct seq_file *seq, struct mddev *mddev)
+static void raid5_status(struct seq_file *seq, struct mddev *mddev)
 {
 	struct r5conf *conf = mddev->private;
 	int i;
@@ -7864,15 +7865,15 @@
 	.name		= "raid6",
 	.level		= 6,
 	.owner		= THIS_MODULE,
-	.make_request	= make_request,
-	.run		= run,
+	.make_request	= raid5_make_request,
+	.run		= raid5_run,
 	.free		= raid5_free,
-	.status		= status,
-	.error_handler	= error,
+	.status		= raid5_status,
+	.error_handler	= raid5_error,
 	.hot_add_disk	= raid5_add_disk,
 	.hot_remove_disk= raid5_remove_disk,
 	.spare_active	= raid5_spare_active,
-	.sync_request	= sync_request,
+	.sync_request	= raid5_sync_request,
 	.resize		= raid5_resize,
 	.size		= raid5_size,
 	.check_reshape	= raid6_check_reshape,
@@ -7887,15 +7888,15 @@
 	.name		= "raid5",
 	.level		= 5,
 	.owner		= THIS_MODULE,
-	.make_request	= make_request,
-	.run		= run,
+	.make_request	= raid5_make_request,
+	.run		= raid5_run,
 	.free		= raid5_free,
-	.status		= status,
-	.error_handler	= error,
+	.status		= raid5_status,
+	.error_handler	= raid5_error,
 	.hot_add_disk	= raid5_add_disk,
 	.hot_remove_disk= raid5_remove_disk,
 	.spare_active	= raid5_spare_active,
-	.sync_request	= sync_request,
+	.sync_request	= raid5_sync_request,
 	.resize		= raid5_resize,
 	.size		= raid5_size,
 	.check_reshape	= raid5_check_reshape,
@@ -7911,15 +7912,15 @@
 	.name		= "raid4",
 	.level		= 4,
 	.owner		= THIS_MODULE,
-	.make_request	= make_request,
-	.run		= run,
+	.make_request	= raid5_make_request,
+	.run		= raid5_run,
 	.free		= raid5_free,
-	.status		= status,
-	.error_handler	= error,
+	.status		= raid5_status,
+	.error_handler	= raid5_error,
 	.hot_add_disk	= raid5_add_disk,
 	.hot_remove_disk= raid5_remove_disk,
 	.spare_active	= raid5_spare_active,
-	.sync_request	= sync_request,
+	.sync_request	= raid5_sync_request,
 	.resize		= raid5_resize,
 	.size		= raid5_size,
 	.check_reshape	= raid5_check_reshape,
diff --git a/drivers/media/dvb-frontends/tda1004x.c b/drivers/media/dvb-frontends/tda1004x.c
index 0e209b5..c6abeb4 100644
--- a/drivers/media/dvb-frontends/tda1004x.c
+++ b/drivers/media/dvb-frontends/tda1004x.c
@@ -903,9 +903,18 @@
 {
 	struct dtv_frontend_properties *fe_params = &fe->dtv_property_cache;
 	struct tda1004x_state* state = fe->demodulator_priv;
+	int status;
 
 	dprintk("%s\n", __func__);
 
+	status = tda1004x_read_byte(state, TDA1004X_STATUS_CD);
+	if (status == -1)
+		return -EIO;
+
+	/* Only update the properties cache if device is locked */
+	if (!(status & 8))
+		return 0;
+
 	// inversion status
 	fe_params->inversion = INVERSION_OFF;
 	if (tda1004x_read_byte(state, TDA1004X_CONFC1) & 0x20)
diff --git a/drivers/media/i2c/adp1653.c b/drivers/media/i2c/adp1653.c
index 7e9cbf7..fb7ed73 100644
--- a/drivers/media/i2c/adp1653.c
+++ b/drivers/media/i2c/adp1653.c
@@ -497,7 +497,7 @@
 		if (!client->dev.platform_data) {
 			dev_err(&client->dev,
 				"Neither DT not platform data provided\n");
-			return EINVAL;
+			return -EINVAL;
 		}
 		flash->platform_data = client->dev.platform_data;
 	}
diff --git a/drivers/media/i2c/adv7604.c b/drivers/media/i2c/adv7604.c
index f8dd750..e1719ff 100644
--- a/drivers/media/i2c/adv7604.c
+++ b/drivers/media/i2c/adv7604.c
@@ -1960,10 +1960,9 @@
 	}
 
 	/* tx 5v detect */
-	tx_5v = io_read(sd, 0x70) & info->cable_det_mask;
+	tx_5v = irq_reg_0x70 & info->cable_det_mask;
 	if (tx_5v) {
 		v4l2_dbg(1, debug, sd, "%s: tx_5v: 0x%x\n", __func__, tx_5v);
-		io_write(sd, 0x71, tx_5v);
 		adv76xx_s_detect_tx_5v_ctrl(sd);
 		if (handled)
 			*handled = true;
diff --git a/drivers/media/i2c/ir-kbd-i2c.c b/drivers/media/i2c/ir-kbd-i2c.c
index 8304919..bf82726 100644
--- a/drivers/media/i2c/ir-kbd-i2c.c
+++ b/drivers/media/i2c/ir-kbd-i2c.c
@@ -478,7 +478,6 @@
 	{ "ir_rx_z8f0811_hdpvr", 0 },
 	{ }
 };
-MODULE_DEVICE_TABLE(i2c, ir_kbd_id);
 
 static struct i2c_driver ir_kbd_driver = {
 	.driver = {
diff --git a/drivers/media/i2c/s5k6a3.c b/drivers/media/i2c/s5k6a3.c
index b9e43ff..cbe4711 100644
--- a/drivers/media/i2c/s5k6a3.c
+++ b/drivers/media/i2c/s5k6a3.c
@@ -144,8 +144,7 @@
 	mf = __s5k6a3_get_format(sensor, cfg, fmt->pad, fmt->which);
 	if (mf) {
 		mutex_lock(&sensor->lock);
-		if (fmt->which == V4L2_SUBDEV_FORMAT_ACTIVE)
-			*mf = fmt->format;
+		*mf = fmt->format;
 		mutex_unlock(&sensor->lock);
 	}
 	return 0;
diff --git a/drivers/media/media-device.c b/drivers/media/media-device.c
index 7dae0ac..e9219f5 100644
--- a/drivers/media/media-device.c
+++ b/drivers/media/media-device.c
@@ -20,6 +20,9 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
 
+/* We need to access legacy defines from linux/media.h */
+#define __NEED_MEDIA_LEGACY_API
+
 #include <linux/compat.h>
 #include <linux/export.h>
 #include <linux/idr.h>
@@ -115,6 +118,26 @@
 	u_ent.group_id = 0;		/* Unused */
 	u_ent.pads = ent->num_pads;
 	u_ent.links = ent->num_links - ent->num_backlinks;
+
+	/*
+	 * Workaround for a bug at media-ctl <= v1.10 that makes it to
+	 * do the wrong thing if the entity function doesn't belong to
+	 * either MEDIA_ENT_F_OLD_BASE or MEDIA_ENT_F_OLD_SUBDEV_BASE
+	 * Ranges.
+	 *
+	 * Non-subdevices are expected to be at the MEDIA_ENT_F_OLD_BASE,
+	 * or, otherwise, will be silently ignored by media-ctl when
+	 * printing the graphviz diagram. So, map them into the devnode
+	 * old range.
+	 */
+	if (ent->function < MEDIA_ENT_F_OLD_BASE ||
+	    ent->function > MEDIA_ENT_T_DEVNODE_UNKNOWN) {
+		if (is_media_entity_v4l2_subdev(ent))
+			u_ent.type = MEDIA_ENT_F_V4L2_SUBDEV_UNKNOWN;
+		else if (ent->function != MEDIA_ENT_F_IO_V4L)
+			u_ent.type = MEDIA_ENT_T_DEVNODE_UNKNOWN;
+	}
+
 	memcpy(&u_ent.raw, &ent->info, sizeof(ent->info));
 	if (copy_to_user(uent, &u_ent, sizeof(u_ent)))
 		return -EFAULT;
diff --git a/drivers/media/pci/saa7134/saa7134-alsa.c b/drivers/media/pci/saa7134/saa7134-alsa.c
index 1d2c310..94f8162 100644
--- a/drivers/media/pci/saa7134/saa7134-alsa.c
+++ b/drivers/media/pci/saa7134/saa7134-alsa.c
@@ -1211,6 +1211,8 @@
 
 static int alsa_device_exit(struct saa7134_dev *dev)
 {
+	if (!snd_saa7134_cards[dev->nr])
+		return 1;
 
 	snd_card_free(snd_saa7134_cards[dev->nr]);
 	snd_saa7134_cards[dev->nr] = NULL;
@@ -1260,7 +1262,8 @@
 	int idx;
 
 	for (idx = 0; idx < SNDRV_CARDS; idx++) {
-		snd_card_free(snd_saa7134_cards[idx]);
+		if (snd_saa7134_cards[idx])
+			snd_card_free(snd_saa7134_cards[idx]);
 	}
 
 	saa7134_dmasound_init = NULL;
diff --git a/drivers/media/platform/Kconfig b/drivers/media/platform/Kconfig
index 5263594..8b89ebe1 100644
--- a/drivers/media/platform/Kconfig
+++ b/drivers/media/platform/Kconfig
@@ -215,6 +215,7 @@
 config VIDEO_STI_BDISP
 	tristate "STMicroelectronics BDISP 2D blitter driver"
 	depends on VIDEO_DEV && VIDEO_V4L2
+	depends on HAS_DMA
 	depends on ARCH_STI || COMPILE_TEST
 	select VIDEOBUF2_DMA_CONTIG
 	select V4L2_MEM2MEM_DEV
diff --git a/drivers/media/platform/coda/coda-bit.c b/drivers/media/platform/coda/coda-bit.c
index 7d28899..38aacc7 100644
--- a/drivers/media/platform/coda/coda-bit.c
+++ b/drivers/media/platform/coda/coda-bit.c
@@ -1455,9 +1455,9 @@
 		return 0;
 
 	ctx->bitstream.size = roundup_pow_of_two(q_data->sizeimage * 2);
-	ctx->bitstream.vaddr = dma_alloc_writecombine(
-			&ctx->dev->plat_dev->dev, ctx->bitstream.size,
-			&ctx->bitstream.paddr, GFP_KERNEL);
+	ctx->bitstream.vaddr = dma_alloc_wc(&ctx->dev->plat_dev->dev,
+					    ctx->bitstream.size,
+					    &ctx->bitstream.paddr, GFP_KERNEL);
 	if (!ctx->bitstream.vaddr) {
 		v4l2_err(&ctx->dev->v4l2_dev,
 			 "failed to allocate bitstream ringbuffer");
@@ -1474,8 +1474,8 @@
 	if (ctx->bitstream.vaddr == NULL)
 		return;
 
-	dma_free_writecombine(&ctx->dev->plat_dev->dev, ctx->bitstream.size,
-			      ctx->bitstream.vaddr, ctx->bitstream.paddr);
+	dma_free_wc(&ctx->dev->plat_dev->dev, ctx->bitstream.size,
+		    ctx->bitstream.vaddr, ctx->bitstream.paddr);
 	ctx->bitstream.vaddr = NULL;
 	kfifo_init(&ctx->bitstream_fifo, NULL, 0);
 }
diff --git a/drivers/media/platform/exynos4-is/Kconfig b/drivers/media/platform/exynos4-is/Kconfig
index 40423c6..57d42c61 100644
--- a/drivers/media/platform/exynos4-is/Kconfig
+++ b/drivers/media/platform/exynos4-is/Kconfig
@@ -1,6 +1,6 @@
 
 config VIDEO_SAMSUNG_EXYNOS4_IS
-	bool "Samsung S5P/EXYNOS4 SoC series Camera Subsystem driver"
+	tristate "Samsung S5P/EXYNOS4 SoC series Camera Subsystem driver"
 	depends on VIDEO_V4L2 && VIDEO_V4L2_SUBDEV_API
 	depends on ARCH_S5PV210 || ARCH_EXYNOS || COMPILE_TEST
 	depends on OF && COMMON_CLK
diff --git a/drivers/media/platform/exynos4-is/fimc-is.c b/drivers/media/platform/exynos4-is/fimc-is.c
index 49658ca..979c388 100644
--- a/drivers/media/platform/exynos4-is/fimc-is.c
+++ b/drivers/media/platform/exynos4-is/fimc-is.c
@@ -631,6 +631,12 @@
 
 	fimc_is_mem_barrier();
 
+	/*
+	 * Some user space use cases hang up here without this
+	 * empirically chosen delay.
+	 */
+	udelay(100);
+
 	mcuctl_write(HIC_OPEN_SENSOR, is, MCUCTL_REG_ISSR(0));
 	mcuctl_write(is->sensor_index, is, MCUCTL_REG_ISSR(1));
 	mcuctl_write(sensor->drvdata->id, is, MCUCTL_REG_ISSR(2));
diff --git a/drivers/media/platform/exynos4-is/fimc-isp-video.c b/drivers/media/platform/exynos4-is/fimc-isp-video.c
index bf9261e..c081672 100644
--- a/drivers/media/platform/exynos4-is/fimc-isp-video.c
+++ b/drivers/media/platform/exynos4-is/fimc-isp-video.c
@@ -218,8 +218,8 @@
 							ivb->dma_addr[i];
 
 			isp_dbg(2, &video->ve.vdev,
-				"dma_buf %pad (%d/%d/%d) addr: %pad\n",
-				&buf_index, ivb->index, i, vb->index,
+				"dma_buf %d (%d/%d/%d) addr: %pad\n",
+				buf_index, ivb->index, i, vb->index,
 				&ivb->dma_addr[i]);
 		}
 
diff --git a/drivers/media/platform/exynos4-is/media-dev.c b/drivers/media/platform/exynos4-is/media-dev.c
index f3b2dd3..e79ddbb 100644
--- a/drivers/media/platform/exynos4-is/media-dev.c
+++ b/drivers/media/platform/exynos4-is/media-dev.c
@@ -186,32 +186,20 @@
 }
 
 /**
- * __fimc_pipeline_open - update the pipeline information, enable power
- *                        of all pipeline subdevs and the sensor clock
- * @me: media entity to start graph walk with
- * @prepare: true to walk the current pipeline and acquire all subdevs
+ * __fimc_pipeline_enable - enable power of all pipeline subdevs
+ *			    and the sensor clock
+ * @ep: video pipeline structure
+ * @fmd: fimc media device
  *
  * Called with the graph mutex held.
  */
-static int __fimc_pipeline_open(struct exynos_media_pipeline *ep,
-				struct media_entity *me, bool prepare)
+static int __fimc_pipeline_enable(struct exynos_media_pipeline *ep,
+				  struct fimc_md *fmd)
 {
-	struct fimc_md *fmd = entity_to_fimc_mdev(me);
 	struct fimc_pipeline *p = to_fimc_pipeline(ep);
-	struct v4l2_subdev *sd;
 	int ret;
 
-	if (WARN_ON(p == NULL || me == NULL))
-		return -EINVAL;
-
-	if (prepare)
-		fimc_pipeline_prepare(p, me);
-
-	sd = p->subdevs[IDX_SENSOR];
-	if (sd == NULL)
-		return -EINVAL;
-
-	/* Disable PXLASYNC clock if this pipeline includes FIMC-IS */
+	/* Enable PXLASYNC clock if this pipeline includes FIMC-IS */
 	if (!IS_ERR(fmd->wbclk[CLK_IDX_WB_B]) && p->subdevs[IDX_IS_ISP]) {
 		ret = clk_prepare_enable(fmd->wbclk[CLK_IDX_WB_B]);
 		if (ret < 0)
@@ -229,6 +217,40 @@
 }
 
 /**
+ * __fimc_pipeline_open - update the pipeline information, enable power
+ *                        of all pipeline subdevs and the sensor clock
+ * @me: media entity to start graph walk with
+ * @prepare: true to walk the current pipeline and acquire all subdevs
+ *
+ * Called with the graph mutex held.
+ */
+static int __fimc_pipeline_open(struct exynos_media_pipeline *ep,
+				struct media_entity *me, bool prepare)
+{
+	struct fimc_md *fmd = entity_to_fimc_mdev(me);
+	struct fimc_pipeline *p = to_fimc_pipeline(ep);
+	struct v4l2_subdev *sd;
+
+	if (WARN_ON(p == NULL || me == NULL))
+		return -EINVAL;
+
+	if (prepare)
+		fimc_pipeline_prepare(p, me);
+
+	sd = p->subdevs[IDX_SENSOR];
+	if (sd == NULL) {
+		pr_warn("%s(): No sensor subdev\n", __func__);
+		/*
+		 * Pipeline open cannot fail so as to make it possible
+		 * for the user space to configure the pipeline.
+		 */
+		return 0;
+	}
+
+	return __fimc_pipeline_enable(ep, fmd);
+}
+
+/**
  * __fimc_pipeline_close - disable the sensor clock and pipeline power
  * @fimc: fimc device terminating the pipeline
  *
@@ -269,10 +291,43 @@
 		{ IDX_CSIS, IDX_FLITE, IDX_FIMC, IDX_SENSOR, IDX_IS_ISP },
 	};
 	struct fimc_pipeline *p = to_fimc_pipeline(ep);
+	struct fimc_md *fmd = entity_to_fimc_mdev(&p->subdevs[IDX_CSIS]->entity);
+	enum fimc_subdev_index sd_id;
 	int i, ret = 0;
 
-	if (p->subdevs[IDX_SENSOR] == NULL)
-		return -ENODEV;
+	if (p->subdevs[IDX_SENSOR] == NULL) {
+		if (!fmd->user_subdev_api) {
+			/*
+			 * Sensor must be already discovered if we
+			 * aren't in the user_subdev_api mode
+			 */
+			return -ENODEV;
+		}
+
+		/* Get pipeline sink entity */
+		if (p->subdevs[IDX_FIMC])
+			sd_id = IDX_FIMC;
+		else if (p->subdevs[IDX_IS_ISP])
+			sd_id = IDX_IS_ISP;
+		else if (p->subdevs[IDX_FLITE])
+			sd_id = IDX_FLITE;
+		else
+			return -ENODEV;
+
+		/*
+		 * Sensor could have been linked between open and STREAMON -
+		 * check if this is the case.
+		 */
+		fimc_pipeline_prepare(p, &p->subdevs[sd_id]->entity);
+
+		if (p->subdevs[IDX_SENSOR] == NULL)
+			return -ENODEV;
+
+		ret = __fimc_pipeline_enable(ep, fmd);
+		if (ret < 0)
+			return ret;
+
+	}
 
 	for (i = 0; i < IDX_MAX; i++) {
 		unsigned int idx = seq[on][i];
@@ -282,8 +337,10 @@
 		if (ret < 0 && ret != -ENOIOCTLCMD && ret != -ENODEV)
 			goto error;
 	}
+
 	return 0;
 error:
+	fimc_pipeline_s_power(p, !on);
 	for (; i >= 0; i--) {
 		unsigned int idx = seq[on][i];
 		v4l2_subdev_call(p->subdevs[idx], video, s_stream, !on);
diff --git a/drivers/media/platform/soc_camera/atmel-isi.c b/drivers/media/platform/soc_camera/atmel-isi.c
index c398b28..1af779e 100644
--- a/drivers/media/platform/soc_camera/atmel-isi.c
+++ b/drivers/media/platform/soc_camera/atmel-isi.c
@@ -795,7 +795,7 @@
 			xlate->host_fmt	= &isi_camera_formats[i];
 			xlate->code	= code.code;
 			dev_dbg(icd->parent, "Providing format %s using code %d\n",
-				isi_camera_formats[0].name, code.code);
+				xlate->host_fmt->name, xlate->code);
 		}
 		break;
 	default:
diff --git a/drivers/media/platform/soc_camera/soc_camera.c b/drivers/media/platform/soc_camera/soc_camera.c
index cc84c6d..46c7186 100644
--- a/drivers/media/platform/soc_camera/soc_camera.c
+++ b/drivers/media/platform/soc_camera/soc_camera.c
@@ -1493,6 +1493,8 @@
 					struct soc_camera_async_client, notifier);
 	struct soc_camera_device *icd = platform_get_drvdata(sasc->pdev);
 
+	icd->control = NULL;
+
 	if (icd->clk) {
 		v4l2_clk_unregister(icd->clk);
 		icd->clk = NULL;
diff --git a/drivers/media/platform/vsp1/vsp1_drv.c b/drivers/media/platform/vsp1/vsp1_drv.c
index 42dff9d..533bc79 100644
--- a/drivers/media/platform/vsp1/vsp1_drv.c
+++ b/drivers/media/platform/vsp1/vsp1_drv.c
@@ -256,7 +256,7 @@
 
 	/* Create links. */
 	list_for_each_entry(entity, &vsp1->entities, list_dev) {
-		if (entity->type == VSP1_ENTITY_LIF) {
+		if (entity->type == VSP1_ENTITY_WPF) {
 			ret = vsp1_wpf_create_links(vsp1, entity);
 			if (ret < 0)
 				goto done;
@@ -264,7 +264,10 @@
 			ret = vsp1_rpf_create_links(vsp1, entity);
 			if (ret < 0)
 				goto done;
-		} else {
+		}
+
+		if (entity->type != VSP1_ENTITY_LIF &&
+		    entity->type != VSP1_ENTITY_RPF) {
 			ret = vsp1_create_links(vsp1, entity);
 			if (ret < 0)
 				goto done;
diff --git a/drivers/media/platform/vsp1/vsp1_video.c b/drivers/media/platform/vsp1/vsp1_video.c
index 637d0d6..b4dca57 100644
--- a/drivers/media/platform/vsp1/vsp1_video.c
+++ b/drivers/media/platform/vsp1/vsp1_video.c
@@ -515,7 +515,7 @@
 	bool stopped;
 
 	spin_lock_irqsave(&pipe->irqlock, flags);
-	stopped = pipe->state == VSP1_PIPELINE_STOPPED,
+	stopped = pipe->state == VSP1_PIPELINE_STOPPED;
 	spin_unlock_irqrestore(&pipe->irqlock, flags);
 
 	return stopped;
diff --git a/drivers/media/usb/au0828/au0828-video.c b/drivers/media/usb/au0828/au0828-video.c
index 8c54fd2..a136257 100644
--- a/drivers/media/usb/au0828/au0828-video.c
+++ b/drivers/media/usb/au0828/au0828-video.c
@@ -1843,8 +1843,7 @@
 			ent->function = MEDIA_ENT_F_CONN_RF;
 			break;
 		default: /* AU0828_VMUX_DEBUG */
-			ent->function = MEDIA_ENT_F_CONN_TEST;
-			break;
+			continue;
 		}
 
 		ret = media_entity_pads_init(ent, 1, &dev->input_pad[i]);
diff --git a/drivers/media/v4l2-core/videobuf2-core.c b/drivers/media/v4l2-core/videobuf2-core.c
index c5d49d7..ff8953a 100644
--- a/drivers/media/v4l2-core/videobuf2-core.c
+++ b/drivers/media/v4l2-core/videobuf2-core.c
@@ -1063,8 +1063,11 @@
  */
 static int __qbuf_mmap(struct vb2_buffer *vb, const void *pb)
 {
-	int ret = call_bufop(vb->vb2_queue, fill_vb2_buffer,
-			vb, pb, vb->planes);
+	int ret = 0;
+
+	if (pb)
+		ret = call_bufop(vb->vb2_queue, fill_vb2_buffer,
+				 vb, pb, vb->planes);
 	return ret ? ret : call_vb_qop(vb, buf_prepare, vb);
 }
 
@@ -1077,14 +1080,16 @@
 	struct vb2_queue *q = vb->vb2_queue;
 	void *mem_priv;
 	unsigned int plane;
-	int ret;
+	int ret = 0;
 	enum dma_data_direction dma_dir =
 		q->is_output ? DMA_TO_DEVICE : DMA_FROM_DEVICE;
 	bool reacquired = vb->planes[0].mem_priv == NULL;
 
 	memset(planes, 0, sizeof(planes[0]) * vb->num_planes);
 	/* Copy relevant information provided by the userspace */
-	ret = call_bufop(vb->vb2_queue, fill_vb2_buffer, vb, pb, planes);
+	if (pb)
+		ret = call_bufop(vb->vb2_queue, fill_vb2_buffer,
+				 vb, pb, planes);
 	if (ret)
 		return ret;
 
@@ -1192,14 +1197,16 @@
 	struct vb2_queue *q = vb->vb2_queue;
 	void *mem_priv;
 	unsigned int plane;
-	int ret;
+	int ret = 0;
 	enum dma_data_direction dma_dir =
 		q->is_output ? DMA_TO_DEVICE : DMA_FROM_DEVICE;
 	bool reacquired = vb->planes[0].mem_priv == NULL;
 
 	memset(planes, 0, sizeof(planes[0]) * vb->num_planes);
 	/* Copy relevant information provided by the userspace */
-	ret = call_bufop(vb->vb2_queue, fill_vb2_buffer, vb, pb, planes);
+	if (pb)
+		ret = call_bufop(vb->vb2_queue, fill_vb2_buffer,
+				 vb, pb, planes);
 	if (ret)
 		return ret;
 
@@ -1520,7 +1527,8 @@
 	q->waiting_for_buffers = false;
 	vb->state = VB2_BUF_STATE_QUEUED;
 
-	call_void_bufop(q, copy_timestamp, vb, pb);
+	if (pb)
+		call_void_bufop(q, copy_timestamp, vb, pb);
 
 	trace_vb2_qbuf(q, vb);
 
@@ -1532,7 +1540,8 @@
 		__enqueue_in_driver(vb);
 
 	/* Fill buffer information for the userspace */
-	call_void_bufop(q, fill_user_buffer, vb, pb);
+	if (pb)
+		call_void_bufop(q, fill_user_buffer, vb, pb);
 
 	/*
 	 * If streamon has been called, and we haven't yet called
@@ -1731,7 +1740,8 @@
  * The return values from this function are intended to be directly returned
  * from vidioc_dqbuf handler in driver.
  */
-int vb2_core_dqbuf(struct vb2_queue *q, void *pb, bool nonblocking)
+int vb2_core_dqbuf(struct vb2_queue *q, unsigned int *pindex, void *pb,
+		   bool nonblocking)
 {
 	struct vb2_buffer *vb = NULL;
 	int ret;
@@ -1754,8 +1764,12 @@
 
 	call_void_vb_qop(vb, buf_finish, vb);
 
+	if (pindex)
+		*pindex = vb->index;
+
 	/* Fill buffer information for the userspace */
-	call_void_bufop(q, fill_user_buffer, vb, pb);
+	if (pb)
+		call_void_bufop(q, fill_user_buffer, vb, pb);
 
 	/* Remove from videobuf queue */
 	list_del(&vb->queued_entry);
@@ -1828,7 +1842,7 @@
 	 * that's done in dqbuf, but that's not going to happen when we
 	 * cancel the whole queue. Note: this code belongs here, not in
 	 * __vb2_dqbuf() since in vb2_internal_dqbuf() there is a critical
-	 * call to __fill_v4l2_buffer() after buf_finish(). That order can't
+	 * call to __fill_user_buffer() after buf_finish(). That order can't
 	 * be changed, so we can't move the buf_finish() to __vb2_dqbuf().
 	 */
 	for (i = 0; i < q->num_buffers; ++i) {
@@ -2357,7 +2371,6 @@
 	unsigned int count;
 	unsigned int type;
 	unsigned int memory;
-	struct vb2_buffer *b;
 	struct vb2_fileio_buf bufs[VB2_MAX_FRAME];
 	unsigned int cur_index;
 	unsigned int initial_index;
@@ -2410,12 +2423,6 @@
 	if (fileio == NULL)
 		return -ENOMEM;
 
-	fileio->b = kzalloc(q->buf_struct_size, GFP_KERNEL);
-	if (fileio->b == NULL) {
-		kfree(fileio);
-		return -ENOMEM;
-	}
-
 	fileio->read_once = q->fileio_read_once;
 	fileio->write_immediately = q->fileio_write_immediately;
 
@@ -2460,13 +2467,7 @@
 		 * Queue all buffers.
 		 */
 		for (i = 0; i < q->num_buffers; i++) {
-			struct vb2_buffer *b = fileio->b;
-
-			memset(b, 0, q->buf_struct_size);
-			b->type = q->type;
-			b->memory = q->memory;
-			b->index = i;
-			ret = vb2_core_qbuf(q, i, b);
+			ret = vb2_core_qbuf(q, i, NULL);
 			if (ret)
 				goto err_reqbufs;
 			fileio->bufs[i].queued = 1;
@@ -2511,7 +2512,6 @@
 		q->fileio = NULL;
 		fileio->count = 0;
 		vb2_core_reqbufs(q, fileio->memory, &fileio->count);
-		kfree(fileio->b);
 		kfree(fileio);
 		dprintk(3, "file io emulator closed\n");
 	}
@@ -2539,7 +2539,8 @@
 	 * else is able to provide this information with the write() operation.
 	 */
 	bool copy_timestamp = !read && q->copy_timestamp;
-	int ret, index;
+	unsigned index;
+	int ret;
 
 	dprintk(3, "mode %s, offset %ld, count %zd, %sblocking\n",
 		read ? "read" : "write", (long)*ppos, count,
@@ -2564,22 +2565,20 @@
 	 */
 	index = fileio->cur_index;
 	if (index >= q->num_buffers) {
-		struct vb2_buffer *b = fileio->b;
+		struct vb2_buffer *b;
 
 		/*
 		 * Call vb2_dqbuf to get buffer back.
 		 */
-		memset(b, 0, q->buf_struct_size);
-		b->type = q->type;
-		b->memory = q->memory;
-		ret = vb2_core_dqbuf(q, b, nonblock);
+		ret = vb2_core_dqbuf(q, &index, NULL, nonblock);
 		dprintk(5, "vb2_dqbuf result: %d\n", ret);
 		if (ret)
 			return ret;
 		fileio->dq_count += 1;
 
-		fileio->cur_index = index = b->index;
+		fileio->cur_index = index;
 		buf = &fileio->bufs[index];
+		b = q->bufs[index];
 
 		/*
 		 * Get number of bytes filled by the driver
@@ -2630,7 +2629,7 @@
 	 * Queue next buffer if required.
 	 */
 	if (buf->pos == buf->size || (!read && fileio->write_immediately)) {
-		struct vb2_buffer *b = fileio->b;
+		struct vb2_buffer *b = q->bufs[index];
 
 		/*
 		 * Check if this is the last buffer to read.
@@ -2643,15 +2642,11 @@
 		/*
 		 * Call vb2_qbuf and give buffer to the driver.
 		 */
-		memset(b, 0, q->buf_struct_size);
-		b->type = q->type;
-		b->memory = q->memory;
-		b->index = index;
 		b->planes[0].bytesused = buf->pos;
 
 		if (copy_timestamp)
 			b->timestamp = ktime_get_ns();
-		ret = vb2_core_qbuf(q, index, b);
+		ret = vb2_core_qbuf(q, index, NULL);
 		dprintk(5, "vb2_dbuf result: %d\n", ret);
 		if (ret)
 			return ret;
@@ -2713,10 +2708,9 @@
 {
 	struct vb2_queue *q = data;
 	struct vb2_threadio_data *threadio = q->threadio;
-	struct vb2_fileio_data *fileio = q->fileio;
 	bool copy_timestamp = false;
-	int prequeue = 0;
-	int index = 0;
+	unsigned prequeue = 0;
+	unsigned index = 0;
 	int ret = 0;
 
 	if (q->is_output) {
@@ -2728,37 +2722,34 @@
 
 	for (;;) {
 		struct vb2_buffer *vb;
-		struct vb2_buffer *b = fileio->b;
 
 		/*
 		 * Call vb2_dqbuf to get buffer back.
 		 */
-		memset(b, 0, q->buf_struct_size);
-		b->type = q->type;
-		b->memory = q->memory;
 		if (prequeue) {
-			b->index = index++;
+			vb = q->bufs[index++];
 			prequeue--;
 		} else {
 			call_void_qop(q, wait_finish, q);
 			if (!threadio->stop)
-				ret = vb2_core_dqbuf(q, b, 0);
+				ret = vb2_core_dqbuf(q, &index, NULL, 0);
 			call_void_qop(q, wait_prepare, q);
 			dprintk(5, "file io: vb2_dqbuf result: %d\n", ret);
+			if (!ret)
+				vb = q->bufs[index];
 		}
 		if (ret || threadio->stop)
 			break;
 		try_to_freeze();
 
-		vb = q->bufs[b->index];
-		if (b->state == VB2_BUF_STATE_DONE)
+		if (vb->state != VB2_BUF_STATE_ERROR)
 			if (threadio->fnc(vb, threadio->priv))
 				break;
 		call_void_qop(q, wait_finish, q);
 		if (copy_timestamp)
-			b->timestamp = ktime_get_ns();;
+			vb->timestamp = ktime_get_ns();;
 		if (!threadio->stop)
-			ret = vb2_core_qbuf(q, b->index, b);
+			ret = vb2_core_qbuf(q, vb->index, NULL);
 		call_void_qop(q, wait_prepare, q);
 		if (ret || threadio->stop)
 			break;
diff --git a/drivers/media/v4l2-core/videobuf2-v4l2.c b/drivers/media/v4l2-core/videobuf2-v4l2.c
index c9a2860..91f5521 100644
--- a/drivers/media/v4l2-core/videobuf2-v4l2.c
+++ b/drivers/media/v4l2-core/videobuf2-v4l2.c
@@ -625,7 +625,7 @@
 		return -EINVAL;
 	}
 
-	ret = vb2_core_dqbuf(q, b, nonblocking);
+	ret = vb2_core_dqbuf(q, NULL, b, nonblocking);
 
 	return ret;
 }
diff --git a/drivers/mfd/db8500-prcmu.c b/drivers/mfd/db8500-prcmu.c
index e6e4bac..12099b0 100644
--- a/drivers/mfd/db8500-prcmu.c
+++ b/drivers/mfd/db8500-prcmu.c
@@ -2048,6 +2048,7 @@
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(db8500_prcmu_config_hotmon);
 
 static int config_hot_period(u16 val)
 {
@@ -2074,11 +2075,13 @@
 
 	return config_hot_period(cycles32k);
 }
+EXPORT_SYMBOL_GPL(db8500_prcmu_start_temp_sense);
 
 int db8500_prcmu_stop_temp_sense(void)
 {
 	return config_hot_period(0xFFFF);
 }
+EXPORT_SYMBOL_GPL(db8500_prcmu_stop_temp_sense);
 
 static int prcmu_a9wdog(u8 cmd, u8 d0, u8 d1, u8 d2, u8 d3)
 {
diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
index 4c1903f..0c6c17a1 100644
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -415,7 +415,7 @@
 		delta = mftb() - psl_tb;
 		if (delta < 0)
 			delta = -delta;
-	} while (cputime_to_usecs(delta) > 16);
+	} while (tb_to_ns(delta) > 16000);
 
 	return 0;
 }
diff --git a/drivers/misc/lkdtm.c b/drivers/misc/lkdtm.c
index 11fdadc..2a6eaf1 100644
--- a/drivers/misc/lkdtm.c
+++ b/drivers/misc/lkdtm.c
@@ -103,6 +103,7 @@
 	CT_EXEC_USERSPACE,
 	CT_ACCESS_USERSPACE,
 	CT_WRITE_RO,
+	CT_WRITE_RO_AFTER_INIT,
 	CT_WRITE_KERN,
 };
 
@@ -140,6 +141,7 @@
 	"EXEC_USERSPACE",
 	"ACCESS_USERSPACE",
 	"WRITE_RO",
+	"WRITE_RO_AFTER_INIT",
 	"WRITE_KERN",
 };
 
@@ -162,6 +164,7 @@
 static u8 data_area[EXEC_SIZE];
 
 static const unsigned long rodata = 0xAA55AA55;
+static unsigned long ro_after_init __ro_after_init = 0x55AA5500;
 
 module_param(recur_count, int, 0644);
 MODULE_PARM_DESC(recur_count, " Recursion level for the stack overflow test");
@@ -503,11 +506,28 @@
 		break;
 	}
 	case CT_WRITE_RO: {
-		unsigned long *ptr;
+		/* Explicitly cast away "const" for the test. */
+		unsigned long *ptr = (unsigned long *)&rodata;
 
-		ptr = (unsigned long *)&rodata;
+		pr_info("attempting bad rodata write at %p\n", ptr);
+		*ptr ^= 0xabcd1234;
 
-		pr_info("attempting bad write at %p\n", ptr);
+		break;
+	}
+	case CT_WRITE_RO_AFTER_INIT: {
+		unsigned long *ptr = &ro_after_init;
+
+		/*
+		 * Verify we were written to during init. Since an Oops
+		 * is considered a "success", a failure is to just skip the
+		 * real test.
+		 */
+		if ((*ptr & 0xAA) != 0xAA) {
+			pr_info("%p was NOT written during init!?\n", ptr);
+			break;
+		}
+
+		pr_info("attempting bad ro_after_init write at %p\n", ptr);
 		*ptr ^= 0xabcd1234;
 
 		break;
@@ -817,6 +837,9 @@
 	int n_debugfs_entries = 1; /* Assume only the direct entry */
 	int i;
 
+	/* Make sure we can write to __ro_after_init values during __init */
+	ro_after_init |= 0xAA;
+
 	/* Register debugfs interface */
 	lkdtm_debugfs_root = debugfs_create_dir("provoke-crash", NULL);
 	if (!lkdtm_debugfs_root) {
diff --git a/drivers/misc/mei/main.c b/drivers/misc/mei/main.c
index 677d0362..80f9afc 100644
--- a/drivers/misc/mei/main.c
+++ b/drivers/misc/mei/main.c
@@ -458,7 +458,11 @@
 {
 	struct mei_cl *cl = file->private_data;
 
-	return mei_cl_notify_request(cl, file, request);
+	if (request != MEI_HBM_NOTIFICATION_START &&
+	    request != MEI_HBM_NOTIFICATION_STOP)
+		return -EINVAL;
+
+	return mei_cl_notify_request(cl, file, (u8)request);
 }
 
 /**
diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c
index 5914263..fe207e5 100644
--- a/drivers/mmc/card/block.c
+++ b/drivers/mmc/card/block.c
@@ -47,13 +47,10 @@
 #include "queue.h"
 
 MODULE_ALIAS("mmc:block");
-
-#ifdef KERNEL
 #ifdef MODULE_PARAM_PREFIX
 #undef MODULE_PARAM_PREFIX
 #endif
 #define MODULE_PARAM_PREFIX "mmcblk."
-#endif
 
 #define INAND_CMD38_ARG_EXT_CSD  113
 #define INAND_CMD38_ARG_ERASE    0x00
@@ -655,8 +652,10 @@
 	}
 
 	md = mmc_blk_get(bdev->bd_disk);
-	if (!md)
+	if (!md) {
+		err = -EINVAL;
 		goto cmd_err;
+	}
 
 	card = md->queue.card;
 	if (IS_ERR(card)) {
diff --git a/drivers/mmc/host/mmc_spi.c b/drivers/mmc/host/mmc_spi.c
index 1c1b45e..3446097 100644
--- a/drivers/mmc/host/mmc_spi.c
+++ b/drivers/mmc/host/mmc_spi.c
@@ -925,6 +925,10 @@
 
 			dma_addr = dma_map_page(dma_dev, sg_page(sg), 0,
 						PAGE_SIZE, dir);
+			if (dma_mapping_error(dma_dev, dma_addr)) {
+				data->error = -EFAULT;
+				break;
+			}
 			if (direction == DMA_TO_DEVICE)
 				t->tx_dma = dma_addr + sg->offset;
 			else
@@ -1393,10 +1397,12 @@
 		host->dma_dev = dev;
 		host->ones_dma = dma_map_single(dev, ones,
 				MMC_SPI_BLOCKSIZE, DMA_TO_DEVICE);
+		if (dma_mapping_error(dev, host->ones_dma))
+			goto fail_ones_dma;
 		host->data_dma = dma_map_single(dev, host->data,
 				sizeof(*host->data), DMA_BIDIRECTIONAL);
-
-		/* REVISIT in theory those map operations can fail... */
+		if (dma_mapping_error(dev, host->data_dma))
+			goto fail_data_dma;
 
 		dma_sync_single_for_cpu(host->dma_dev,
 				host->data_dma, sizeof(*host->data),
@@ -1462,6 +1468,11 @@
 	if (host->dma_dev)
 		dma_unmap_single(host->dma_dev, host->data_dma,
 				sizeof(*host->data), DMA_BIDIRECTIONAL);
+fail_data_dma:
+	if (host->dma_dev)
+		dma_unmap_single(host->dma_dev, host->ones_dma,
+				MMC_SPI_BLOCKSIZE, DMA_TO_DEVICE);
+fail_ones_dma:
 	kfree(host->data);
 
 fail_nobuf1:
diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c
index b6639ea..f6e4d97 100644
--- a/drivers/mmc/host/omap_hsmmc.c
+++ b/drivers/mmc/host/omap_hsmmc.c
@@ -2232,6 +2232,7 @@
 		dma_release_channel(host->tx_chan);
 	if (host->rx_chan)
 		dma_release_channel(host->rx_chan);
+	pm_runtime_dont_use_autosuspend(host->dev);
 	pm_runtime_put_sync(host->dev);
 	pm_runtime_disable(host->dev);
 	if (host->dbclk)
@@ -2253,6 +2254,7 @@
 	dma_release_channel(host->tx_chan);
 	dma_release_channel(host->rx_chan);
 
+	pm_runtime_dont_use_autosuspend(host->dev);
 	pm_runtime_put_sync(host->dev);
 	pm_runtime_disable(host->dev);
 	device_init_wakeup(&pdev->dev, false);
diff --git a/drivers/mmc/host/pxamci.c b/drivers/mmc/host/pxamci.c
index ce08896..da82477 100644
--- a/drivers/mmc/host/pxamci.c
+++ b/drivers/mmc/host/pxamci.c
@@ -86,7 +86,7 @@
 static inline void pxamci_init_ocr(struct pxamci_host *host)
 {
 #ifdef CONFIG_REGULATOR
-	host->vcc = regulator_get_optional(mmc_dev(host->mmc), "vmmc");
+	host->vcc = devm_regulator_get_optional(mmc_dev(host->mmc), "vmmc");
 
 	if (IS_ERR(host->vcc))
 		host->vcc = NULL;
@@ -654,12 +654,8 @@
 
 	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	irq = platform_get_irq(pdev, 0);
-	if (!r || irq < 0)
-		return -ENXIO;
-
-	r = request_mem_region(r->start, SZ_4K, DRIVER_NAME);
-	if (!r)
-		return -EBUSY;
+	if (irq < 0)
+		return irq;
 
 	mmc = mmc_alloc_host(sizeof(struct pxamci_host), &pdev->dev);
 	if (!mmc) {
@@ -695,7 +691,7 @@
 	host->pdata = pdev->dev.platform_data;
 	host->clkrt = CLKRT_OFF;
 
-	host->clk = clk_get(&pdev->dev, NULL);
+	host->clk = devm_clk_get(&pdev->dev, NULL);
 	if (IS_ERR(host->clk)) {
 		ret = PTR_ERR(host->clk);
 		host->clk = NULL;
@@ -727,9 +723,9 @@
 	host->irq = irq;
 	host->imask = MMC_I_MASK_ALL;
 
-	host->base = ioremap(r->start, SZ_4K);
-	if (!host->base) {
-		ret = -ENOMEM;
+	host->base = devm_ioremap_resource(&pdev->dev, r);
+	if (IS_ERR(host->base)) {
+		ret = PTR_ERR(host->base);
 		goto out;
 	}
 
@@ -742,7 +738,8 @@
 	writel(64, host->base + MMC_RESTO);
 	writel(host->imask, host->base + MMC_I_MASK);
 
-	ret = request_irq(host->irq, pxamci_irq, 0, DRIVER_NAME, host);
+	ret = devm_request_irq(&pdev->dev, host->irq, pxamci_irq, 0,
+			       DRIVER_NAME, host);
 	if (ret)
 		goto out;
 
@@ -804,7 +801,7 @@
 		dev_err(&pdev->dev, "Failed requesting gpio_ro %d\n", gpio_ro);
 		goto out;
 	} else {
-		mmc->caps |= host->pdata->gpio_card_ro_invert ?
+		mmc->caps2 |= host->pdata->gpio_card_ro_invert ?
 			0 : MMC_CAP2_RO_ACTIVE_HIGH;
 	}
 
@@ -833,14 +830,9 @@
 			dma_release_channel(host->dma_chan_rx);
 		if (host->dma_chan_tx)
 			dma_release_channel(host->dma_chan_tx);
-		if (host->base)
-			iounmap(host->base);
-		if (host->clk)
-			clk_put(host->clk);
 	}
 	if (mmc)
 		mmc_free_host(mmc);
-	release_resource(r);
 	return ret;
 }
 
@@ -859,9 +851,6 @@
 			gpio_ro = host->pdata->gpio_card_ro;
 			gpio_power = host->pdata->gpio_power;
 		}
-		if (host->vcc)
-			regulator_put(host->vcc);
-
 		if (host->pdata && host->pdata->exit)
 			host->pdata->exit(&pdev->dev, mmc);
 
@@ -870,16 +859,10 @@
 		       END_CMD_RES|PRG_DONE|DATA_TRAN_DONE,
 		       host->base + MMC_I_MASK);
 
-		free_irq(host->irq, host);
 		dmaengine_terminate_all(host->dma_chan_rx);
 		dmaengine_terminate_all(host->dma_chan_tx);
 		dma_release_channel(host->dma_chan_rx);
 		dma_release_channel(host->dma_chan_tx);
-		iounmap(host->base);
-
-		clk_put(host->clk);
-
-		release_resource(host->res);
 
 		mmc_free_host(mmc);
 	}
diff --git a/drivers/mmc/host/sdhci-acpi.c b/drivers/mmc/host/sdhci-acpi.c
index f6047fc..a5cda92 100644
--- a/drivers/mmc/host/sdhci-acpi.c
+++ b/drivers/mmc/host/sdhci-acpi.c
@@ -146,6 +146,33 @@
 	.ops = &sdhci_acpi_ops_int,
 };
 
+static int bxt_get_cd(struct mmc_host *mmc)
+{
+	int gpio_cd = mmc_gpio_get_cd(mmc);
+	struct sdhci_host *host = mmc_priv(mmc);
+	unsigned long flags;
+	int ret = 0;
+
+	if (!gpio_cd)
+		return 0;
+
+	pm_runtime_get_sync(mmc->parent);
+
+	spin_lock_irqsave(&host->lock, flags);
+
+	if (host->flags & SDHCI_DEVICE_DEAD)
+		goto out;
+
+	ret = !!(sdhci_readl(host, SDHCI_PRESENT_STATE) & SDHCI_CARD_PRESENT);
+out:
+	spin_unlock_irqrestore(&host->lock, flags);
+
+	pm_runtime_mark_last_busy(mmc->parent);
+	pm_runtime_put_autosuspend(mmc->parent);
+
+	return ret;
+}
+
 static int sdhci_acpi_emmc_probe_slot(struct platform_device *pdev,
 				      const char *hid, const char *uid)
 {
@@ -196,6 +223,9 @@
 
 	/* Platform specific code during sd probe slot goes here */
 
+	if (hid && !strcmp(hid, "80865ACA"))
+		host->mmc_host_ops.get_cd = bxt_get_cd;
+
 	return 0;
 }
 
diff --git a/drivers/mmc/host/sdhci-of-at91.c b/drivers/mmc/host/sdhci-of-at91.c
index 7e7d8f0..9cb86fb 100644
--- a/drivers/mmc/host/sdhci-of-at91.c
+++ b/drivers/mmc/host/sdhci-of-at91.c
@@ -217,6 +217,7 @@
 pm_runtime_disable:
 	pm_runtime_disable(&pdev->dev);
 	pm_runtime_set_suspended(&pdev->dev);
+	pm_runtime_put_noidle(&pdev->dev);
 clocks_disable_unprepare:
 	clk_disable_unprepare(priv->gck);
 	clk_disable_unprepare(priv->mainck);
diff --git a/drivers/mmc/host/sdhci-pci-core.c b/drivers/mmc/host/sdhci-pci-core.c
index cc851b0..df3b8ec 100644
--- a/drivers/mmc/host/sdhci-pci-core.c
+++ b/drivers/mmc/host/sdhci-pci-core.c
@@ -330,6 +330,33 @@
 	sdhci_pci_spt_drive_strength = 0x10 | ((val >> 12) & 0xf);
 }
 
+static int bxt_get_cd(struct mmc_host *mmc)
+{
+	int gpio_cd = mmc_gpio_get_cd(mmc);
+	struct sdhci_host *host = mmc_priv(mmc);
+	unsigned long flags;
+	int ret = 0;
+
+	if (!gpio_cd)
+		return 0;
+
+	pm_runtime_get_sync(mmc->parent);
+
+	spin_lock_irqsave(&host->lock, flags);
+
+	if (host->flags & SDHCI_DEVICE_DEAD)
+		goto out;
+
+	ret = !!(sdhci_readl(host, SDHCI_PRESENT_STATE) & SDHCI_CARD_PRESENT);
+out:
+	spin_unlock_irqrestore(&host->lock, flags);
+
+	pm_runtime_mark_last_busy(mmc->parent);
+	pm_runtime_put_autosuspend(mmc->parent);
+
+	return ret;
+}
+
 static int byt_emmc_probe_slot(struct sdhci_pci_slot *slot)
 {
 	slot->host->mmc->caps |= MMC_CAP_8_BIT_DATA | MMC_CAP_NONREMOVABLE |
@@ -362,6 +389,10 @@
 	slot->cd_con_id = NULL;
 	slot->cd_idx = 0;
 	slot->cd_override_level = true;
+	if (slot->chip->pdev->device == PCI_DEVICE_ID_INTEL_BXT_SD ||
+	    slot->chip->pdev->device == PCI_DEVICE_ID_INTEL_APL_SD)
+		slot->host->mmc_host_ops.get_cd = bxt_get_cd;
+
 	return 0;
 }
 
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index d622435..add9fdf 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -1360,7 +1360,7 @@
 	sdhci_runtime_pm_get(host);
 
 	/* Firstly check card presence */
-	present = sdhci_do_get_cd(host);
+	present = mmc->ops->get_cd(mmc);
 
 	spin_lock_irqsave(&host->lock, flags);
 
@@ -2849,6 +2849,8 @@
 
 	host = mmc_priv(mmc);
 	host->mmc = mmc;
+	host->mmc_host_ops = sdhci_ops;
+	mmc->ops = &host->mmc_host_ops;
 
 	return host;
 }
@@ -3037,7 +3039,6 @@
 	/*
 	 * Set host parameters.
 	 */
-	mmc->ops = &sdhci_ops;
 	max_clk = host->max_clk;
 
 	if (host->ops->get_min_clock)
diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h
index 7654ae5..0115e99 100644
--- a/drivers/mmc/host/sdhci.h
+++ b/drivers/mmc/host/sdhci.h
@@ -430,6 +430,7 @@
 
 	/* Internal data */
 	struct mmc_host *mmc;	/* MMC structure */
+	struct mmc_host_ops mmc_host_ops;	/* MMC host ops */
 	u64 dma_mask;		/* custom DMA mask */
 
 #if defined(CONFIG_LEDS_CLASS) || defined(CONFIG_LEDS_CLASS_MODULE)
diff --git a/drivers/mmc/host/sh_mmcif.c b/drivers/mmc/host/sh_mmcif.c
index 1ca8a13..6234eab3 100644
--- a/drivers/mmc/host/sh_mmcif.c
+++ b/drivers/mmc/host/sh_mmcif.c
@@ -445,7 +445,7 @@
 							pdata->slave_id_rx);
 	} else {
 		host->chan_tx = dma_request_slave_channel(dev, "tx");
-		host->chan_tx = dma_request_slave_channel(dev, "rx");
+		host->chan_rx = dma_request_slave_channel(dev, "rx");
 	}
 	dev_dbg(dev, "%s: got channel TX %p RX %p\n", __func__, host->chan_tx,
 		host->chan_rx);
diff --git a/drivers/mtd/tests/mtd_nandecctest.c b/drivers/mtd/tests/mtd_nandecctest.c
index 7931615..88b6c81 100644
--- a/drivers/mtd/tests/mtd_nandecctest.c
+++ b/drivers/mtd/tests/mtd_nandecctest.c
@@ -187,7 +187,7 @@
 	__nand_calculate_ecc(error_data, size, calc_ecc);
 	ret = __nand_correct_data(error_data, error_ecc, calc_ecc, size);
 
-	return (ret == -1) ? 0 : -EINVAL;
+	return (ret == -EBADMSG) ? 0 : -EINVAL;
 }
 
 static const struct nand_ecc_test nand_ecc_test[] = {
diff --git a/drivers/mtd/ubi/upd.c b/drivers/mtd/ubi/upd.c
index 2a1b6e0..0134ba3 100644
--- a/drivers/mtd/ubi/upd.c
+++ b/drivers/mtd/ubi/upd.c
@@ -193,7 +193,7 @@
 	vol->changing_leb = 1;
 	vol->ch_lnum = req->lnum;
 
-	vol->upd_buf = vmalloc(req->bytes);
+	vol->upd_buf = vmalloc(ALIGN((int)req->bytes, ubi->min_io_size));
 	if (!vol->upd_buf)
 		return -ENOMEM;
 
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 56b5605..b7f1a99 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -214,6 +214,8 @@
 static struct rtnl_link_stats64 *bond_get_stats(struct net_device *bond_dev,
 						struct rtnl_link_stats64 *stats);
 static void bond_slave_arr_handler(struct work_struct *work);
+static bool bond_time_in_interval(struct bonding *bond, unsigned long last_act,
+				  int mod);
 
 /*---------------------------- General routines -----------------------------*/
 
@@ -2127,6 +2129,7 @@
 			continue;
 
 		case BOND_LINK_UP:
+			bond_update_speed_duplex(slave);
 			bond_set_slave_link_state(slave, BOND_LINK_UP,
 						  BOND_SLAVE_NOTIFY_NOW);
 			slave->last_link_up = jiffies;
@@ -2459,7 +2462,7 @@
 		 struct slave *slave)
 {
 	struct arphdr *arp = (struct arphdr *)skb->data;
-	struct slave *curr_active_slave;
+	struct slave *curr_active_slave, *curr_arp_slave;
 	unsigned char *arp_ptr;
 	__be32 sip, tip;
 	int alen, is_arp = skb->protocol == __cpu_to_be16(ETH_P_ARP);
@@ -2506,26 +2509,41 @@
 		     &sip, &tip);
 
 	curr_active_slave = rcu_dereference(bond->curr_active_slave);
+	curr_arp_slave = rcu_dereference(bond->current_arp_slave);
 
-	/* Backup slaves won't see the ARP reply, but do come through
-	 * here for each ARP probe (so we swap the sip/tip to validate
-	 * the probe).  In a "redundant switch, common router" type of
-	 * configuration, the ARP probe will (hopefully) travel from
-	 * the active, through one switch, the router, then the other
-	 * switch before reaching the backup.
+	/* We 'trust' the received ARP enough to validate it if:
 	 *
-	 * We 'trust' the arp requests if there is an active slave and
-	 * it received valid arp reply(s) after it became active. This
-	 * is done to avoid endless looping when we can't reach the
+	 * (a) the slave receiving the ARP is active (which includes the
+	 * current ARP slave, if any), or
+	 *
+	 * (b) the receiving slave isn't active, but there is a currently
+	 * active slave and it received valid arp reply(s) after it became
+	 * the currently active slave, or
+	 *
+	 * (c) there is an ARP slave that sent an ARP during the prior ARP
+	 * interval, and we receive an ARP reply on any slave.  We accept
+	 * these because switch FDB update delays may deliver the ARP
+	 * reply to a slave other than the sender of the ARP request.
+	 *
+	 * Note: for (b), backup slaves are receiving the broadcast ARP
+	 * request, not a reply.  This request passes from the sending
+	 * slave through the L2 switch(es) to the receiving slave.  Since
+	 * this is checking the request, sip/tip are swapped for
+	 * validation.
+	 *
+	 * This is done to avoid endless looping when we can't reach the
 	 * arp_ip_target and fool ourselves with our own arp requests.
 	 */
-
 	if (bond_is_active_slave(slave))
 		bond_validate_arp(bond, slave, sip, tip);
 	else if (curr_active_slave &&
 		 time_after(slave_last_rx(bond, curr_active_slave),
 			    curr_active_slave->last_link_up))
 		bond_validate_arp(bond, slave, tip, sip);
+	else if (curr_arp_slave && (arp->ar_op == htons(ARPOP_REPLY)) &&
+		 bond_time_in_interval(bond,
+				       dev_trans_start(curr_arp_slave->dev), 1))
+		bond_validate_arp(bond, slave, sip, tip);
 
 out_unlock:
 	if (arp != (struct arphdr *)skb->data)
diff --git a/drivers/net/can/spi/mcp251x.c b/drivers/net/can/spi/mcp251x.c
index 575790e..74a7dfe 100644
--- a/drivers/net/can/spi/mcp251x.c
+++ b/drivers/net/can/spi/mcp251x.c
@@ -843,7 +843,7 @@
 		if (clear_intf)
 			mcp251x_write_bits(spi, CANINTF, clear_intf, 0x00);
 
-		if (eflag)
+		if (eflag & (EFLG_RX0OVR | EFLG_RX1OVR))
 			mcp251x_write_bits(spi, EFLG, eflag, 0x00);
 
 		/* Update can state */
diff --git a/drivers/net/can/usb/ems_usb.c b/drivers/net/can/usb/ems_usb.c
index fc5b756..eb7192f 100644
--- a/drivers/net/can/usb/ems_usb.c
+++ b/drivers/net/can/usb/ems_usb.c
@@ -117,6 +117,9 @@
  */
 #define EMS_USB_ARM7_CLOCK 8000000
 
+#define CPC_TX_QUEUE_TRIGGER_LOW	25
+#define CPC_TX_QUEUE_TRIGGER_HIGH	35
+
 /*
  * CAN-Message representation in a CPC_MSG. Message object type is
  * CPC_MSG_TYPE_CAN_FRAME or CPC_MSG_TYPE_RTR_FRAME or
@@ -278,6 +281,11 @@
 	switch (urb->status) {
 	case 0:
 		dev->free_slots = dev->intr_in_buffer[1];
+		if(dev->free_slots > CPC_TX_QUEUE_TRIGGER_HIGH){
+			if (netif_queue_stopped(netdev)){
+				netif_wake_queue(netdev);
+			}
+		}
 		break;
 
 	case -ECONNRESET: /* unlink */
@@ -526,8 +534,6 @@
 	/* Release context */
 	context->echo_index = MAX_TX_URBS;
 
-	if (netif_queue_stopped(netdev))
-		netif_wake_queue(netdev);
 }
 
 /*
@@ -587,7 +593,7 @@
 	int err, i;
 
 	dev->intr_in_buffer[0] = 0;
-	dev->free_slots = 15; /* initial size */
+	dev->free_slots = 50; /* initial size */
 
 	for (i = 0; i < MAX_RX_URBS; i++) {
 		struct urb *urb = NULL;
@@ -835,7 +841,7 @@
 
 		/* Slow down tx path */
 		if (atomic_read(&dev->active_tx_urbs) >= MAX_TX_URBS ||
-		    dev->free_slots < 5) {
+		    dev->free_slots < CPC_TX_QUEUE_TRIGGER_LOW) {
 			netif_stop_queue(netdev);
 		}
 	}
diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c
index 5eee62b..cbc99d5 100644
--- a/drivers/net/can/usb/gs_usb.c
+++ b/drivers/net/can/usb/gs_usb.c
@@ -826,9 +826,8 @@
 static void gs_destroy_candev(struct gs_can *dev)
 {
 	unregister_candev(dev->netdev);
-	free_candev(dev->netdev);
 	usb_kill_anchored_urbs(&dev->tx_submitted);
-	kfree(dev);
+	free_candev(dev->netdev);
 }
 
 static int gs_usb_probe(struct usb_interface *intf, const struct usb_device_id *id)
@@ -913,12 +912,15 @@
 	for (i = 0; i < icount; i++) {
 		dev->canch[i] = gs_make_candev(i, intf);
 		if (IS_ERR_OR_NULL(dev->canch[i])) {
+			/* save error code to return later */
+			rc = PTR_ERR(dev->canch[i]);
+
 			/* on failure destroy previously created candevs */
 			icount = i;
-			for (i = 0; i < icount; i++) {
+			for (i = 0; i < icount; i++)
 				gs_destroy_candev(dev->canch[i]);
-				dev->canch[i] = NULL;
-			}
+
+			usb_kill_anchored_urbs(&dev->rx_submitted);
 			kfree(dev);
 			return rc;
 		}
@@ -939,16 +941,12 @@
 		return;
 	}
 
-	for (i = 0; i < GS_MAX_INTF; i++) {
-		struct gs_can *can = dev->canch[i];
-
-		if (!can)
-			continue;
-
-		gs_destroy_candev(can);
-	}
+	for (i = 0; i < GS_MAX_INTF; i++)
+		if (dev->canch[i])
+			gs_destroy_candev(dev->canch[i]);
 
 	usb_kill_anchored_urbs(&dev->rx_submitted);
+	kfree(dev);
 }
 
 static const struct usb_device_id gs_usb_table[] = {
diff --git a/drivers/net/dsa/mv88e6352.c b/drivers/net/dsa/mv88e6352.c
index cc6c545..a47f52f 100644
--- a/drivers/net/dsa/mv88e6352.c
+++ b/drivers/net/dsa/mv88e6352.c
@@ -25,6 +25,7 @@
 static const struct mv88e6xxx_switch_id mv88e6352_table[] = {
 	{ PORT_SWITCH_ID_6172, "Marvell 88E6172" },
 	{ PORT_SWITCH_ID_6176, "Marvell 88E6176" },
+	{ PORT_SWITCH_ID_6240, "Marvell 88E6240" },
 	{ PORT_SWITCH_ID_6320, "Marvell 88E6320" },
 	{ PORT_SWITCH_ID_6320_A1, "Marvell 88E6320 (A1)" },
 	{ PORT_SWITCH_ID_6320_A2, "Marvell 88e6320 (A2)" },
diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c
index 9fe33fc..512c8c0 100644
--- a/drivers/net/dsa/mv88e6xxx.c
+++ b/drivers/net/dsa/mv88e6xxx.c
@@ -1532,7 +1532,7 @@
 
 	/* no PVID with ranges, otherwise it's a bug */
 	if (pvid)
-		err = _mv88e6xxx_port_pvid_set(ds, port, vid);
+		err = _mv88e6xxx_port_pvid_set(ds, port, vlan->vid_end);
 unlock:
 	mutex_unlock(&ps->smi_mutex);
 
@@ -1555,7 +1555,7 @@
 
 	if (vlan.vid != vid || !vlan.valid ||
 	    vlan.data[port] == GLOBAL_VTU_DATA_MEMBER_TAG_NON_MEMBER)
-		return -ENOENT;
+		return -EOPNOTSUPP;
 
 	vlan.data[port] = GLOBAL_VTU_DATA_MEMBER_TAG_NON_MEMBER;
 
@@ -1582,6 +1582,7 @@
 			    const struct switchdev_obj_port_vlan *vlan)
 {
 	struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
+	const u16 defpvid = 4000 + ds->index * DSA_MAX_PORTS + port;
 	u16 pvid, vid;
 	int err = 0;
 
@@ -1597,7 +1598,8 @@
 			goto unlock;
 
 		if (vid == pvid) {
-			err = _mv88e6xxx_port_pvid_set(ds, port, 0);
+			/* restore reserved VLAN ID */
+			err = _mv88e6xxx_port_pvid_set(ds, port, defpvid);
 			if (err)
 				goto unlock;
 		}
@@ -1889,26 +1891,20 @@
 
 int mv88e6xxx_port_bridge_join(struct dsa_switch *ds, int port, u32 members)
 {
-	struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
-	const u16 pvid = 4000 + ds->index * DSA_MAX_PORTS + port;
-	int err;
-
-	/* The port joined a bridge, so leave its reserved VLAN */
-	mutex_lock(&ps->smi_mutex);
-	err = _mv88e6xxx_port_vlan_del(ds, port, pvid);
-	if (!err)
-		err = _mv88e6xxx_port_pvid_set(ds, port, 0);
-	mutex_unlock(&ps->smi_mutex);
-	return err;
+	return 0;
 }
 
 int mv88e6xxx_port_bridge_leave(struct dsa_switch *ds, int port, u32 members)
 {
+	return 0;
+}
+
+static int mv88e6xxx_setup_port_default_vlan(struct dsa_switch *ds, int port)
+{
 	struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
 	const u16 pvid = 4000 + ds->index * DSA_MAX_PORTS + port;
 	int err;
 
-	/* The port left the bridge, so join its reserved VLAN */
 	mutex_lock(&ps->smi_mutex);
 	err = _mv88e6xxx_port_vlan_add(ds, port, pvid, true);
 	if (!err)
@@ -2163,7 +2159,8 @@
 	 * database, and allow every port to egress frames on all other ports.
 	 */
 	reg = BIT(ps->num_ports) - 1; /* all ports */
-	ret = _mv88e6xxx_port_vlan_map_set(ds, port, reg & ~port);
+	reg &= ~BIT(port); /* except itself */
+	ret = _mv88e6xxx_port_vlan_map_set(ds, port, reg);
 	if (ret)
 		goto abort;
 
@@ -2191,8 +2188,7 @@
 		if (dsa_is_cpu_port(ds, i) || dsa_is_dsa_port(ds, i))
 			continue;
 
-		/* setup the unbridged state */
-		ret = mv88e6xxx_port_bridge_leave(ds, i, 0);
+		ret = mv88e6xxx_setup_port_default_vlan(ds, i);
 		if (ret < 0)
 			return ret;
 	}
diff --git a/drivers/net/ethernet/3com/3c59x.c b/drivers/net/ethernet/3com/3c59x.c
index 79e1a02..17b2126 100644
--- a/drivers/net/ethernet/3com/3c59x.c
+++ b/drivers/net/ethernet/3com/3c59x.c
@@ -2461,7 +2461,7 @@
 					int i;
 					pci_unmap_single(VORTEX_PCI(vp),
 							le32_to_cpu(vp->tx_ring[entry].frag[0].addr),
-							le32_to_cpu(vp->tx_ring[entry].frag[0].length),
+							le32_to_cpu(vp->tx_ring[entry].frag[0].length)&0xFFF,
 							PCI_DMA_TODEVICE);
 
 					for (i=1; i<=skb_shinfo(skb)->nr_frags; i++)
diff --git a/drivers/net/ethernet/8390/pcnet_cs.c b/drivers/net/ethernet/8390/pcnet_cs.c
index 2777289..2f79d29 100644
--- a/drivers/net/ethernet/8390/pcnet_cs.c
+++ b/drivers/net/ethernet/8390/pcnet_cs.c
@@ -1501,6 +1501,7 @@
 	PCMCIA_DEVICE_MANF_CARD(0x026f, 0x030a),
 	PCMCIA_DEVICE_MANF_CARD(0x0274, 0x1103),
 	PCMCIA_DEVICE_MANF_CARD(0x0274, 0x1121),
+	PCMCIA_DEVICE_MANF_CARD(0xc001, 0x0009),
 	PCMCIA_DEVICE_PROD_ID12("2408LAN", "Ethernet", 0x352fff7f, 0x00b2e941),
 	PCMCIA_DEVICE_PROD_ID1234("Socket", "CF 10/100 Ethernet Card", "Revision B", "05/11/06", 0xb38bcc2e, 0x4de88352, 0xeaca6c8d, 0x7e57c22e),
 	PCMCIA_DEVICE_PROD_ID123("Cardwell", "PCMCIA", "ETHERNET", 0x9533672e, 0x281f1c5d, 0x3ff7175b),
diff --git a/drivers/net/ethernet/agere/et131x.c b/drivers/net/ethernet/agere/et131x.c
index 3f3bcbe..0907ab6 100644
--- a/drivers/net/ethernet/agere/et131x.c
+++ b/drivers/net/ethernet/agere/et131x.c
@@ -2380,7 +2380,7 @@
 						    sizeof(u32),
 						    &tx_ring->tx_status_pa,
 						    GFP_KERNEL);
-	if (!tx_ring->tx_status_pa) {
+	if (!tx_ring->tx_status) {
 		dev_err(&adapter->pdev->dev,
 			"Cannot alloc memory for Tx status block\n");
 		return -ENOMEM;
diff --git a/drivers/net/ethernet/altera/altera_tse_main.c b/drivers/net/ethernet/altera/altera_tse_main.c
index 1747285..f749e4d 100644
--- a/drivers/net/ethernet/altera/altera_tse_main.c
+++ b/drivers/net/ethernet/altera/altera_tse_main.c
@@ -193,7 +193,6 @@
 			    priv->mdio->id);
 
 	mdiobus_unregister(priv->mdio);
-	kfree(priv->mdio->irq);
 	mdiobus_free(priv->mdio);
 	priv->mdio = NULL;
 }
diff --git a/drivers/net/ethernet/amd/am79c961a.c b/drivers/net/ethernet/amd/am79c961a.c
index 87e727b..fcdf5dd 100644
--- a/drivers/net/ethernet/amd/am79c961a.c
+++ b/drivers/net/ethernet/amd/am79c961a.c
@@ -50,8 +50,8 @@
 static void write_rreg(u_long base, u_int reg, u_int val)
 {
 	asm volatile(
-	"str%?h	%1, [%2]	@ NET_RAP\n\t"
-	"str%?h	%0, [%2, #-4]	@ NET_RDP"
+	"strh	%1, [%2]	@ NET_RAP\n\t"
+	"strh	%0, [%2, #-4]	@ NET_RDP"
 	:
 	: "r" (val), "r" (reg), "r" (ISAIO_BASE + 0x0464));
 }
@@ -60,8 +60,8 @@
 {
 	unsigned short v;
 	asm volatile(
-	"str%?h	%1, [%2]	@ NET_RAP\n\t"
-	"ldr%?h	%0, [%2, #-4]	@ NET_RDP"
+	"strh	%1, [%2]	@ NET_RAP\n\t"
+	"ldrh	%0, [%2, #-4]	@ NET_RDP"
 	: "=r" (v)
 	: "r" (reg), "r" (ISAIO_BASE + 0x0464));
 	return v;
@@ -70,8 +70,8 @@
 static inline void write_ireg(u_long base, u_int reg, u_int val)
 {
 	asm volatile(
-	"str%?h	%1, [%2]	@ NET_RAP\n\t"
-	"str%?h	%0, [%2, #8]	@ NET_IDP"
+	"strh	%1, [%2]	@ NET_RAP\n\t"
+	"strh	%0, [%2, #8]	@ NET_IDP"
 	:
 	: "r" (val), "r" (reg), "r" (ISAIO_BASE + 0x0464));
 }
@@ -80,8 +80,8 @@
 {
 	u_short v;
 	asm volatile(
-	"str%?h	%1, [%2]	@ NAT_RAP\n\t"
-	"ldr%?h	%0, [%2, #8]	@ NET_IDP\n\t"
+	"strh	%1, [%2]	@ NAT_RAP\n\t"
+	"ldrh	%0, [%2, #8]	@ NET_IDP\n\t"
 	: "=r" (v)
 	: "r" (reg), "r" (ISAIO_BASE + 0x0464));
 	return v;
@@ -96,7 +96,7 @@
 	offset = ISAMEM_BASE + (offset << 1);
 	length = (length + 1) & ~1;
 	if ((int)buf & 2) {
-		asm volatile("str%?h	%2, [%0], #4"
+		asm volatile("strh	%2, [%0], #4"
 		 : "=&r" (offset) : "0" (offset), "r" (buf[0] | (buf[1] << 8)));
 		buf += 2;
 		length -= 2;
@@ -104,20 +104,20 @@
 	while (length > 8) {
 		register unsigned int tmp asm("r2"), tmp2 asm("r3");
 		asm volatile(
-			"ldm%?ia	%0!, {%1, %2}"
+			"ldmia	%0!, {%1, %2}"
 			: "+r" (buf), "=&r" (tmp), "=&r" (tmp2));
 		length -= 8;
 		asm volatile(
-			"str%?h	%1, [%0], #4\n\t"
-			"mov%?	%1, %1, lsr #16\n\t"
-			"str%?h	%1, [%0], #4\n\t"
-			"str%?h	%2, [%0], #4\n\t"
-			"mov%?	%2, %2, lsr #16\n\t"
-			"str%?h	%2, [%0], #4"
+			"strh	%1, [%0], #4\n\t"
+			"mov	%1, %1, lsr #16\n\t"
+			"strh	%1, [%0], #4\n\t"
+			"strh	%2, [%0], #4\n\t"
+			"mov	%2, %2, lsr #16\n\t"
+			"strh	%2, [%0], #4"
 		: "+r" (offset), "=&r" (tmp), "=&r" (tmp2));
 	}
 	while (length > 0) {
-		asm volatile("str%?h	%2, [%0], #4"
+		asm volatile("strh	%2, [%0], #4"
 		 : "=&r" (offset) : "0" (offset), "r" (buf[0] | (buf[1] << 8)));
 		buf += 2;
 		length -= 2;
@@ -132,23 +132,23 @@
 	if ((int)buf & 2) {
 		unsigned int tmp;
 		asm volatile(
-			"ldr%?h	%2, [%0], #4\n\t"
-			"str%?b	%2, [%1], #1\n\t"
-			"mov%?	%2, %2, lsr #8\n\t"
-			"str%?b	%2, [%1], #1"
+			"ldrh	%2, [%0], #4\n\t"
+			"strb	%2, [%1], #1\n\t"
+			"mov	%2, %2, lsr #8\n\t"
+			"strb	%2, [%1], #1"
 		: "=&r" (offset), "=&r" (buf), "=r" (tmp): "0" (offset), "1" (buf));
 		length -= 2;
 	}
 	while (length > 8) {
 		register unsigned int tmp asm("r2"), tmp2 asm("r3"), tmp3;
 		asm volatile(
-			"ldr%?h	%2, [%0], #4\n\t"
-			"ldr%?h	%4, [%0], #4\n\t"
-			"ldr%?h	%3, [%0], #4\n\t"
-			"orr%?	%2, %2, %4, lsl #16\n\t"
-			"ldr%?h	%4, [%0], #4\n\t"
-			"orr%?	%3, %3, %4, lsl #16\n\t"
-			"stm%?ia	%1!, {%2, %3}"
+			"ldrh	%2, [%0], #4\n\t"
+			"ldrh	%4, [%0], #4\n\t"
+			"ldrh	%3, [%0], #4\n\t"
+			"orr	%2, %2, %4, lsl #16\n\t"
+			"ldrh	%4, [%0], #4\n\t"
+			"orr	%3, %3, %4, lsl #16\n\t"
+			"stmia	%1!, {%2, %3}"
 		: "=&r" (offset), "=&r" (buf), "=r" (tmp), "=r" (tmp2), "=r" (tmp3)
 		: "0" (offset), "1" (buf));
 		length -= 8;
@@ -156,10 +156,10 @@
 	while (length > 0) {
 		unsigned int tmp;
 		asm volatile(
-			"ldr%?h	%2, [%0], #4\n\t"
-			"str%?b	%2, [%1], #1\n\t"
-			"mov%?	%2, %2, lsr #8\n\t"
-			"str%?b	%2, [%1], #1"
+			"ldrh	%2, [%0], #4\n\t"
+			"strb	%2, [%1], #1\n\t"
+			"mov	%2, %2, lsr #8\n\t"
+			"strb	%2, [%1], #1"
 		: "=&r" (offset), "=&r" (buf), "=r" (tmp) : "0" (offset), "1" (buf));
 		length -= 2;
 	}
diff --git a/drivers/net/ethernet/amd/lance.c b/drivers/net/ethernet/amd/lance.c
index 256f590..3a7ebfd 100644
--- a/drivers/net/ethernet/amd/lance.c
+++ b/drivers/net/ethernet/amd/lance.c
@@ -547,8 +547,8 @@
 	/* Make certain the data structures used by the LANCE are aligned and DMAble. */
 
 	lp = kzalloc(sizeof(*lp), GFP_DMA | GFP_KERNEL);
-	if(lp==NULL)
-		return -ENODEV;
+	if (!lp)
+		return -ENOMEM;
 	if (lance_debug > 6) printk(" (#0x%05lx)", (unsigned long)lp);
 	dev->ml_priv = lp;
 	lp->name = chipname;
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
index a4799c1..5eb9b20 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
@@ -628,6 +628,7 @@
 	int ret;
 
 	ring = pdata->rx_ring;
+	irq_set_status_flags(ring->irq, IRQ_DISABLE_UNLAZY);
 	ret = devm_request_irq(dev, ring->irq, xgene_enet_rx_irq,
 			       IRQF_SHARED, ring->irq_name, ring);
 	if (ret)
@@ -635,6 +636,7 @@
 
 	if (pdata->cq_cnt) {
 		ring = pdata->tx_ring->cp_ring;
+		irq_set_status_flags(ring->irq, IRQ_DISABLE_UNLAZY);
 		ret = devm_request_irq(dev, ring->irq, xgene_enet_rx_irq,
 				       IRQF_SHARED, ring->irq_name, ring);
 		if (ret) {
@@ -649,15 +651,19 @@
 static void xgene_enet_free_irq(struct net_device *ndev)
 {
 	struct xgene_enet_pdata *pdata;
+	struct xgene_enet_desc_ring *ring;
 	struct device *dev;
 
 	pdata = netdev_priv(ndev);
 	dev = ndev_to_dev(ndev);
-	devm_free_irq(dev, pdata->rx_ring->irq, pdata->rx_ring);
+	ring = pdata->rx_ring;
+	irq_clear_status_flags(ring->irq, IRQ_DISABLE_UNLAZY);
+	devm_free_irq(dev, ring->irq, ring);
 
 	if (pdata->cq_cnt) {
-		devm_free_irq(dev, pdata->tx_ring->cp_ring->irq,
-			      pdata->tx_ring->cp_ring);
+		ring = pdata->tx_ring->cp_ring;
+		irq_clear_status_flags(ring->irq, IRQ_DISABLE_UNLAZY);
+		devm_free_irq(dev, ring->irq, ring);
 	}
 }
 
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.h b/drivers/net/ethernet/apm/xgene/xgene_enet_main.h
index 70d5b62..248dfc4 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.h
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.h
@@ -25,6 +25,7 @@
 #include <linux/acpi.h>
 #include <linux/clk.h>
 #include <linux/efi.h>
+#include <linux/irq.h>
 #include <linux/io.h>
 #include <linux/of_platform.h>
 #include <linux/of_net.h>
diff --git a/drivers/net/ethernet/arc/emac_main.c b/drivers/net/ethernet/arc/emac_main.c
index abe1eab..6446af1 100644
--- a/drivers/net/ethernet/arc/emac_main.c
+++ b/drivers/net/ethernet/arc/emac_main.c
@@ -163,7 +163,7 @@
 		struct sk_buff *skb = tx_buff->skb;
 		unsigned int info = le32_to_cpu(txbd->info);
 
-		if ((info & FOR_EMAC) || !txbd->data)
+		if ((info & FOR_EMAC) || !txbd->data || !skb)
 			break;
 
 		if (unlikely(info & (DROP | DEFR | LTCL | UFLO))) {
@@ -191,6 +191,7 @@
 
 		txbd->data = 0;
 		txbd->info = 0;
+		tx_buff->skb = NULL;
 
 		*txbd_dirty = (*txbd_dirty + 1) % TX_BD_NUM;
 	}
@@ -446,6 +447,9 @@
 		*last_rx_bd = (*last_rx_bd + 1) % RX_BD_NUM;
 	}
 
+	priv->txbd_curr = 0;
+	priv->txbd_dirty = 0;
+
 	/* Clean Tx BD's */
 	memset(priv->txbd, 0, TX_RING_SZ);
 
@@ -514,6 +518,64 @@
 }
 
 /**
+ * arc_free_tx_queue - free skb from tx queue
+ * @ndev:	Pointer to the network device.
+ *
+ * This function must be called while EMAC disable
+ */
+static void arc_free_tx_queue(struct net_device *ndev)
+{
+	struct arc_emac_priv *priv = netdev_priv(ndev);
+	unsigned int i;
+
+	for (i = 0; i < TX_BD_NUM; i++) {
+		struct arc_emac_bd *txbd = &priv->txbd[i];
+		struct buffer_state *tx_buff = &priv->tx_buff[i];
+
+		if (tx_buff->skb) {
+			dma_unmap_single(&ndev->dev, dma_unmap_addr(tx_buff, addr),
+					 dma_unmap_len(tx_buff, len), DMA_TO_DEVICE);
+
+			/* return the sk_buff to system */
+			dev_kfree_skb_irq(tx_buff->skb);
+		}
+
+		txbd->info = 0;
+		txbd->data = 0;
+		tx_buff->skb = NULL;
+	}
+}
+
+/**
+ * arc_free_rx_queue - free skb from rx queue
+ * @ndev:	Pointer to the network device.
+ *
+ * This function must be called while EMAC disable
+ */
+static void arc_free_rx_queue(struct net_device *ndev)
+{
+	struct arc_emac_priv *priv = netdev_priv(ndev);
+	unsigned int i;
+
+	for (i = 0; i < RX_BD_NUM; i++) {
+		struct arc_emac_bd *rxbd = &priv->rxbd[i];
+		struct buffer_state *rx_buff = &priv->rx_buff[i];
+
+		if (rx_buff->skb) {
+			dma_unmap_single(&ndev->dev, dma_unmap_addr(rx_buff, addr),
+					dma_unmap_len(rx_buff, len), DMA_FROM_DEVICE);
+
+			/* return the sk_buff to system */
+			dev_kfree_skb_irq(rx_buff->skb);
+		}
+
+		rxbd->info = 0;
+		rxbd->data = 0;
+		rx_buff->skb = NULL;
+	}
+}
+
+/**
  * arc_emac_stop - Close the network device.
  * @ndev:	Pointer to the network device.
  *
@@ -534,6 +596,10 @@
 	/* Disable EMAC */
 	arc_reg_clr(priv, R_CTRL, EN_MASK);
 
+	/* Return the sk_buff to system */
+	arc_free_tx_queue(ndev);
+	arc_free_rx_queue(ndev);
+
 	return 0;
 }
 
@@ -610,7 +676,6 @@
 	dma_unmap_addr_set(&priv->tx_buff[*txbd_curr], addr, addr);
 	dma_unmap_len_set(&priv->tx_buff[*txbd_curr], len, len);
 
-	priv->tx_buff[*txbd_curr].skb = skb;
 	priv->txbd[*txbd_curr].data = cpu_to_le32(addr);
 
 	/* Make sure pointer to data buffer is set */
@@ -620,6 +685,11 @@
 
 	*info = cpu_to_le32(FOR_EMAC | FIRST_OR_LAST_MASK | len);
 
+	/* Make sure info word is set */
+	wmb();
+
+	priv->tx_buff[*txbd_curr].skb = skb;
+
 	/* Increment index to point to the next BD */
 	*txbd_curr = (*txbd_curr + 1) % TX_BD_NUM;
 
diff --git a/drivers/net/ethernet/aurora/nb8800.c b/drivers/net/ethernet/aurora/nb8800.c
index ecc4a33..08a23e6 100644
--- a/drivers/net/ethernet/aurora/nb8800.c
+++ b/drivers/net/ethernet/aurora/nb8800.c
@@ -302,7 +302,7 @@
 	nb8800_tx_done(dev);
 
 again:
-	while (work < budget) {
+	do {
 		struct nb8800_rx_buf *rxb;
 		unsigned int len;
 
@@ -330,7 +330,7 @@
 		rxd->report = 0;
 		last = next;
 		work++;
-	}
+	} while (work < budget);
 
 	if (work) {
 		priv->rx_descs[last].desc.config |= DESC_EOC;
@@ -1460,7 +1460,19 @@
 		goto err_disable_clk;
 	}
 
-	priv->phy_node = of_parse_phandle(pdev->dev.of_node, "phy-handle", 0);
+	if (of_phy_is_fixed_link(pdev->dev.of_node)) {
+		ret = of_phy_register_fixed_link(pdev->dev.of_node);
+		if (ret < 0) {
+			dev_err(&pdev->dev, "bad fixed-link spec\n");
+			goto err_free_bus;
+		}
+		priv->phy_node = of_node_get(pdev->dev.of_node);
+	}
+
+	if (!priv->phy_node)
+		priv->phy_node = of_parse_phandle(pdev->dev.of_node,
+						  "phy-handle", 0);
+
 	if (!priv->phy_node) {
 		dev_err(&pdev->dev, "no PHY specified\n");
 		ret = -ENODEV;
diff --git a/drivers/net/ethernet/broadcom/Kconfig b/drivers/net/ethernet/broadcom/Kconfig
index 8550df1..19f7cd0 100644
--- a/drivers/net/ethernet/broadcom/Kconfig
+++ b/drivers/net/ethernet/broadcom/Kconfig
@@ -151,8 +151,11 @@
 
 config BGMAC
 	tristate "BCMA bus GBit core support"
-	depends on BCMA_HOST_SOC && HAS_DMA && (BCM47XX || ARCH_BCM_5301X)
+	depends on BCMA && BCMA_HOST_SOC
+	depends on HAS_DMA
+	depends on BCM47XX || ARCH_BCM_5301X || COMPILE_TEST
 	select PHYLIB
+	select FIXED_PHY
 	---help---
 	  This driver supports GBit MAC and BCM4706 GBit MAC cores on BCMA bus.
 	  They can be found on BCM47xx SoCs and provide gigabit ethernet.
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h
index 27aa080..91874d2 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h
@@ -4896,9 +4896,9 @@
  * cfc delete event data
  */
 struct cfc_del_event_data {
-	u32 cid;
-	u32 reserved0;
-	u32 reserved1;
+	__le32 cid;
+	__le32 reserved0;
+	__le32 reserved1;
 };
 
 
@@ -5114,15 +5114,9 @@
  * zone that triggers the in-bound interrupt
  */
 struct trigger_vf_zone {
-#if defined(__BIG_ENDIAN)
-	u16 reserved1;
-	u8 reserved0;
-	struct vf_pf_channel_zone_trigger vf_pf_channel;
-#elif defined(__LITTLE_ENDIAN)
 	struct vf_pf_channel_zone_trigger vf_pf_channel;
 	u8 reserved0;
 	u16 reserved1;
-#endif
 	u32 reserved2;
 };
 
@@ -5207,9 +5201,9 @@
  * set mac event data
  */
 struct eth_event_data {
-	u32 echo;
-	u32 reserved0;
-	u32 reserved1;
+	__le32 echo;
+	__le32 reserved0;
+	__le32 reserved1;
 };
 
 
@@ -5219,9 +5213,9 @@
 struct vf_pf_event_data {
 	u8 vf_id;
 	u8 reserved0;
-	u16 reserved1;
-	u32 msg_addr_lo;
-	u32 msg_addr_hi;
+	__le16 reserved1;
+	__le32 msg_addr_lo;
+	__le32 msg_addr_hi;
 };
 
 /*
@@ -5230,9 +5224,9 @@
 struct vf_flr_event_data {
 	u8 vf_id;
 	u8 reserved0;
-	u16 reserved1;
-	u32 reserved2;
-	u32 reserved3;
+	__le16 reserved1;
+	__le32 reserved2;
+	__le32 reserved3;
 };
 
 /*
@@ -5241,9 +5235,9 @@
 struct malicious_vf_event_data {
 	u8 vf_id;
 	u8 err_id;
-	u16 reserved1;
-	u32 reserved2;
-	u32 reserved3;
+	__le16 reserved1;
+	__le32 reserved2;
+	__le32 reserved3;
 };
 
 /*
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c
index d946bba..1fb8010 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c
@@ -6185,26 +6185,80 @@
 		shift -= 4;
 		digit = ((num & mask) >> shift);
 		if (digit == 0 && remove_leading_zeros) {
-			mask = mask >> 4;
-			continue;
-		} else if (digit < 0xa)
-			*str_ptr = digit + '0';
-		else
-			*str_ptr = digit - 0xa + 'a';
-		remove_leading_zeros = 0;
-		str_ptr++;
-		(*len)--;
+			*str_ptr = '0';
+		} else {
+			if (digit < 0xa)
+				*str_ptr = digit + '0';
+			else
+				*str_ptr = digit - 0xa + 'a';
+
+			remove_leading_zeros = 0;
+			str_ptr++;
+			(*len)--;
+		}
 		mask = mask >> 4;
 		if (shift == 4*4) {
+			if (remove_leading_zeros) {
+				str_ptr++;
+				(*len)--;
+			}
 			*str_ptr = '.';
 			str_ptr++;
 			(*len)--;
 			remove_leading_zeros = 1;
 		}
 	}
+	if (remove_leading_zeros)
+		(*len)--;
 	return 0;
 }
 
+static int bnx2x_3_seq_format_ver(u32 num, u8 *str, u16 *len)
+{
+	u8 *str_ptr = str;
+	u32 mask = 0x00f00000;
+	u8 shift = 8*3;
+	u8 digit;
+	u8 remove_leading_zeros = 1;
+
+	if (*len < 10) {
+		/* Need more than 10chars for this format */
+		*str_ptr = '\0';
+		(*len)--;
+		return -EINVAL;
+	}
+
+	while (shift > 0) {
+		shift -= 4;
+		digit = ((num & mask) >> shift);
+		if (digit == 0 && remove_leading_zeros) {
+			*str_ptr = '0';
+		} else {
+			if (digit < 0xa)
+				*str_ptr = digit + '0';
+			else
+				*str_ptr = digit - 0xa + 'a';
+
+			remove_leading_zeros = 0;
+			str_ptr++;
+			(*len)--;
+		}
+		mask = mask >> 4;
+		if ((shift == 4*4) || (shift == 4*2)) {
+			if (remove_leading_zeros) {
+				str_ptr++;
+				(*len)--;
+			}
+			*str_ptr = '.';
+			str_ptr++;
+			(*len)--;
+			remove_leading_zeros = 1;
+		}
+	}
+	if (remove_leading_zeros)
+		(*len)--;
+	return 0;
+}
 
 static int bnx2x_null_format_ver(u32 spirom_ver, u8 *str, u16 *len)
 {
@@ -9677,8 +9731,9 @@
 
 	if (bnx2x_is_8483x_8485x(phy)) {
 		bnx2x_cl45_read(bp, phy, MDIO_CTL_DEVAD, 0x400f, &fw_ver1);
-		bnx2x_save_spirom_version(bp, port, fw_ver1 & 0xfff,
-				phy->ver_addr);
+		if (phy->type != PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM84858)
+			fw_ver1 &= 0xfff;
+		bnx2x_save_spirom_version(bp, port, fw_ver1, phy->ver_addr);
 	} else {
 		/* For 32-bit registers in 848xx, access via MDIO2ARM i/f. */
 		/* (1) set reg 0xc200_0014(SPI_BRIDGE_CTRL_2) to 0x03000000 */
@@ -9732,16 +9787,32 @@
 static void bnx2x_848xx_set_led(struct bnx2x *bp,
 				struct bnx2x_phy *phy)
 {
-	u16 val, offset, i;
+	u16 val, led3_blink_rate, offset, i;
 	static struct bnx2x_reg_set reg_set[] = {
 		{MDIO_PMA_DEVAD, MDIO_PMA_REG_8481_LED1_MASK, 0x0080},
 		{MDIO_PMA_DEVAD, MDIO_PMA_REG_8481_LED2_MASK, 0x0018},
 		{MDIO_PMA_DEVAD, MDIO_PMA_REG_8481_LED3_MASK, 0x0006},
-		{MDIO_PMA_DEVAD, MDIO_PMA_REG_8481_LED3_BLINK, 0x0000},
 		{MDIO_PMA_DEVAD, MDIO_PMA_REG_84823_CTL_SLOW_CLK_CNT_HIGH,
 			MDIO_PMA_REG_84823_BLINK_RATE_VAL_15P9HZ},
 		{MDIO_AN_DEVAD, 0xFFFB, 0xFFFD}
 	};
+
+	if (phy->type == PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM84858) {
+		/* Set LED5 source */
+		bnx2x_cl45_write(bp, phy,
+				 MDIO_PMA_DEVAD,
+				 MDIO_PMA_REG_8481_LED5_MASK,
+				 0x90);
+		led3_blink_rate = 0x000f;
+	} else {
+		led3_blink_rate = 0x0000;
+	}
+	/* Set LED3 BLINK */
+	bnx2x_cl45_write(bp, phy,
+			 MDIO_PMA_DEVAD,
+			 MDIO_PMA_REG_8481_LED3_BLINK,
+			 led3_blink_rate);
+
 	/* PHYC_CTL_LED_CTL */
 	bnx2x_cl45_read(bp, phy,
 			MDIO_PMA_DEVAD,
@@ -9749,6 +9820,9 @@
 	val &= 0xFE00;
 	val |= 0x0092;
 
+	if (phy->type == PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM84858)
+		val |= 2 << 12; /* LED5 ON based on source */
+
 	bnx2x_cl45_write(bp, phy,
 			 MDIO_PMA_DEVAD,
 			 MDIO_PMA_REG_8481_LINK_SIGNAL, val);
@@ -9762,10 +9836,17 @@
 	else
 		offset = MDIO_PMA_REG_84823_CTL_LED_CTL_1;
 
-	/* stretch_en for LED3*/
+	if (phy->type == PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM84858)
+		val = MDIO_PMA_REG_84858_ALLOW_GPHY_ACT |
+		      MDIO_PMA_REG_84823_LED3_STRETCH_EN;
+	else
+		val = MDIO_PMA_REG_84823_LED3_STRETCH_EN;
+
+	/* stretch_en for LEDs */
 	bnx2x_cl45_read_or_write(bp, phy,
-				 MDIO_PMA_DEVAD, offset,
-				 MDIO_PMA_REG_84823_LED3_STRETCH_EN);
+				 MDIO_PMA_DEVAD,
+				 offset,
+				 val);
 }
 
 static void bnx2x_848xx_specific_func(struct bnx2x_phy *phy,
@@ -9775,7 +9856,7 @@
 	struct bnx2x *bp = params->bp;
 	switch (action) {
 	case PHY_INIT:
-		if (!bnx2x_is_8483x_8485x(phy)) {
+		if (bnx2x_is_8483x_8485x(phy)) {
 			/* Save spirom version */
 			bnx2x_save_848xx_spirom_version(phy, bp, params->port);
 		}
@@ -10036,15 +10117,20 @@
 
 static int bnx2x_84833_cmd_hdlr(struct bnx2x_phy *phy,
 				struct link_params *params, u16 fw_cmd,
-				u16 cmd_args[], int argc)
+				u16 cmd_args[], int argc, int process)
 {
 	int idx;
 	u16 val;
 	struct bnx2x *bp = params->bp;
-	/* Write CMD_OPEN_OVERRIDE to STATUS reg */
-	bnx2x_cl45_write(bp, phy, MDIO_CTL_DEVAD,
-			MDIO_848xx_CMD_HDLR_STATUS,
-			PHY84833_STATUS_CMD_OPEN_OVERRIDE);
+	int rc = 0;
+
+	if (process == PHY84833_MB_PROCESS2) {
+		/* Write CMD_OPEN_OVERRIDE to STATUS reg */
+		bnx2x_cl45_write(bp, phy, MDIO_CTL_DEVAD,
+				 MDIO_848xx_CMD_HDLR_STATUS,
+				 PHY84833_STATUS_CMD_OPEN_OVERRIDE);
+	}
+
 	for (idx = 0; idx < PHY848xx_CMDHDLR_WAIT; idx++) {
 		bnx2x_cl45_read(bp, phy, MDIO_CTL_DEVAD,
 				MDIO_848xx_CMD_HDLR_STATUS, &val);
@@ -10054,15 +10140,27 @@
 	}
 	if (idx >= PHY848xx_CMDHDLR_WAIT) {
 		DP(NETIF_MSG_LINK, "FW cmd: FW not ready.\n");
+		/* if the status is CMD_COMPLETE_PASS or CMD_COMPLETE_ERROR
+		 * clear the status to CMD_CLEAR_COMPLETE
+		 */
+		if (val == PHY84833_STATUS_CMD_COMPLETE_PASS ||
+		    val == PHY84833_STATUS_CMD_COMPLETE_ERROR) {
+			bnx2x_cl45_write(bp, phy, MDIO_CTL_DEVAD,
+					 MDIO_848xx_CMD_HDLR_STATUS,
+					 PHY84833_STATUS_CMD_CLEAR_COMPLETE);
+		}
 		return -EINVAL;
 	}
-
-	/* Prepare argument(s) and issue command */
-	for (idx = 0; idx < argc; idx++) {
-		bnx2x_cl45_write(bp, phy, MDIO_CTL_DEVAD,
-				MDIO_848xx_CMD_HDLR_DATA1 + idx,
-				cmd_args[idx]);
+	if (process == PHY84833_MB_PROCESS1 ||
+	    process == PHY84833_MB_PROCESS2) {
+		/* Prepare argument(s) */
+		for (idx = 0; idx < argc; idx++) {
+			bnx2x_cl45_write(bp, phy, MDIO_CTL_DEVAD,
+					 MDIO_848xx_CMD_HDLR_DATA1 + idx,
+					 cmd_args[idx]);
+		}
 	}
+
 	bnx2x_cl45_write(bp, phy, MDIO_CTL_DEVAD,
 			MDIO_848xx_CMD_HDLR_COMMAND, fw_cmd);
 	for (idx = 0; idx < PHY848xx_CMDHDLR_WAIT; idx++) {
@@ -10076,24 +10174,30 @@
 	if ((idx >= PHY848xx_CMDHDLR_WAIT) ||
 	    (val == PHY84833_STATUS_CMD_COMPLETE_ERROR)) {
 		DP(NETIF_MSG_LINK, "FW cmd failed.\n");
-		return -EINVAL;
+		rc = -EINVAL;
 	}
-	/* Gather returning data */
-	for (idx = 0; idx < argc; idx++) {
-		bnx2x_cl45_read(bp, phy, MDIO_CTL_DEVAD,
-				MDIO_848xx_CMD_HDLR_DATA1 + idx,
-				&cmd_args[idx]);
+	if (process == PHY84833_MB_PROCESS3 && rc == 0) {
+		/* Gather returning data */
+		for (idx = 0; idx < argc; idx++) {
+			bnx2x_cl45_read(bp, phy, MDIO_CTL_DEVAD,
+					MDIO_848xx_CMD_HDLR_DATA1 + idx,
+					&cmd_args[idx]);
+		}
 	}
-	bnx2x_cl45_write(bp, phy, MDIO_CTL_DEVAD,
-			MDIO_848xx_CMD_HDLR_STATUS,
-			PHY84833_STATUS_CMD_CLEAR_COMPLETE);
-	return 0;
+	if (val == PHY84833_STATUS_CMD_COMPLETE_ERROR ||
+	    val == PHY84833_STATUS_CMD_COMPLETE_PASS) {
+		bnx2x_cl45_write(bp, phy, MDIO_CTL_DEVAD,
+				 MDIO_848xx_CMD_HDLR_STATUS,
+				 PHY84833_STATUS_CMD_CLEAR_COMPLETE);
+	}
+	return rc;
 }
 
 static int bnx2x_848xx_cmd_hdlr(struct bnx2x_phy *phy,
 				struct link_params *params,
 				u16 fw_cmd,
-				u16 cmd_args[], int argc)
+					   u16 cmd_args[], int argc,
+					   int process)
 {
 	struct bnx2x *bp = params->bp;
 
@@ -10106,7 +10210,7 @@
 					    argc);
 	} else {
 		return bnx2x_84833_cmd_hdlr(phy, params, fw_cmd, cmd_args,
-					    argc);
+					    argc, process);
 	}
 }
 
@@ -10133,7 +10237,7 @@
 
 	status = bnx2x_848xx_cmd_hdlr(phy, params,
 				      PHY848xx_CMD_SET_PAIR_SWAP, data,
-				      PHY848xx_CMDHDLR_MAX_ARGS);
+				      2, PHY84833_MB_PROCESS2);
 	if (status == 0)
 		DP(NETIF_MSG_LINK, "Pairswap OK, val=0x%x\n", data[1]);
 
@@ -10222,8 +10326,8 @@
 	DP(NETIF_MSG_LINK, "Don't Advertise 10GBase-T EEE\n");
 
 	/* Prevent Phy from working in EEE and advertising it */
-	rc = bnx2x_848xx_cmd_hdlr(phy, params,
-				  PHY848xx_CMD_SET_EEE_MODE, &cmd_args, 1);
+	rc = bnx2x_848xx_cmd_hdlr(phy, params, PHY848xx_CMD_SET_EEE_MODE,
+				  &cmd_args, 1, PHY84833_MB_PROCESS1);
 	if (rc) {
 		DP(NETIF_MSG_LINK, "EEE disable failed.\n");
 		return rc;
@@ -10240,8 +10344,8 @@
 	struct bnx2x *bp = params->bp;
 	u16 cmd_args = 1;
 
-	rc = bnx2x_848xx_cmd_hdlr(phy, params,
-				  PHY848xx_CMD_SET_EEE_MODE, &cmd_args, 1);
+	rc = bnx2x_848xx_cmd_hdlr(phy, params, PHY848xx_CMD_SET_EEE_MODE,
+				  &cmd_args, 1, PHY84833_MB_PROCESS1);
 	if (rc) {
 		DP(NETIF_MSG_LINK, "EEE enable failed.\n");
 		return rc;
@@ -10362,7 +10466,7 @@
 		cmd_args[3] = PHY84833_CONSTANT_LATENCY;
 		rc = bnx2x_848xx_cmd_hdlr(phy, params,
 					  PHY848xx_CMD_SET_EEE_MODE, cmd_args,
-					  PHY848xx_CMDHDLR_MAX_ARGS);
+					  4, PHY84833_MB_PROCESS1);
 		if (rc)
 			DP(NETIF_MSG_LINK, "Cfg AutogrEEEn failed.\n");
 	}
@@ -10416,6 +10520,32 @@
 		vars->eee_status &= ~SHMEM_EEE_SUPPORTED_MASK;
 	}
 
+	if (phy->type == PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM84833) {
+		/* Additional settings for jumbo packets in 1000BASE-T mode */
+		/* Allow rx extended length */
+		bnx2x_cl45_read(bp, phy, MDIO_AN_DEVAD,
+				MDIO_AN_REG_8481_AUX_CTRL, &val);
+		val |= 0x4000;
+		bnx2x_cl45_write(bp, phy, MDIO_AN_DEVAD,
+				 MDIO_AN_REG_8481_AUX_CTRL, val);
+		/* TX FIFO Elasticity LSB */
+		bnx2x_cl45_read(bp, phy, MDIO_AN_DEVAD,
+				MDIO_AN_REG_8481_1G_100T_EXT_CTRL, &val);
+		val |= 0x1;
+		bnx2x_cl45_write(bp, phy, MDIO_AN_DEVAD,
+				 MDIO_AN_REG_8481_1G_100T_EXT_CTRL, val);
+		/* TX FIFO Elasticity MSB */
+		/* Enable expansion register 0x46 (Pattern Generator status) */
+		bnx2x_cl45_write(bp, phy, MDIO_AN_DEVAD,
+				 MDIO_AN_REG_8481_EXPANSION_REG_ACCESS, 0xf46);
+
+		bnx2x_cl45_read(bp, phy, MDIO_AN_DEVAD,
+				MDIO_AN_REG_8481_EXPANSION_REG_RD_RW, &val);
+		val |= 0x4000;
+		bnx2x_cl45_write(bp, phy, MDIO_AN_DEVAD,
+				 MDIO_AN_REG_8481_EXPANSION_REG_RD_RW, val);
+	}
+
 	if (bnx2x_is_8483x_8485x(phy)) {
 		/* Bring PHY out of super isolate mode as the final step. */
 		bnx2x_cl45_read_and_write(bp, phy,
@@ -10555,6 +10685,17 @@
 	return link_up;
 }
 
+static int bnx2x_8485x_format_ver(u32 raw_ver, u8 *str, u16 *len)
+{
+	int status = 0;
+	u32 num;
+
+	num = ((raw_ver & 0xF80) >> 7) << 16 | ((raw_ver & 0x7F) << 8) |
+	      ((raw_ver & 0xF000) >> 12);
+	status = bnx2x_3_seq_format_ver(num, str, len);
+	return status;
+}
+
 static int bnx2x_848xx_format_ver(u32 raw_ver, u8 *str, u16 *len)
 {
 	int status = 0;
@@ -10651,10 +10792,25 @@
 					0x0);
 
 		} else {
+			/* LED 1 OFF */
 			bnx2x_cl45_write(bp, phy,
 					 MDIO_PMA_DEVAD,
 					 MDIO_PMA_REG_8481_LED1_MASK,
 					 0x0);
+
+			if (phy->type ==
+				PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM84858) {
+				/* LED 2 OFF */
+				bnx2x_cl45_write(bp, phy,
+						 MDIO_PMA_DEVAD,
+						 MDIO_PMA_REG_8481_LED2_MASK,
+						 0x0);
+				/* LED 3 OFF */
+				bnx2x_cl45_write(bp, phy,
+						 MDIO_PMA_DEVAD,
+						 MDIO_PMA_REG_8481_LED3_MASK,
+						 0x0);
+			}
 		}
 		break;
 	case LED_MODE_FRONT_PANEL_OFF:
@@ -10713,6 +10869,19 @@
 						 MDIO_PMA_REG_8481_SIGNAL_MASK,
 						 0x0);
 			}
+			if (phy->type ==
+				PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM84858) {
+				/* LED 2 OFF */
+				bnx2x_cl45_write(bp, phy,
+						 MDIO_PMA_DEVAD,
+						 MDIO_PMA_REG_8481_LED2_MASK,
+						 0x0);
+				/* LED 3 OFF */
+				bnx2x_cl45_write(bp, phy,
+						 MDIO_PMA_DEVAD,
+						 MDIO_PMA_REG_8481_LED3_MASK,
+						 0x0);
+			}
 		}
 		break;
 	case LED_MODE_ON:
@@ -10776,6 +10945,25 @@
 						params->port*4,
 						NIG_MASK_MI_INT);
 				}
+			}
+			if (phy->type ==
+			    PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM84858) {
+				/* Tell LED3 to constant on */
+				bnx2x_cl45_read(bp, phy,
+						MDIO_PMA_DEVAD,
+						MDIO_PMA_REG_8481_LINK_SIGNAL,
+						&val);
+				val &= ~(7<<6);
+				val |= (2<<6);  /* A83B[8:6]= 2 */
+				bnx2x_cl45_write(bp, phy,
+						 MDIO_PMA_DEVAD,
+						 MDIO_PMA_REG_8481_LINK_SIGNAL,
+						 val);
+				bnx2x_cl45_write(bp, phy,
+						 MDIO_PMA_DEVAD,
+						 MDIO_PMA_REG_8481_LED3_MASK,
+						 0x20);
+			} else {
 				bnx2x_cl45_write(bp, phy,
 						 MDIO_PMA_DEVAD,
 						 MDIO_PMA_REG_8481_SIGNAL_MASK,
@@ -10854,6 +11042,17 @@
 					 MDIO_PMA_REG_8481_LINK_SIGNAL,
 					 val);
 			if (phy->type ==
+			    PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM84858) {
+				bnx2x_cl45_write(bp, phy,
+						 MDIO_PMA_DEVAD,
+						 MDIO_PMA_REG_8481_LED2_MASK,
+						 0x18);
+				bnx2x_cl45_write(bp, phy,
+						 MDIO_PMA_DEVAD,
+						 MDIO_PMA_REG_8481_LED3_MASK,
+						 0x06);
+			}
+			if (phy->type ==
 			    PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM84834) {
 				/* Restore LED4 source to external link,
 				 * and re-enable interrupts.
@@ -11982,7 +12181,7 @@
 	.read_status	= (read_status_t)bnx2x_848xx_read_status,
 	.link_reset	= (link_reset_t)bnx2x_848x3_link_reset,
 	.config_loopback = (config_loopback_t)NULL,
-	.format_fw_ver	= (format_fw_ver_t)bnx2x_848xx_format_ver,
+	.format_fw_ver	= (format_fw_ver_t)bnx2x_8485x_format_ver,
 	.hw_reset	= (hw_reset_t)bnx2x_84833_hw_reset_phy,
 	.set_link_led	= (set_link_led_t)bnx2x_848xx_set_link_led,
 	.phy_specific_func = (phy_specific_func_t)bnx2x_848xx_specific_func
@@ -13807,8 +14006,10 @@
 	if (CHIP_IS_E3(bp)) {
 		struct bnx2x_phy *phy = &params->phy[INT_PHY];
 		bnx2x_set_aer_mmd(params, phy);
-		if ((phy->supported & SUPPORTED_20000baseKR2_Full) &&
-		    (phy->speed_cap_mask & PORT_HW_CFG_SPEED_CAPABILITY_D0_20G))
+		if (((phy->req_line_speed == SPEED_AUTO_NEG) &&
+		     (phy->speed_cap_mask &
+		      PORT_HW_CFG_SPEED_CAPABILITY_D0_20G)) ||
+		    (phy->req_line_speed == SPEED_20000))
 			bnx2x_check_kr2_wa(params, vars, phy);
 		bnx2x_check_over_curr(params, vars);
 		if (vars->rx_tx_asic_rst)
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index 6c4e3a6..2bf9c87 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -5280,14 +5280,14 @@
 {
 	unsigned long ramrod_flags = 0;
 	int rc = 0;
-	u32 cid = elem->message.data.eth_event.echo & BNX2X_SWCID_MASK;
+	u32 echo = le32_to_cpu(elem->message.data.eth_event.echo);
+	u32 cid = echo & BNX2X_SWCID_MASK;
 	struct bnx2x_vlan_mac_obj *vlan_mac_obj;
 
 	/* Always push next commands out, don't wait here */
 	__set_bit(RAMROD_CONT, &ramrod_flags);
 
-	switch (le32_to_cpu((__force __le32)elem->message.data.eth_event.echo)
-			    >> BNX2X_SWCID_SHIFT) {
+	switch (echo >> BNX2X_SWCID_SHIFT) {
 	case BNX2X_FILTER_MAC_PENDING:
 		DP(BNX2X_MSG_SP, "Got SETUP_MAC completions\n");
 		if (CNIC_LOADED(bp) && (cid == BNX2X_ISCSI_ETH_CID(bp)))
@@ -5308,8 +5308,7 @@
 		bnx2x_handle_mcast_eqe(bp);
 		return;
 	default:
-		BNX2X_ERR("Unsupported classification command: %d\n",
-			  elem->message.data.eth_event.echo);
+		BNX2X_ERR("Unsupported classification command: 0x%x\n", echo);
 		return;
 	}
 
@@ -5478,9 +5477,6 @@
 			goto next_spqe;
 		}
 
-		/* elem CID originates from FW; actually LE */
-		cid = SW_CID((__force __le32)
-			     elem->message.data.cfc_del_event.cid);
 		opcode = elem->message.opcode;
 
 		/* handle eq element */
@@ -5503,6 +5499,10 @@
 			 * we may want to verify here that the bp state is
 			 * HALTING
 			 */
+
+			/* elem CID originates from FW; actually LE */
+			cid = SW_CID(elem->message.data.cfc_del_event.cid);
+
 			DP(BNX2X_MSG_SP,
 			   "got delete ramrod for MULTI[%d]\n", cid);
 
@@ -5596,10 +5596,8 @@
 		      BNX2X_STATE_OPENING_WAIT4_PORT):
 		case (EVENT_RING_OPCODE_RSS_UPDATE_RULES |
 		      BNX2X_STATE_CLOSING_WAIT4_HALT):
-			cid = elem->message.data.eth_event.echo &
-				BNX2X_SWCID_MASK;
 			DP(BNX2X_MSG_SP, "got RSS_UPDATE ramrod. CID %d\n",
-			   cid);
+			   SW_CID(elem->message.data.eth_event.echo));
 			rss_raw->clear_pending(rss_raw);
 			break;
 
@@ -5684,7 +5682,7 @@
 		if (status & BNX2X_DEF_SB_IDX) {
 			struct bnx2x_fastpath *fp = bnx2x_fcoe_fp(bp);
 
-		if (FCOE_INIT(bp) &&
+			if (FCOE_INIT(bp) &&
 			    (bnx2x_has_rx_work(fp) || bnx2x_has_tx_work(fp))) {
 				/* Prevent local bottom-halves from running as
 				 * we are going to change the local NAPI list.
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h
index 4dead49..a43dea2 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h
@@ -7296,6 +7296,8 @@
 #define MDIO_PMA_REG_84823_CTL_LED_CTL_1			0xa8e3
 #define MDIO_PMA_REG_84833_CTL_LED_CTL_1			0xa8ec
 #define MDIO_PMA_REG_84823_LED3_STRETCH_EN			0x0080
+/* BCM84858 only */
+#define MDIO_PMA_REG_84858_ALLOW_GPHY_ACT			0x8000
 
 /* BCM84833 only */
 #define MDIO_84833_TOP_CFG_FW_REV			0x400f
@@ -7337,6 +7339,10 @@
 #define PHY84833_STATUS_CMD_NOT_OPEN_FOR_CMDS		0x0040
 #define PHY84833_STATUS_CMD_CLEAR_COMPLETE		0x0080
 #define PHY84833_STATUS_CMD_OPEN_OVERRIDE		0xa5a5
+/* Mailbox Process */
+#define PHY84833_MB_PROCESS1				1
+#define PHY84833_MB_PROCESS2				2
+#define PHY84833_MB_PROCESS3				3
 
 /* Mailbox status set used by 84858 only */
 #define PHY84858_STATUS_CMD_RECEIVED			0x0001
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
index 9d02734..632daff 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
@@ -1672,11 +1672,12 @@
 {
 	unsigned long ramrod_flags = 0;
 	int rc = 0;
+	u32 echo = le32_to_cpu(elem->message.data.eth_event.echo);
 
 	/* Always push next commands out, don't wait here */
 	set_bit(RAMROD_CONT, &ramrod_flags);
 
-	switch (elem->message.data.eth_event.echo >> BNX2X_SWCID_SHIFT) {
+	switch (echo >> BNX2X_SWCID_SHIFT) {
 	case BNX2X_FILTER_MAC_PENDING:
 		rc = vfq->mac_obj.complete(bp, &vfq->mac_obj, elem,
 					   &ramrod_flags);
@@ -1686,8 +1687,7 @@
 					    &ramrod_flags);
 		break;
 	default:
-		BNX2X_ERR("Unsupported classification command: %d\n",
-			  elem->message.data.eth_event.echo);
+		BNX2X_ERR("Unsupported classification command: 0x%x\n", echo);
 		return;
 	}
 	if (rc < 0)
@@ -1747,16 +1747,14 @@
 
 	switch (opcode) {
 	case EVENT_RING_OPCODE_CFC_DEL:
-		cid = SW_CID((__force __le32)
-			     elem->message.data.cfc_del_event.cid);
+		cid = SW_CID(elem->message.data.cfc_del_event.cid);
 		DP(BNX2X_MSG_IOV, "checking cfc-del comp cid=%d\n", cid);
 		break;
 	case EVENT_RING_OPCODE_CLASSIFICATION_RULES:
 	case EVENT_RING_OPCODE_MULTICAST_RULES:
 	case EVENT_RING_OPCODE_FILTERS_RULES:
 	case EVENT_RING_OPCODE_RSS_UPDATE_RULES:
-		cid = (elem->message.data.eth_event.echo &
-		       BNX2X_SWCID_MASK);
+		cid = SW_CID(elem->message.data.eth_event.echo);
 		DP(BNX2X_MSG_IOV, "checking filtering comp cid=%d\n", cid);
 		break;
 	case EVENT_RING_OPCODE_VF_FLR:
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
index 1374e53..bfae300 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
@@ -2187,8 +2187,10 @@
 
 	/* Update VFDB with current message and schedule its handling */
 	mutex_lock(&BP_VFDB(bp)->event_mutex);
-	BP_VF_MBX(bp, vf_idx)->vf_addr_hi = vfpf_event->msg_addr_hi;
-	BP_VF_MBX(bp, vf_idx)->vf_addr_lo = vfpf_event->msg_addr_lo;
+	BP_VF_MBX(bp, vf_idx)->vf_addr_hi =
+		le32_to_cpu(vfpf_event->msg_addr_hi);
+	BP_VF_MBX(bp, vf_idx)->vf_addr_lo =
+		le32_to_cpu(vfpf_event->msg_addr_lo);
 	BP_VFDB(bp)->event_occur |= (1ULL << vf_idx);
 	mutex_unlock(&BP_VFDB(bp)->event_mutex);
 
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index df835f5..82f1913 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -69,7 +69,7 @@
 #define BNXT_RX_DMA_OFFSET NET_SKB_PAD
 #define BNXT_RX_COPY_THRESH 256
 
-#define BNXT_TX_PUSH_THRESH 92
+#define BNXT_TX_PUSH_THRESH 164
 
 enum board_idx {
 	BCM57301,
@@ -223,11 +223,12 @@
 	}
 
 	if (free_size == bp->tx_ring_size && length <= bp->tx_push_thresh) {
-		struct tx_push_bd *push = txr->tx_push;
-		struct tx_bd *tx_push = &push->txbd1;
-		struct tx_bd_ext *tx_push1 = &push->txbd2;
-		void *pdata = tx_push1 + 1;
-		int j;
+		struct tx_push_buffer *tx_push_buf = txr->tx_push;
+		struct tx_push_bd *tx_push = &tx_push_buf->push_bd;
+		struct tx_bd_ext *tx_push1 = &tx_push->txbd2;
+		void *pdata = tx_push_buf->data;
+		u64 *end;
+		int j, push_len;
 
 		/* Set COAL_NOW to be ready quickly for the next push */
 		tx_push->tx_bd_len_flags_type =
@@ -247,6 +248,10 @@
 		tx_push1->tx_bd_cfa_meta = cpu_to_le32(vlan_tag_flags);
 		tx_push1->tx_bd_cfa_action = cpu_to_le32(cfa_action);
 
+		end = pdata + length;
+		end = PTR_ALIGN(end, 8) - 1;
+		*end = 0;
+
 		skb_copy_from_linear_data(skb, pdata, len);
 		pdata += len;
 		for (j = 0; j < last_frag; j++) {
@@ -261,22 +266,29 @@
 			pdata += skb_frag_size(frag);
 		}
 
-		memcpy(txbd, tx_push, sizeof(*txbd));
+		txbd->tx_bd_len_flags_type = tx_push->tx_bd_len_flags_type;
+		txbd->tx_bd_haddr = txr->data_mapping;
 		prod = NEXT_TX(prod);
 		txbd = &txr->tx_desc_ring[TX_RING(prod)][TX_IDX(prod)];
 		memcpy(txbd, tx_push1, sizeof(*txbd));
 		prod = NEXT_TX(prod);
-		push->doorbell =
+		tx_push->doorbell =
 			cpu_to_le32(DB_KEY_TX_PUSH | DB_LONG_TX_PUSH | prod);
 		txr->tx_prod = prod;
 
 		netdev_tx_sent_queue(txq, skb->len);
 
-		__iowrite64_copy(txr->tx_doorbell, push,
-				 (length + sizeof(*push) + 8) / 8);
+		push_len = (length + sizeof(*tx_push) + 7) / 8;
+		if (push_len > 16) {
+			__iowrite64_copy(txr->tx_doorbell, tx_push_buf, 16);
+			__iowrite64_copy(txr->tx_doorbell + 4, tx_push_buf + 1,
+					 push_len - 16);
+		} else {
+			__iowrite64_copy(txr->tx_doorbell, tx_push_buf,
+					 push_len);
+		}
 
 		tx_buf->is_push = 1;
-
 		goto tx_done;
 	}
 
@@ -1490,10 +1502,11 @@
 
 			last = tx_buf->nr_frags;
 			j += 2;
-			for (k = 0; k < last; k++, j = NEXT_TX(j)) {
+			for (k = 0; k < last; k++, j++) {
+				int ring_idx = j & bp->tx_ring_mask;
 				skb_frag_t *frag = &skb_shinfo(skb)->frags[k];
 
-				tx_buf = &txr->tx_buf_ring[j];
+				tx_buf = &txr->tx_buf_ring[ring_idx];
 				dma_unmap_page(
 					&pdev->dev,
 					dma_unmap_addr(tx_buf, mapping),
@@ -1752,7 +1765,7 @@
 		push_size  = L1_CACHE_ALIGN(sizeof(struct tx_push_bd) +
 					bp->tx_push_thresh);
 
-		if (push_size > 128) {
+		if (push_size > 256) {
 			push_size = 0;
 			bp->tx_push_thresh = 0;
 		}
@@ -1771,7 +1784,6 @@
 			return rc;
 
 		if (bp->tx_push_size) {
-			struct tx_bd *txbd;
 			dma_addr_t mapping;
 
 			/* One pre-allocated DMA buffer to backup
@@ -1785,13 +1797,11 @@
 			if (!txr->tx_push)
 				return -ENOMEM;
 
-			txbd = &txr->tx_push->txbd1;
-
 			mapping = txr->tx_push_mapping +
 				sizeof(struct tx_push_bd);
-			txbd->tx_bd_haddr = cpu_to_le64(mapping);
+			txr->data_mapping = cpu_to_le64(mapping);
 
-			memset(txbd + 1, 0, sizeof(struct tx_bd_ext));
+			memset(txr->tx_push, 0, sizeof(struct tx_push_bd));
 		}
 		ring->queue_id = bp->q_info[j].queue_id;
 		if (i % bp->tx_nr_rings_per_tc == (bp->tx_nr_rings_per_tc - 1))
@@ -3406,7 +3416,7 @@
 	struct hwrm_ring_free_output *resp = bp->hwrm_cmd_resp_addr;
 	u16 error_code;
 
-	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_RING_FREE, -1, -1);
+	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_RING_FREE, cmpl_ring_id, -1);
 	req.ring_type = ring_type;
 	req.ring_id = cpu_to_le16(ring->fw_ring_id);
 
@@ -4545,20 +4555,18 @@
 	if (!(link_info->autoneg & BNXT_AUTONEG_FLOW_CTRL) &&
 	    link_info->force_pause_setting != link_info->req_flow_ctrl)
 		update_pause = true;
-	if (link_info->req_duplex != link_info->duplex_setting)
-		update_link = true;
 	if (!(link_info->autoneg & BNXT_AUTONEG_SPEED)) {
 		if (BNXT_AUTO_MODE(link_info->auto_mode))
 			update_link = true;
 		if (link_info->req_link_speed != link_info->force_link_speed)
 			update_link = true;
+		if (link_info->req_duplex != link_info->duplex_setting)
+			update_link = true;
 	} else {
 		if (link_info->auto_mode == BNXT_LINK_AUTO_NONE)
 			update_link = true;
 		if (link_info->advertising != link_info->auto_link_speeds)
 			update_link = true;
-		if (link_info->req_link_speed != link_info->auto_link_speed)
-			update_link = true;
 	}
 
 	if (update_link)
@@ -4635,7 +4643,7 @@
 	if (link_re_init) {
 		rc = bnxt_update_phy_setting(bp);
 		if (rc)
-			goto open_err;
+			netdev_warn(bp->dev, "failed to update phy settings\n");
 	}
 
 	if (irq_re_init) {
@@ -4653,6 +4661,7 @@
 	/* Enable TX queues */
 	bnxt_tx_enable(bp);
 	mod_timer(&bp->timer, jiffies + bp->current_interval);
+	bnxt_update_link(bp, true);
 
 	return 0;
 
@@ -4819,8 +4828,6 @@
 
 		stats->multicast += le64_to_cpu(hw_stats->rx_mcast_pkts);
 
-		stats->rx_dropped += le64_to_cpu(hw_stats->rx_drop_pkts);
-
 		stats->tx_dropped += le64_to_cpu(hw_stats->tx_drop_pkts);
 	}
 
@@ -5671,22 +5678,16 @@
 	}
 
 	/*initialize the ethool setting copy with NVM settings */
-	if (BNXT_AUTO_MODE(link_info->auto_mode))
-		link_info->autoneg |= BNXT_AUTONEG_SPEED;
-
-	if (link_info->auto_pause_setting & BNXT_LINK_PAUSE_BOTH) {
-		if (link_info->auto_pause_setting == BNXT_LINK_PAUSE_BOTH)
-			link_info->autoneg |= BNXT_AUTONEG_FLOW_CTRL;
+	if (BNXT_AUTO_MODE(link_info->auto_mode)) {
+		link_info->autoneg = BNXT_AUTONEG_SPEED |
+				     BNXT_AUTONEG_FLOW_CTRL;
+		link_info->advertising = link_info->auto_link_speeds;
 		link_info->req_flow_ctrl = link_info->auto_pause_setting;
-	} else if (link_info->force_pause_setting & BNXT_LINK_PAUSE_BOTH) {
+	} else {
+		link_info->req_link_speed = link_info->force_link_speed;
+		link_info->req_duplex = link_info->duplex_setting;
 		link_info->req_flow_ctrl = link_info->force_pause_setting;
 	}
-	link_info->req_duplex = link_info->duplex_setting;
-	if (link_info->autoneg & BNXT_AUTONEG_SPEED)
-		link_info->req_link_speed = link_info->auto_link_speed;
-	else
-		link_info->req_link_speed = link_info->force_link_speed;
-	link_info->advertising = link_info->auto_link_speeds;
 	snprintf(phy_ver, PHY_VER_STR_LEN, " ph %d.%d.%d",
 		 link_info->phy_ver[0],
 		 link_info->phy_ver[1],
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 8af3ca8..2be51b3 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -411,8 +411,8 @@
 
 #define BNXT_NUM_TESTS(bp)	0
 
-#define BNXT_DEFAULT_RX_RING_SIZE	1023
-#define BNXT_DEFAULT_TX_RING_SIZE	512
+#define BNXT_DEFAULT_RX_RING_SIZE	511
+#define BNXT_DEFAULT_TX_RING_SIZE	511
 
 #define MAX_TPA		64
 
@@ -523,10 +523,16 @@
 
 struct tx_push_bd {
 	__le32			doorbell;
-	struct tx_bd		txbd1;
+	__le32			tx_bd_len_flags_type;
+	u32			tx_bd_opaque;
 	struct tx_bd_ext	txbd2;
 };
 
+struct tx_push_buffer {
+	struct tx_push_bd	push_bd;
+	u32			data[25];
+};
+
 struct bnxt_tx_ring_info {
 	struct bnxt_napi	*bnapi;
 	u16			tx_prod;
@@ -538,8 +544,9 @@
 
 	dma_addr_t		tx_desc_mapping[MAX_TX_PAGES];
 
-	struct tx_push_bd	*tx_push;
+	struct tx_push_buffer	*tx_push;
 	dma_addr_t		tx_push_mapping;
+	__le64			data_mapping;
 
 #define BNXT_DEV_STATE_CLOSING	0x1
 	u32			dev_state;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index 922b898..3238817 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
@@ -486,15 +486,8 @@
 		speed_mask |= SUPPORTED_2500baseX_Full;
 	if (fw_speeds & BNXT_LINK_SPEED_MSK_10GB)
 		speed_mask |= SUPPORTED_10000baseT_Full;
-	/* TODO: support 25GB, 50GB with different cable type */
-	if (fw_speeds & BNXT_LINK_SPEED_MSK_20GB)
-		speed_mask |= SUPPORTED_20000baseMLD2_Full |
-			SUPPORTED_20000baseKR2_Full;
 	if (fw_speeds & BNXT_LINK_SPEED_MSK_40GB)
-		speed_mask |= SUPPORTED_40000baseKR4_Full |
-			SUPPORTED_40000baseCR4_Full |
-			SUPPORTED_40000baseSR4_Full |
-			SUPPORTED_40000baseLR4_Full;
+		speed_mask |= SUPPORTED_40000baseCR4_Full;
 
 	return speed_mask;
 }
@@ -514,15 +507,8 @@
 		speed_mask |= ADVERTISED_2500baseX_Full;
 	if (fw_speeds & BNXT_LINK_SPEED_MSK_10GB)
 		speed_mask |= ADVERTISED_10000baseT_Full;
-	/* TODO: how to advertise 20, 25, 40, 50GB with different cable type ?*/
-	if (fw_speeds & BNXT_LINK_SPEED_MSK_20GB)
-		speed_mask |= ADVERTISED_20000baseMLD2_Full |
-			      ADVERTISED_20000baseKR2_Full;
 	if (fw_speeds & BNXT_LINK_SPEED_MSK_40GB)
-		speed_mask |= ADVERTISED_40000baseKR4_Full |
-			      ADVERTISED_40000baseCR4_Full |
-			      ADVERTISED_40000baseSR4_Full |
-			      ADVERTISED_40000baseLR4_Full;
+		speed_mask |= ADVERTISED_40000baseCR4_Full;
 	return speed_mask;
 }
 
@@ -557,11 +543,12 @@
 	u16 ethtool_speed;
 
 	cmd->supported = bnxt_fw_to_ethtool_support_spds(link_info);
+	cmd->supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause;
 
 	if (link_info->auto_link_speeds)
 		cmd->supported |= SUPPORTED_Autoneg;
 
-	if (BNXT_AUTO_MODE(link_info->auto_mode)) {
+	if (link_info->autoneg) {
 		cmd->advertising =
 			bnxt_fw_to_ethtool_advertised_spds(link_info);
 		cmd->advertising |= ADVERTISED_Autoneg;
@@ -570,28 +557,16 @@
 		cmd->autoneg = AUTONEG_DISABLE;
 		cmd->advertising = 0;
 	}
-	if (link_info->auto_pause_setting & BNXT_LINK_PAUSE_BOTH) {
+	if (link_info->autoneg & BNXT_AUTONEG_FLOW_CTRL) {
 		if ((link_info->auto_pause_setting & BNXT_LINK_PAUSE_BOTH) ==
 		    BNXT_LINK_PAUSE_BOTH) {
 			cmd->advertising |= ADVERTISED_Pause;
-			cmd->supported |= SUPPORTED_Pause;
 		} else {
 			cmd->advertising |= ADVERTISED_Asym_Pause;
-			cmd->supported |= SUPPORTED_Asym_Pause;
 			if (link_info->auto_pause_setting &
 			    BNXT_LINK_PAUSE_RX)
 				cmd->advertising |= ADVERTISED_Pause;
 		}
-	} else if (link_info->force_pause_setting & BNXT_LINK_PAUSE_BOTH) {
-		if ((link_info->force_pause_setting & BNXT_LINK_PAUSE_BOTH) ==
-		    BNXT_LINK_PAUSE_BOTH) {
-			cmd->supported |= SUPPORTED_Pause;
-		} else {
-			cmd->supported |= SUPPORTED_Asym_Pause;
-			if (link_info->force_pause_setting &
-			    BNXT_LINK_PAUSE_RX)
-				cmd->supported |= SUPPORTED_Pause;
-		}
 	}
 
 	cmd->port = PORT_NONE;
@@ -670,6 +645,9 @@
 	if (advertising & ADVERTISED_10000baseT_Full)
 		fw_speed_mask |= BNXT_LINK_SPEED_MSK_10GB;
 
+	if (advertising & ADVERTISED_40000baseCR4_Full)
+		fw_speed_mask |= BNXT_LINK_SPEED_MSK_40GB;
+
 	return fw_speed_mask;
 }
 
@@ -729,7 +707,7 @@
 		speed = ethtool_cmd_speed(cmd);
 		link_info->req_link_speed = bnxt_get_fw_speed(dev, speed);
 		link_info->req_duplex = BNXT_LINK_DUPLEX_FULL;
-		link_info->autoneg &= ~BNXT_AUTONEG_SPEED;
+		link_info->autoneg = 0;
 		link_info->advertising = 0;
 	}
 
@@ -748,8 +726,7 @@
 
 	if (BNXT_VF(bp))
 		return;
-	epause->autoneg = !!(link_info->auto_pause_setting &
-			     BNXT_LINK_PAUSE_BOTH);
+	epause->autoneg = !!(link_info->autoneg & BNXT_AUTONEG_FLOW_CTRL);
 	epause->rx_pause = ((link_info->pause & BNXT_LINK_PAUSE_RX) != 0);
 	epause->tx_pause = ((link_info->pause & BNXT_LINK_PAUSE_TX) != 0);
 }
@@ -765,6 +742,9 @@
 		return rc;
 
 	if (epause->autoneg) {
+		if (!(link_info->autoneg & BNXT_AUTONEG_SPEED))
+			return -EINVAL;
+
 		link_info->autoneg |= BNXT_AUTONEG_FLOW_CTRL;
 		link_info->req_flow_ctrl |= BNXT_LINK_PAUSE_BOTH;
 	} else {
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index b15a60d..d7e01a7 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -2445,8 +2445,7 @@
 	}
 
 	/* Link UP/DOWN event */
-	if ((priv->hw_params->flags & GENET_HAS_MDIO_INTR) &&
-	    (priv->irq0_stat & UMAC_IRQ_LINK_EVENT)) {
+	if (priv->irq0_stat & UMAC_IRQ_LINK_EVENT) {
 		phy_mac_interrupt(priv->phydev,
 				  !!(priv->irq0_stat & UMAC_IRQ_LINK_UP));
 		priv->irq0_stat &= ~UMAC_IRQ_LINK_EVENT;
diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c
index 0d77596..457c3bc 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmmii.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c
@@ -401,7 +401,7 @@
 	 * Ethernet MAC ISRs
 	 */
 	if (priv->internal_phy)
-		priv->mii_bus->irq[phydev->mdio.addr] = PHY_IGNORE_INTERRUPT;
+		priv->phydev->irq = PHY_IGNORE_INTERRUPT;
 
 	return 0;
 }
diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index 9293675..3010080 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -7831,6 +7831,14 @@
 	return ret;
 }
 
+static bool tg3_tso_bug_gso_check(struct tg3_napi *tnapi, struct sk_buff *skb)
+{
+	/* Check if we will never have enough descriptors,
+	 * as gso_segs can be more than current ring size
+	 */
+	return skb_shinfo(skb)->gso_segs < tnapi->tx_pending / 3;
+}
+
 static netdev_tx_t tg3_start_xmit(struct sk_buff *, struct net_device *);
 
 /* Use GSO to workaround all TSO packets that meet HW bug conditions
@@ -7934,14 +7942,19 @@
 		 * vlan encapsulated.
 		 */
 		if (skb->protocol == htons(ETH_P_8021Q) ||
-		    skb->protocol == htons(ETH_P_8021AD))
-			return tg3_tso_bug(tp, tnapi, txq, skb);
+		    skb->protocol == htons(ETH_P_8021AD)) {
+			if (tg3_tso_bug_gso_check(tnapi, skb))
+				return tg3_tso_bug(tp, tnapi, txq, skb);
+			goto drop;
+		}
 
 		if (!skb_is_gso_v6(skb)) {
 			if (unlikely((ETH_HLEN + hdr_len) > 80) &&
-			    tg3_flag(tp, TSO_BUG))
-				return tg3_tso_bug(tp, tnapi, txq, skb);
-
+			    tg3_flag(tp, TSO_BUG)) {
+				if (tg3_tso_bug_gso_check(tnapi, skb))
+					return tg3_tso_bug(tp, tnapi, txq, skb);
+				goto drop;
+			}
 			ip_csum = iph->check;
 			ip_tot_len = iph->tot_len;
 			iph->check = 0;
@@ -8073,7 +8086,7 @@
 	if (would_hit_hwbug) {
 		tg3_tx_skb_unmap(tnapi, tnapi->tx_prod, i);
 
-		if (mss) {
+		if (mss && tg3_tso_bug_gso_check(tnapi, skb)) {
 			/* If it's a TSO packet, do GSO instead of
 			 * allocating and copying to a large linear SKB
 			 */
@@ -12016,7 +12029,7 @@
 	int ret;
 	u32 offset, len, b_offset, odd_len;
 	u8 *buf;
-	__be32 start, end;
+	__be32 start = 0, end;
 
 	if (tg3_flag(tp, NO_NVRAM) ||
 	    eeprom->magic != TG3_EEPROM_MAGIC)
diff --git a/drivers/net/ethernet/brocade/bna/bna_tx_rx.c b/drivers/net/ethernet/brocade/bna/bna_tx_rx.c
index 04b0d16..95bc470 100644
--- a/drivers/net/ethernet/brocade/bna/bna_tx_rx.c
+++ b/drivers/net/ethernet/brocade/bna/bna_tx_rx.c
@@ -987,7 +987,7 @@
 	if (!list_empty(&rxf->ucast_pending_add_q)) {
 		mac = list_first_entry(&rxf->ucast_pending_add_q,
 				       struct bna_mac, qe);
-		list_add_tail(&mac->qe, &rxf->ucast_active_q);
+		list_move_tail(&mac->qe, &rxf->ucast_active_q);
 		bna_bfi_ucast_req(rxf, mac, BFI_ENET_H2I_MAC_UCAST_ADD_REQ);
 		return 1;
 	}
diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c
index 9d9984a..50c9410 100644
--- a/drivers/net/ethernet/cadence/macb.c
+++ b/drivers/net/ethernet/cadence/macb.c
@@ -2823,7 +2823,7 @@
 	struct device_node *np = pdev->dev.of_node;
 	struct device_node *phy_node;
 	const struct macb_config *macb_config = NULL;
-	struct clk *pclk, *hclk, *tx_clk;
+	struct clk *pclk, *hclk = NULL, *tx_clk = NULL;
 	unsigned int queue_mask, num_queues;
 	struct macb_platform_data *pdata;
 	bool native_io;
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c
index b895044..34d269c 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c
@@ -1526,7 +1526,6 @@
 				struct timespec64 *ts)
 {
 	u64 ns;
-	u32 remainder;
 	unsigned long flags;
 	struct lio *lio = container_of(ptp, struct lio, ptp_info);
 	struct octeon_device *oct = (struct octeon_device *)lio->oct_dev;
@@ -1536,8 +1535,7 @@
 	ns += lio->ptp_adjust;
 	spin_unlock_irqrestore(&lio->ptp_lock, flags);
 
-	ts->tv_sec = div_u64_rem(ns, 1000000000ULL, &remainder);
-	ts->tv_nsec = remainder;
+	*ts = ns_to_timespec64(ns);
 
 	return 0;
 }
@@ -1685,7 +1683,7 @@
 	dev_dbg(&oct->pci_dev->dev, "Creating Droq: %d\n", q_no);
 	/* droq creation and local register settings. */
 	ret_val = octeon_create_droq(oct, q_no, num_descs, desc_size, app_ctx);
-	if (ret_val == -1)
+	if (ret_val < 0)
 		return ret_val;
 
 	if (ret_val == 1) {
@@ -2526,7 +2524,7 @@
 
 	octeon_swap_8B_data(&resp->timestamp, 1);
 
-	if (unlikely((skb_shinfo(skb)->tx_flags | SKBTX_IN_PROGRESS) != 0)) {
+	if (unlikely((skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS) != 0)) {
 		struct skb_shared_hwtstamps ts;
 		u64 ns = resp->timestamp;
 
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_droq.c b/drivers/net/ethernet/cavium/liquidio/octeon_droq.c
index 4dba86e..174072b 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_droq.c
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_droq.c
@@ -983,5 +983,5 @@
 
 create_droq_fail:
 	octeon_delete_droq(oct, q_no);
-	return -1;
+	return -ENOMEM;
 }
diff --git a/drivers/net/ethernet/cavium/thunder/nic.h b/drivers/net/ethernet/cavium/thunder/nic.h
index 6888288..34e9ace 100644
--- a/drivers/net/ethernet/cavium/thunder/nic.h
+++ b/drivers/net/ethernet/cavium/thunder/nic.h
@@ -116,6 +116,15 @@
 #define NIC_PF_INTR_ID_MBOX0		8
 #define NIC_PF_INTR_ID_MBOX1		9
 
+/* Minimum FIFO level before all packets for the CQ are dropped
+ *
+ * This value ensures that once a packet has been "accepted"
+ * for reception it will not get dropped due to non-availability
+ * of CQ descriptor. An errata in HW mandates this value to be
+ * atleast 0x100.
+ */
+#define NICPF_CQM_MIN_DROP_LEVEL       0x100
+
 /* Global timer for CQ timer thresh interrupts
  * Calculated for SCLK of 700Mhz
  * value written should be a 1/16th of what is expected
diff --git a/drivers/net/ethernet/cavium/thunder/nic_main.c b/drivers/net/ethernet/cavium/thunder/nic_main.c
index 4dded90..95f17f8 100644
--- a/drivers/net/ethernet/cavium/thunder/nic_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nic_main.c
@@ -304,6 +304,7 @@
 static void nic_init_hw(struct nicpf *nic)
 {
 	int i;
+	u64 cqm_cfg;
 
 	/* Enable NIC HW block */
 	nic_reg_write(nic, NIC_PF_CFG, 0x3);
@@ -340,6 +341,11 @@
 	/* Enable VLAN ethertype matching and stripping */
 	nic_reg_write(nic, NIC_PF_RX_ETYPE_0_7,
 		      (2 << 19) | (ETYPE_ALG_VLAN_STRIP << 16) | ETH_P_8021Q);
+
+	/* Check if HW expected value is higher (could be in future chips) */
+	cqm_cfg = nic_reg_read(nic, NIC_PF_CQM_CFG);
+	if (cqm_cfg < NICPF_CQM_MIN_DROP_LEVEL)
+		nic_reg_write(nic, NIC_PF_CQM_CFG, NICPF_CQM_MIN_DROP_LEVEL);
 }
 
 /* Channel parse index configuration */
diff --git a/drivers/net/ethernet/cavium/thunder/nic_reg.h b/drivers/net/ethernet/cavium/thunder/nic_reg.h
index dd536be..afb10e3 100644
--- a/drivers/net/ethernet/cavium/thunder/nic_reg.h
+++ b/drivers/net/ethernet/cavium/thunder/nic_reg.h
@@ -21,7 +21,7 @@
 #define   NIC_PF_TCP_TIMER			(0x0060)
 #define   NIC_PF_BP_CFG				(0x0080)
 #define   NIC_PF_RRM_CFG			(0x0088)
-#define   NIC_PF_CQM_CF				(0x00A0)
+#define   NIC_PF_CQM_CFG			(0x00A0)
 #define   NIC_PF_CNM_CF				(0x00A8)
 #define   NIC_PF_CNM_STATUS			(0x00B0)
 #define   NIC_PF_CQ_AVG_CFG			(0x00C0)
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
index c24cb2a..a009bc3 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
@@ -574,8 +574,7 @@
 
 static void nicvf_rcv_pkt_handler(struct net_device *netdev,
 				  struct napi_struct *napi,
-				  struct cmp_queue *cq,
-				  struct cqe_rx_t *cqe_rx, int cqe_type)
+				  struct cqe_rx_t *cqe_rx)
 {
 	struct sk_buff *skb;
 	struct nicvf *nic = netdev_priv(netdev);
@@ -591,7 +590,7 @@
 	}
 
 	/* Check for errors */
-	err = nicvf_check_cqe_rx_errs(nic, cq, cqe_rx);
+	err = nicvf_check_cqe_rx_errs(nic, cqe_rx);
 	if (err && !cqe_rx->rb_cnt)
 		return;
 
@@ -682,8 +681,7 @@
 			   cq_idx, cq_desc->cqe_type);
 		switch (cq_desc->cqe_type) {
 		case CQE_TYPE_RX:
-			nicvf_rcv_pkt_handler(netdev, napi, cq,
-					      cq_desc, CQE_TYPE_RX);
+			nicvf_rcv_pkt_handler(netdev, napi, cq_desc);
 			work_done++;
 		break;
 		case CQE_TYPE_SEND:
@@ -1125,7 +1123,6 @@
 
 	/* Clear multiqset info */
 	nic->pnicvf = nic;
-	nic->sqs_count = 0;
 
 	return 0;
 }
@@ -1354,6 +1351,9 @@
 	drv_stats->tx_frames_ok = stats->tx_ucast_frames_ok +
 				  stats->tx_bcast_frames_ok +
 				  stats->tx_mcast_frames_ok;
+	drv_stats->rx_frames_ok = stats->rx_ucast_frames +
+				  stats->rx_bcast_frames +
+				  stats->rx_mcast_frames;
 	drv_stats->rx_drops = stats->rx_drop_red +
 			      stats->rx_drop_overrun;
 	drv_stats->tx_drops = stats->tx_drops;
@@ -1538,6 +1538,9 @@
 
 	nicvf_send_vf_struct(nic);
 
+	if (!pass1_silicon(nic->pdev))
+		nic->hw_tso = true;
+
 	/* Check if this VF is in QS only mode */
 	if (nic->sqs_mode)
 		return 0;
@@ -1557,9 +1560,6 @@
 
 	netdev->vlan_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO;
 
-	if (!pass1_silicon(nic->pdev))
-		nic->hw_tso = true;
-
 	netdev->netdev_ops = &nicvf_netdev_ops;
 	netdev->watchdog_timeo = NICVF_TX_TIMEOUT;
 
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
index d0d1b54..767347b 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
@@ -1329,16 +1329,12 @@
 }
 
 /* Check for errors in the receive cmp.queue entry */
-int nicvf_check_cqe_rx_errs(struct nicvf *nic,
-			    struct cmp_queue *cq, struct cqe_rx_t *cqe_rx)
+int nicvf_check_cqe_rx_errs(struct nicvf *nic, struct cqe_rx_t *cqe_rx)
 {
 	struct nicvf_hw_stats *stats = &nic->hw_stats;
-	struct nicvf_drv_stats *drv_stats = &nic->drv_stats;
 
-	if (!cqe_rx->err_level && !cqe_rx->err_opcode) {
-		drv_stats->rx_frames_ok++;
+	if (!cqe_rx->err_level && !cqe_rx->err_opcode)
 		return 0;
-	}
 
 	if (netif_msg_rx_err(nic))
 		netdev_err(nic->netdev,
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
index c5030a7..6673e11 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
@@ -338,8 +338,7 @@
 /* Stats */
 void nicvf_update_rq_stats(struct nicvf *nic, int rq_idx);
 void nicvf_update_sq_stats(struct nicvf *nic, int sq_idx);
-int nicvf_check_cqe_rx_errs(struct nicvf *nic,
-			    struct cmp_queue *cq, struct cqe_rx_t *cqe_rx);
+int nicvf_check_cqe_rx_errs(struct nicvf *nic, struct cqe_rx_t *cqe_rx);
 int nicvf_check_cqe_tx_errs(struct nicvf *nic,
 			    struct cmp_queue *cq, struct cqe_send_t *cqe_tx);
 #endif /* NICVF_QUEUES_H */
diff --git a/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c b/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c
index ee04caa..a89721f 100644
--- a/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c
@@ -681,6 +681,24 @@
 	return t3_seeprom_write(adapter, EEPROM_STAT_ADDR, enable ? 0xc : 0);
 }
 
+static int vpdstrtouint(char *s, int len, unsigned int base, unsigned int *val)
+{
+	char tok[len + 1];
+
+	memcpy(tok, s, len);
+	tok[len] = 0;
+	return kstrtouint(strim(tok), base, val);
+}
+
+static int vpdstrtou16(char *s, int len, unsigned int base, u16 *val)
+{
+	char tok[len + 1];
+
+	memcpy(tok, s, len);
+	tok[len] = 0;
+	return kstrtou16(strim(tok), base, val);
+}
+
 /**
  *	get_vpd_params - read VPD parameters from VPD EEPROM
  *	@adapter: adapter to read
@@ -709,19 +727,19 @@
 			return ret;
 	}
 
-	ret = kstrtouint(vpd.cclk_data, 10, &p->cclk);
+	ret = vpdstrtouint(vpd.cclk_data, vpd.cclk_len, 10, &p->cclk);
 	if (ret)
 		return ret;
-	ret = kstrtouint(vpd.mclk_data, 10, &p->mclk);
+	ret = vpdstrtouint(vpd.mclk_data, vpd.mclk_len, 10, &p->mclk);
 	if (ret)
 		return ret;
-	ret = kstrtouint(vpd.uclk_data, 10, &p->uclk);
+	ret = vpdstrtouint(vpd.uclk_data, vpd.uclk_len, 10, &p->uclk);
 	if (ret)
 		return ret;
-	ret = kstrtouint(vpd.mdc_data, 10, &p->mdc);
+	ret = vpdstrtouint(vpd.mdc_data, vpd.mdc_len, 10, &p->mdc);
 	if (ret)
 		return ret;
-	ret = kstrtouint(vpd.mt_data, 10, &p->mem_timing);
+	ret = vpdstrtouint(vpd.mt_data, vpd.mt_len, 10, &p->mem_timing);
 	if (ret)
 		return ret;
 	memcpy(p->sn, vpd.sn_data, SERNUM_LEN);
@@ -733,10 +751,12 @@
 	} else {
 		p->port_type[0] = hex_to_bin(vpd.port0_data[0]);
 		p->port_type[1] = hex_to_bin(vpd.port1_data[0]);
-		ret = kstrtou16(vpd.xaui0cfg_data, 16, &p->xauicfg[0]);
+		ret = vpdstrtou16(vpd.xaui0cfg_data, vpd.xaui0cfg_len, 16,
+				  &p->xauicfg[0]);
 		if (ret)
 			return ret;
-		ret = kstrtou16(vpd.xaui1cfg_data, 16, &p->xauicfg[1]);
+		ret = vpdstrtou16(vpd.xaui1cfg_data, vpd.xaui1cfg_len, 16,
+				  &p->xauicfg[1]);
 		if (ret)
 			return ret;
 	}
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h b/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h
index a8dda63..06bc2d2 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h
@@ -165,6 +165,7 @@
 	CH_PCI_ID_TABLE_FENTRY(0x5098),	/* Custom 2x40G QSFP */
 	CH_PCI_ID_TABLE_FENTRY(0x5099),	/* Custom 2x40G QSFP */
 	CH_PCI_ID_TABLE_FENTRY(0x509a),	/* Custom T520-CR */
+	CH_PCI_ID_TABLE_FENTRY(0x509b),	/* Custom T540-CR LOM */
 
 	/* T6 adapters:
 	 */
diff --git a/drivers/net/ethernet/cisco/enic/enic.h b/drivers/net/ethernet/cisco/enic/enic.h
index 1671fa3..7ba6d53 100644
--- a/drivers/net/ethernet/cisco/enic/enic.h
+++ b/drivers/net/ethernet/cisco/enic/enic.h
@@ -33,7 +33,7 @@
 
 #define DRV_NAME		"enic"
 #define DRV_DESCRIPTION		"Cisco VIC Ethernet NIC Driver"
-#define DRV_VERSION		"2.3.0.12"
+#define DRV_VERSION		"2.3.0.20"
 #define DRV_COPYRIGHT		"Copyright 2008-2013 Cisco Systems, Inc"
 
 #define ENIC_BARS_MAX		6
diff --git a/drivers/net/ethernet/cisco/enic/vnic_dev.c b/drivers/net/ethernet/cisco/enic/vnic_dev.c
index 1ffd105..1fdf5fe 100644
--- a/drivers/net/ethernet/cisco/enic/vnic_dev.c
+++ b/drivers/net/ethernet/cisco/enic/vnic_dev.c
@@ -298,7 +298,8 @@
 			  int wait)
 {
 	struct devcmd2_controller *dc2c = vdev->devcmd2;
-	struct devcmd2_result *result = dc2c->result + dc2c->next_result;
+	struct devcmd2_result *result;
+	u8 color;
 	unsigned int i;
 	int delay, err;
 	u32 fetch_index, new_posted;
@@ -336,13 +337,17 @@
 	if (dc2c->cmd_ring[posted].flags & DEVCMD2_FNORESULT)
 		return 0;
 
+	result = dc2c->result + dc2c->next_result;
+	color = dc2c->color;
+
+	dc2c->next_result++;
+	if (dc2c->next_result == dc2c->result_size) {
+		dc2c->next_result = 0;
+		dc2c->color = dc2c->color ? 0 : 1;
+	}
+
 	for (delay = 0; delay < wait; delay++) {
-		if (result->color == dc2c->color) {
-			dc2c->next_result++;
-			if (dc2c->next_result == dc2c->result_size) {
-				dc2c->next_result = 0;
-				dc2c->color = dc2c->color ? 0 : 1;
-			}
+		if (result->color == color) {
 			if (result->error) {
 				err = result->error;
 				if (err != ERR_ECMDUNKNOWN ||
diff --git a/drivers/net/ethernet/davicom/dm9000.c b/drivers/net/ethernet/davicom/dm9000.c
index cf94b72..48d9194 100644
--- a/drivers/net/ethernet/davicom/dm9000.c
+++ b/drivers/net/ethernet/davicom/dm9000.c
@@ -128,7 +128,6 @@
 	struct resource *data_res;
 	struct resource	*addr_req;   /* resources requested */
 	struct resource *data_req;
-	struct resource *irq_res;
 
 	int		 irq_wake;
 
@@ -1300,22 +1299,16 @@
 dm9000_open(struct net_device *dev)
 {
 	struct board_info *db = netdev_priv(dev);
-	unsigned long irqflags = db->irq_res->flags & IRQF_TRIGGER_MASK;
 
 	if (netif_msg_ifup(db))
 		dev_dbg(db->dev, "enabling %s\n", dev->name);
 
-	/* If there is no IRQ type specified, default to something that
-	 * may work, and tell the user that this is a problem */
-
-	if (irqflags == IRQF_TRIGGER_NONE)
-		irqflags = irq_get_trigger_type(dev->irq);
-
-	if (irqflags == IRQF_TRIGGER_NONE)
+	/* If there is no IRQ type specified, tell the user that this is a
+	 * problem
+	 */
+	if (irq_get_trigger_type(dev->irq) == IRQF_TRIGGER_NONE)
 		dev_warn(db->dev, "WARNING: no IRQ resource flags set.\n");
 
-	irqflags |= IRQF_SHARED;
-
 	/* GPIO0 on pre-activate PHY, Reg 1F is not set by reset */
 	iow(db, DM9000_GPR, 0);	/* REG_1F bit0 activate phyxcer */
 	mdelay(1); /* delay needs by DM9000B */
@@ -1323,7 +1316,8 @@
 	/* Initialize DM9000 board */
 	dm9000_init_dm9000(dev);
 
-	if (request_irq(dev->irq, dm9000_interrupt, irqflags, dev->name, dev))
+	if (request_irq(dev->irq, dm9000_interrupt, IRQF_SHARED,
+			dev->name, dev))
 		return -EAGAIN;
 	/* Now that we have an interrupt handler hooked up we can unmask
 	 * our interrupts
@@ -1500,15 +1494,22 @@
 
 	db->addr_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	db->data_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
-	db->irq_res  = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
 
-	if (db->addr_res == NULL || db->data_res == NULL ||
-	    db->irq_res == NULL) {
-		dev_err(db->dev, "insufficient resources\n");
+	if (!db->addr_res || !db->data_res) {
+		dev_err(db->dev, "insufficient resources addr=%p data=%p\n",
+			db->addr_res, db->data_res);
 		ret = -ENOENT;
 		goto out;
 	}
 
+	ndev->irq = platform_get_irq(pdev, 0);
+	if (ndev->irq < 0) {
+		dev_err(db->dev, "interrupt resource unavailable: %d\n",
+			ndev->irq);
+		ret = ndev->irq;
+		goto out;
+	}
+
 	db->irq_wake = platform_get_irq(pdev, 1);
 	if (db->irq_wake >= 0) {
 		dev_dbg(db->dev, "wakeup irq %d\n", db->irq_wake);
@@ -1570,7 +1571,6 @@
 
 	/* fill in parameters for net-dev structure */
 	ndev->base_addr = (unsigned long)db->io_addr;
-	ndev->irq	= db->irq_res->start;
 
 	/* ensure at least we have a default set of IO routines */
 	dm9000_set_io(db, iosize);
diff --git a/drivers/net/ethernet/emulex/benet/be.h b/drivers/net/ethernet/emulex/benet/be.h
index cf83783..f975129 100644
--- a/drivers/net/ethernet/emulex/benet/be.h
+++ b/drivers/net/ethernet/emulex/benet/be.h
@@ -531,6 +531,7 @@
 
 	struct delayed_work be_err_detection_work;
 	u8 err_flags;
+	bool pcicfg_mapped;	/* pcicfg obtained via pci_iomap() */
 	u32 flags;
 	u32 cmd_privileges;
 	/* Ethtool knobs and info */
diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.h b/drivers/net/ethernet/emulex/benet/be_cmds.h
index 241819b..6d9a8d7 100644
--- a/drivers/net/ethernet/emulex/benet/be_cmds.h
+++ b/drivers/net/ethernet/emulex/benet/be_cmds.h
@@ -622,10 +622,13 @@
 					 BE_IF_FLAGS_VLAN_PROMISCUOUS |\
 					 BE_IF_FLAGS_MCAST_PROMISCUOUS)
 
-#define BE_IF_EN_FLAGS	(BE_IF_FLAGS_BROADCAST | BE_IF_FLAGS_PASS_L3L4_ERRORS |\
-			BE_IF_FLAGS_MULTICAST | BE_IF_FLAGS_UNTAGGED)
+#define BE_IF_FILT_FLAGS_BASIC (BE_IF_FLAGS_BROADCAST | \
+				BE_IF_FLAGS_PASS_L3L4_ERRORS | \
+				BE_IF_FLAGS_UNTAGGED)
 
-#define BE_IF_ALL_FILT_FLAGS	(BE_IF_EN_FLAGS | BE_IF_FLAGS_ALL_PROMISCUOUS)
+#define BE_IF_ALL_FILT_FLAGS	(BE_IF_FILT_FLAGS_BASIC | \
+				 BE_IF_FLAGS_MULTICAST | \
+				 BE_IF_FLAGS_ALL_PROMISCUOUS)
 
 /* An RX interface is an object with one or more MAC addresses and
  * filtering capabilities. */
diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index f99de36..d1cf127 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -125,6 +125,11 @@
 	"Unknown"
 };
 
+#define BE_VF_IF_EN_FLAGS	(BE_IF_FLAGS_UNTAGGED | \
+				 BE_IF_FLAGS_BROADCAST | \
+				 BE_IF_FLAGS_MULTICAST | \
+				 BE_IF_FLAGS_PASS_L3L4_ERRORS)
+
 static void be_queue_free(struct be_adapter *adapter, struct be_queue_info *q)
 {
 	struct be_dma_mem *mem = &q->dma_mem;
@@ -3537,7 +3542,7 @@
 {
 	int status;
 
-	status = be_cmd_rx_filter(adapter, BE_IF_EN_FLAGS, ON);
+	status = be_cmd_rx_filter(adapter, BE_IF_FILT_FLAGS_BASIC, ON);
 	if (status)
 		return status;
 
@@ -3857,8 +3862,7 @@
 	int status;
 
 	/* If a FW profile exists, then cap_flags are updated */
-	cap_flags = BE_IF_FLAGS_UNTAGGED | BE_IF_FLAGS_BROADCAST |
-		    BE_IF_FLAGS_MULTICAST | BE_IF_FLAGS_PASS_L3L4_ERRORS;
+	cap_flags = BE_VF_IF_EN_FLAGS;
 
 	for_all_vfs(adapter, vf_cfg, vf) {
 		if (!BE3_chip(adapter)) {
@@ -3874,10 +3878,8 @@
 			}
 		}
 
-		en_flags = cap_flags & (BE_IF_FLAGS_UNTAGGED |
-					BE_IF_FLAGS_BROADCAST |
-					BE_IF_FLAGS_MULTICAST |
-					BE_IF_FLAGS_PASS_L3L4_ERRORS);
+		/* PF should enable IF flags during proxy if_create call */
+		en_flags = cap_flags & BE_VF_IF_EN_FLAGS;
 		status = be_cmd_if_create(adapter, cap_flags, en_flags,
 					  &vf_cfg->if_handle, vf + 1);
 		if (status)
@@ -4968,6 +4970,8 @@
 		pci_iounmap(adapter->pdev, adapter->csr);
 	if (adapter->db)
 		pci_iounmap(adapter->pdev, adapter->db);
+	if (adapter->pcicfg && adapter->pcicfg_mapped)
+		pci_iounmap(adapter->pdev, adapter->pcicfg);
 }
 
 static int db_bar(struct be_adapter *adapter)
@@ -5019,8 +5023,10 @@
 			if (!addr)
 				goto pci_map_err;
 			adapter->pcicfg = addr;
+			adapter->pcicfg_mapped = true;
 		} else {
 			adapter->pcicfg = adapter->db + SRIOV_VF_PCICFG_OFFSET;
+			adapter->pcicfg_mapped = false;
 		}
 	}
 
diff --git a/drivers/net/ethernet/ethoc.c b/drivers/net/ethernet/ethoc.c
index 62fa136..41b01064 100644
--- a/drivers/net/ethernet/ethoc.c
+++ b/drivers/net/ethernet/ethoc.c
@@ -1265,7 +1265,6 @@
 
 		if (priv->mdio) {
 			mdiobus_unregister(priv->mdio);
-			kfree(priv->mdio->irq);
 			mdiobus_free(priv->mdio);
 		}
 		if (priv->clk)
diff --git a/drivers/net/ethernet/ezchip/Kconfig b/drivers/net/ethernet/ezchip/Kconfig
index 48ecbc8..b423ad3 100644
--- a/drivers/net/ethernet/ezchip/Kconfig
+++ b/drivers/net/ethernet/ezchip/Kconfig
@@ -18,6 +18,7 @@
 config EZCHIP_NPS_MANAGEMENT_ENET
 	tristate "EZchip NPS management enet support"
 	depends on OF_IRQ && OF_NET
+	depends on HAS_IOMEM
 	---help---
 	  Simple LAN device for debug or management purposes.
 	  Device supports interrupts for RX and TX(completion).
diff --git a/drivers/net/ethernet/freescale/Makefile b/drivers/net/ethernet/freescale/Makefile
index 4097c58..cbe21dc 100644
--- a/drivers/net/ethernet/freescale/Makefile
+++ b/drivers/net/ethernet/freescale/Makefile
@@ -4,6 +4,9 @@
 
 obj-$(CONFIG_FEC) += fec.o
 fec-objs :=fec_main.o fec_ptp.o
+CFLAGS_fec_main.o := -D__CHECK_ENDIAN__
+CFLAGS_fec_ptp.o := -D__CHECK_ENDIAN__
+
 obj-$(CONFIG_FEC_MPC52xx) += fec_mpc52xx.o
 ifeq ($(CONFIG_FEC_MPC52xx_MDIO),y)
 	obj-$(CONFIG_FEC_MPC52xx) += fec_mpc52xx_phy.o
diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h
index 99d33e2..2106d72 100644
--- a/drivers/net/ethernet/freescale/fec.h
+++ b/drivers/net/ethernet/freescale/fec.h
@@ -19,8 +19,7 @@
 #include <linux/timecounter.h>
 
 #if defined(CONFIG_M523x) || defined(CONFIG_M527x) || defined(CONFIG_M528x) || \
-    defined(CONFIG_M520x) || defined(CONFIG_M532x) || \
-    defined(CONFIG_ARCH_MXC) || defined(CONFIG_SOC_IMX28)
+    defined(CONFIG_M520x) || defined(CONFIG_M532x) || defined(CONFIG_ARM)
 /*
  *	Just figures, Motorola would have to change the offsets for
  *	registers in the same peripheral device on different models
@@ -190,28 +189,45 @@
 
 /*
  *	Define the buffer descriptor structure.
+ *
+ *	Evidently, ARM SoCs have the FEC block generated in a
+ *	little endian mode so adjust endianness accordingly.
  */
-#if defined(CONFIG_ARCH_MXC) || defined(CONFIG_SOC_IMX28)
+#if defined(CONFIG_ARM)
+#define fec32_to_cpu le32_to_cpu
+#define fec16_to_cpu le16_to_cpu
+#define cpu_to_fec32 cpu_to_le32
+#define cpu_to_fec16 cpu_to_le16
+#define __fec32 __le32
+#define __fec16 __le16
+
 struct bufdesc {
-	unsigned short cbd_datlen;	/* Data length */
-	unsigned short cbd_sc;	/* Control and status info */
-	unsigned long cbd_bufaddr;	/* Buffer address */
+	__fec16 cbd_datlen;	/* Data length */
+	__fec16 cbd_sc;		/* Control and status info */
+	__fec32 cbd_bufaddr;	/* Buffer address */
 };
 #else
+#define fec32_to_cpu be32_to_cpu
+#define fec16_to_cpu be16_to_cpu
+#define cpu_to_fec32 cpu_to_be32
+#define cpu_to_fec16 cpu_to_be16
+#define __fec32 __be32
+#define __fec16 __be16
+
 struct bufdesc {
-	unsigned short	cbd_sc;			/* Control and status info */
-	unsigned short	cbd_datlen;		/* Data length */
-	unsigned long	cbd_bufaddr;		/* Buffer address */
+	__fec16	cbd_sc;		/* Control and status info */
+	__fec16	cbd_datlen;	/* Data length */
+	__fec32	cbd_bufaddr;	/* Buffer address */
 };
 #endif
 
 struct bufdesc_ex {
 	struct bufdesc desc;
-	unsigned long cbd_esc;
-	unsigned long cbd_prot;
-	unsigned long cbd_bdu;
-	unsigned long ts;
-	unsigned short res0[4];
+	__fec32 cbd_esc;
+	__fec32 cbd_prot;
+	__fec32 cbd_bdu;
+	__fec32 ts;
+	__fec16 res0[4];
 };
 
 /*
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 502da6f..41c81f6 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -332,11 +332,13 @@
 	bdp = txq->tx_bd_base;
 
 	do {
-		pr_info("%3u %c%c 0x%04x 0x%08lx %4u %p\n",
+		pr_info("%3u %c%c 0x%04x 0x%08x %4u %p\n",
 			index,
 			bdp == txq->cur_tx ? 'S' : ' ',
 			bdp == txq->dirty_tx ? 'H' : ' ',
-			bdp->cbd_sc, bdp->cbd_bufaddr, bdp->cbd_datlen,
+			fec16_to_cpu(bdp->cbd_sc),
+			fec32_to_cpu(bdp->cbd_bufaddr),
+			fec16_to_cpu(bdp->cbd_datlen),
 			txq->tx_skbuff[index]);
 		bdp = fec_enet_get_nextdesc(bdp, fep, 0);
 		index++;
@@ -389,7 +391,7 @@
 		bdp = fec_enet_get_nextdesc(bdp, fep, queue);
 		ebdp = (struct bufdesc_ex *)bdp;
 
-		status = bdp->cbd_sc;
+		status = fec16_to_cpu(bdp->cbd_sc);
 		status &= ~BD_ENET_TX_STATS;
 		status |= (BD_ENET_TX_TC | BD_ENET_TX_READY);
 		frag_len = skb_shinfo(skb)->frags[frag].size;
@@ -411,7 +413,7 @@
 			if (skb->ip_summed == CHECKSUM_PARTIAL)
 				estatus |= BD_ENET_TX_PINS | BD_ENET_TX_IINS;
 			ebdp->cbd_bdu = 0;
-			ebdp->cbd_esc = estatus;
+			ebdp->cbd_esc = cpu_to_fec32(estatus);
 		}
 
 		bufaddr = page_address(this_frag->page.p) + this_frag->page_offset;
@@ -435,9 +437,9 @@
 			goto dma_mapping_error;
 		}
 
-		bdp->cbd_bufaddr = addr;
-		bdp->cbd_datlen = frag_len;
-		bdp->cbd_sc = status;
+		bdp->cbd_bufaddr = cpu_to_fec32(addr);
+		bdp->cbd_datlen = cpu_to_fec16(frag_len);
+		bdp->cbd_sc = cpu_to_fec16(status);
 	}
 
 	return bdp;
@@ -445,8 +447,8 @@
 	bdp = txq->cur_tx;
 	for (i = 0; i < frag; i++) {
 		bdp = fec_enet_get_nextdesc(bdp, fep, queue);
-		dma_unmap_single(&fep->pdev->dev, bdp->cbd_bufaddr,
-				bdp->cbd_datlen, DMA_TO_DEVICE);
+		dma_unmap_single(&fep->pdev->dev, fec32_to_cpu(bdp->cbd_bufaddr),
+				 fec16_to_cpu(bdp->cbd_datlen), DMA_TO_DEVICE);
 	}
 	return ERR_PTR(-ENOMEM);
 }
@@ -483,7 +485,7 @@
 	/* Fill in a Tx ring entry */
 	bdp = txq->cur_tx;
 	last_bdp = bdp;
-	status = bdp->cbd_sc;
+	status = fec16_to_cpu(bdp->cbd_sc);
 	status &= ~BD_ENET_TX_STATS;
 
 	/* Set buffer length and buffer pointer */
@@ -539,21 +541,21 @@
 			estatus |= BD_ENET_TX_PINS | BD_ENET_TX_IINS;
 
 		ebdp->cbd_bdu = 0;
-		ebdp->cbd_esc = estatus;
+		ebdp->cbd_esc = cpu_to_fec32(estatus);
 	}
 
 	index = fec_enet_get_bd_index(txq->tx_bd_base, last_bdp, fep);
 	/* Save skb pointer */
 	txq->tx_skbuff[index] = skb;
 
-	bdp->cbd_datlen = buflen;
-	bdp->cbd_bufaddr = addr;
+	bdp->cbd_datlen = cpu_to_fec16(buflen);
+	bdp->cbd_bufaddr = cpu_to_fec32(addr);
 
 	/* Send it on its way.  Tell FEC it's ready, interrupt when done,
 	 * it's the last BD of the frame, and to put the CRC on the end.
 	 */
 	status |= (BD_ENET_TX_READY | BD_ENET_TX_TC);
-	bdp->cbd_sc = status;
+	bdp->cbd_sc = cpu_to_fec16(status);
 
 	/* If this was the last BD in the ring, start at the beginning again. */
 	bdp = fec_enet_get_nextdesc(last_bdp, fep, queue);
@@ -585,7 +587,7 @@
 	unsigned int estatus = 0;
 	dma_addr_t addr;
 
-	status = bdp->cbd_sc;
+	status = fec16_to_cpu(bdp->cbd_sc);
 	status &= ~BD_ENET_TX_STATS;
 
 	status |= (BD_ENET_TX_TC | BD_ENET_TX_READY);
@@ -607,8 +609,8 @@
 		return NETDEV_TX_BUSY;
 	}
 
-	bdp->cbd_datlen = size;
-	bdp->cbd_bufaddr = addr;
+	bdp->cbd_datlen = cpu_to_fec16(size);
+	bdp->cbd_bufaddr = cpu_to_fec32(addr);
 
 	if (fep->bufdesc_ex) {
 		if (fep->quirks & FEC_QUIRK_HAS_AVB)
@@ -616,7 +618,7 @@
 		if (skb->ip_summed == CHECKSUM_PARTIAL)
 			estatus |= BD_ENET_TX_PINS | BD_ENET_TX_IINS;
 		ebdp->cbd_bdu = 0;
-		ebdp->cbd_esc = estatus;
+		ebdp->cbd_esc = cpu_to_fec32(estatus);
 	}
 
 	/* Handle the last BD specially */
@@ -625,10 +627,10 @@
 	if (is_last) {
 		status |= BD_ENET_TX_INTR;
 		if (fep->bufdesc_ex)
-			ebdp->cbd_esc |= BD_ENET_TX_INT;
+			ebdp->cbd_esc |= cpu_to_fec32(BD_ENET_TX_INT);
 	}
 
-	bdp->cbd_sc = status;
+	bdp->cbd_sc = cpu_to_fec16(status);
 
 	return 0;
 }
@@ -647,7 +649,7 @@
 	unsigned short status;
 	unsigned int estatus = 0;
 
-	status = bdp->cbd_sc;
+	status = fec16_to_cpu(bdp->cbd_sc);
 	status &= ~BD_ENET_TX_STATS;
 	status |= (BD_ENET_TX_TC | BD_ENET_TX_READY);
 
@@ -671,8 +673,8 @@
 		}
 	}
 
-	bdp->cbd_bufaddr = dmabuf;
-	bdp->cbd_datlen = hdr_len;
+	bdp->cbd_bufaddr = cpu_to_fec32(dmabuf);
+	bdp->cbd_datlen = cpu_to_fec16(hdr_len);
 
 	if (fep->bufdesc_ex) {
 		if (fep->quirks & FEC_QUIRK_HAS_AVB)
@@ -680,10 +682,10 @@
 		if (skb->ip_summed == CHECKSUM_PARTIAL)
 			estatus |= BD_ENET_TX_PINS | BD_ENET_TX_IINS;
 		ebdp->cbd_bdu = 0;
-		ebdp->cbd_esc = estatus;
+		ebdp->cbd_esc = cpu_to_fec32(estatus);
 	}
 
-	bdp->cbd_sc = status;
+	bdp->cbd_sc = cpu_to_fec16(status);
 
 	return 0;
 }
@@ -823,15 +825,15 @@
 
 			/* Initialize the BD for every fragment in the page. */
 			if (bdp->cbd_bufaddr)
-				bdp->cbd_sc = BD_ENET_RX_EMPTY;
+				bdp->cbd_sc = cpu_to_fec16(BD_ENET_RX_EMPTY);
 			else
-				bdp->cbd_sc = 0;
+				bdp->cbd_sc = cpu_to_fec16(0);
 			bdp = fec_enet_get_nextdesc(bdp, fep, q);
 		}
 
 		/* Set the last buffer to wrap */
 		bdp = fec_enet_get_prevdesc(bdp, fep, q);
-		bdp->cbd_sc |= BD_SC_WRAP;
+		bdp->cbd_sc |= cpu_to_fec16(BD_SC_WRAP);
 
 		rxq->cur_rx = rxq->rx_bd_base;
 	}
@@ -844,18 +846,18 @@
 
 		for (i = 0; i < txq->tx_ring_size; i++) {
 			/* Initialize the BD for every fragment in the page. */
-			bdp->cbd_sc = 0;
+			bdp->cbd_sc = cpu_to_fec16(0);
 			if (txq->tx_skbuff[i]) {
 				dev_kfree_skb_any(txq->tx_skbuff[i]);
 				txq->tx_skbuff[i] = NULL;
 			}
-			bdp->cbd_bufaddr = 0;
+			bdp->cbd_bufaddr = cpu_to_fec32(0);
 			bdp = fec_enet_get_nextdesc(bdp, fep, q);
 		}
 
 		/* Set the last buffer to wrap */
 		bdp = fec_enet_get_prevdesc(bdp, fep, q);
-		bdp->cbd_sc |= BD_SC_WRAP;
+		bdp->cbd_sc |= cpu_to_fec16(BD_SC_WRAP);
 		txq->dirty_tx = bdp;
 	}
 }
@@ -947,8 +949,10 @@
 	 */
 	if (fep->quirks & FEC_QUIRK_ENET_MAC) {
 		memcpy(&temp_mac, ndev->dev_addr, ETH_ALEN);
-		writel(cpu_to_be32(temp_mac[0]), fep->hwp + FEC_ADDR_LOW);
-		writel(cpu_to_be32(temp_mac[1]), fep->hwp + FEC_ADDR_HIGH);
+		writel((__force u32)cpu_to_be32(temp_mac[0]),
+		       fep->hwp + FEC_ADDR_LOW);
+		writel((__force u32)cpu_to_be32(temp_mac[1]),
+		       fep->hwp + FEC_ADDR_HIGH);
 	}
 
 	/* Clear any outstanding interrupt. */
@@ -1222,7 +1226,7 @@
 	while (bdp != READ_ONCE(txq->cur_tx)) {
 		/* Order the load of cur_tx and cbd_sc */
 		rmb();
-		status = READ_ONCE(bdp->cbd_sc);
+		status = fec16_to_cpu(READ_ONCE(bdp->cbd_sc));
 		if (status & BD_ENET_TX_READY)
 			break;
 
@@ -1230,10 +1234,12 @@
 
 		skb = txq->tx_skbuff[index];
 		txq->tx_skbuff[index] = NULL;
-		if (!IS_TSO_HEADER(txq, bdp->cbd_bufaddr))
-			dma_unmap_single(&fep->pdev->dev, bdp->cbd_bufaddr,
-					bdp->cbd_datlen, DMA_TO_DEVICE);
-		bdp->cbd_bufaddr = 0;
+		if (!IS_TSO_HEADER(txq, fec32_to_cpu(bdp->cbd_bufaddr)))
+			dma_unmap_single(&fep->pdev->dev,
+					 fec32_to_cpu(bdp->cbd_bufaddr),
+					 fec16_to_cpu(bdp->cbd_datlen),
+					 DMA_TO_DEVICE);
+		bdp->cbd_bufaddr = cpu_to_fec32(0);
 		if (!skb) {
 			bdp = fec_enet_get_nextdesc(bdp, fep, queue_id);
 			continue;
@@ -1264,7 +1270,7 @@
 			struct skb_shared_hwtstamps shhwtstamps;
 			struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp;
 
-			fec_enet_hwtstamp(fep, ebdp->ts, &shhwtstamps);
+			fec_enet_hwtstamp(fep, fec32_to_cpu(ebdp->ts), &shhwtstamps);
 			skb_tstamp_tx(skb, &shhwtstamps);
 		}
 
@@ -1324,10 +1330,8 @@
 	if (off)
 		skb_reserve(skb, fep->rx_align + 1 - off);
 
-	bdp->cbd_bufaddr = dma_map_single(&fep->pdev->dev, skb->data,
-					  FEC_ENET_RX_FRSIZE - fep->rx_align,
-					  DMA_FROM_DEVICE);
-	if (dma_mapping_error(&fep->pdev->dev, bdp->cbd_bufaddr)) {
+	bdp->cbd_bufaddr = cpu_to_fec32(dma_map_single(&fep->pdev->dev, skb->data, FEC_ENET_RX_FRSIZE - fep->rx_align, DMA_FROM_DEVICE));
+	if (dma_mapping_error(&fep->pdev->dev, fec32_to_cpu(bdp->cbd_bufaddr))) {
 		if (net_ratelimit())
 			netdev_err(ndev, "Rx DMA memory map failed\n");
 		return -ENOMEM;
@@ -1349,7 +1353,8 @@
 	if (!new_skb)
 		return false;
 
-	dma_sync_single_for_cpu(&fep->pdev->dev, bdp->cbd_bufaddr,
+	dma_sync_single_for_cpu(&fep->pdev->dev,
+				fec32_to_cpu(bdp->cbd_bufaddr),
 				FEC_ENET_RX_FRSIZE - fep->rx_align,
 				DMA_FROM_DEVICE);
 	if (!swap)
@@ -1396,7 +1401,7 @@
 	 */
 	bdp = rxq->cur_rx;
 
-	while (!((status = bdp->cbd_sc) & BD_ENET_RX_EMPTY)) {
+	while (!((status = fec16_to_cpu(bdp->cbd_sc)) & BD_ENET_RX_EMPTY)) {
 
 		if (pkt_received >= budget)
 			break;
@@ -1438,7 +1443,7 @@
 
 		/* Process the incoming frame. */
 		ndev->stats.rx_packets++;
-		pkt_len = bdp->cbd_datlen;
+		pkt_len = fec16_to_cpu(bdp->cbd_datlen);
 		ndev->stats.rx_bytes += pkt_len;
 
 		index = fec_enet_get_bd_index(rxq->rx_bd_base, bdp, fep);
@@ -1456,7 +1461,8 @@
 				ndev->stats.rx_dropped++;
 				goto rx_processing_done;
 			}
-			dma_unmap_single(&fep->pdev->dev, bdp->cbd_bufaddr,
+			dma_unmap_single(&fep->pdev->dev,
+					 fec32_to_cpu(bdp->cbd_bufaddr),
 					 FEC_ENET_RX_FRSIZE - fep->rx_align,
 					 DMA_FROM_DEVICE);
 		}
@@ -1475,7 +1481,8 @@
 		/* If this is a VLAN packet remove the VLAN Tag */
 		vlan_packet_rcvd = false;
 		if ((ndev->features & NETIF_F_HW_VLAN_CTAG_RX) &&
-			fep->bufdesc_ex && (ebdp->cbd_esc & BD_ENET_RX_VLAN)) {
+		    fep->bufdesc_ex &&
+		    (ebdp->cbd_esc & cpu_to_fec32(BD_ENET_RX_VLAN))) {
 			/* Push and remove the vlan tag */
 			struct vlan_hdr *vlan_header =
 					(struct vlan_hdr *) (data + ETH_HLEN);
@@ -1491,12 +1498,12 @@
 
 		/* Get receive timestamp from the skb */
 		if (fep->hwts_rx_en && fep->bufdesc_ex)
-			fec_enet_hwtstamp(fep, ebdp->ts,
+			fec_enet_hwtstamp(fep, fec32_to_cpu(ebdp->ts),
 					  skb_hwtstamps(skb));
 
 		if (fep->bufdesc_ex &&
 		    (fep->csum_flags & FLAG_RX_CSUM_ENABLED)) {
-			if (!(ebdp->cbd_esc & FLAG_RX_CSUM_ERROR)) {
+			if (!(ebdp->cbd_esc & cpu_to_fec32(FLAG_RX_CSUM_ERROR))) {
 				/* don't check it */
 				skb->ip_summed = CHECKSUM_UNNECESSARY;
 			} else {
@@ -1513,7 +1520,8 @@
 		napi_gro_receive(&fep->napi, skb);
 
 		if (is_copybreak) {
-			dma_sync_single_for_device(&fep->pdev->dev, bdp->cbd_bufaddr,
+			dma_sync_single_for_device(&fep->pdev->dev,
+						   fec32_to_cpu(bdp->cbd_bufaddr),
 						   FEC_ENET_RX_FRSIZE - fep->rx_align,
 						   DMA_FROM_DEVICE);
 		} else {
@@ -1527,12 +1535,12 @@
 
 		/* Mark the buffer empty */
 		status |= BD_ENET_RX_EMPTY;
-		bdp->cbd_sc = status;
+		bdp->cbd_sc = cpu_to_fec16(status);
 
 		if (fep->bufdesc_ex) {
 			struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp;
 
-			ebdp->cbd_esc = BD_ENET_RX_INT;
+			ebdp->cbd_esc = cpu_to_fec32(BD_ENET_RX_INT);
 			ebdp->cbd_prot = 0;
 			ebdp->cbd_bdu = 0;
 		}
@@ -2145,8 +2153,7 @@
 
 /* List of registers that can be safety be read to dump them with ethtool */
 #if defined(CONFIG_M523x) || defined(CONFIG_M527x) || defined(CONFIG_M528x) || \
-	defined(CONFIG_M520x) || defined(CONFIG_M532x) ||		\
-	defined(CONFIG_ARCH_MXC) || defined(CONFIG_SOC_IMX28)
+	defined(CONFIG_M520x) || defined(CONFIG_M532x) || defined(CONFIG_ARM)
 static u32 fec_enet_register_offset[] = {
 	FEC_IEVENT, FEC_IMASK, FEC_R_DES_ACTIVE_0, FEC_X_DES_ACTIVE_0,
 	FEC_ECNTRL, FEC_MII_DATA, FEC_MII_SPEED, FEC_MIB_CTRLSTAT, FEC_R_CNTRL,
@@ -2662,7 +2669,7 @@
 			rxq->rx_skbuff[i] = NULL;
 			if (skb) {
 				dma_unmap_single(&fep->pdev->dev,
-						 bdp->cbd_bufaddr,
+						 fec32_to_cpu(bdp->cbd_bufaddr),
 						 FEC_ENET_RX_FRSIZE - fep->rx_align,
 						 DMA_FROM_DEVICE);
 				dev_kfree_skb(skb);
@@ -2777,11 +2784,11 @@
 		}
 
 		rxq->rx_skbuff[i] = skb;
-		bdp->cbd_sc = BD_ENET_RX_EMPTY;
+		bdp->cbd_sc = cpu_to_fec16(BD_ENET_RX_EMPTY);
 
 		if (fep->bufdesc_ex) {
 			struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp;
-			ebdp->cbd_esc = BD_ENET_RX_INT;
+			ebdp->cbd_esc = cpu_to_fec32(BD_ENET_RX_INT);
 		}
 
 		bdp = fec_enet_get_nextdesc(bdp, fep, queue);
@@ -2789,7 +2796,7 @@
 
 	/* Set the last buffer to wrap. */
 	bdp = fec_enet_get_prevdesc(bdp, fep, queue);
-	bdp->cbd_sc |= BD_SC_WRAP;
+	bdp->cbd_sc |= cpu_to_fec16(BD_SC_WRAP);
 	return 0;
 
  err_alloc:
@@ -2812,12 +2819,12 @@
 		if (!txq->tx_bounce[i])
 			goto err_alloc;
 
-		bdp->cbd_sc = 0;
-		bdp->cbd_bufaddr = 0;
+		bdp->cbd_sc = cpu_to_fec16(0);
+		bdp->cbd_bufaddr = cpu_to_fec32(0);
 
 		if (fep->bufdesc_ex) {
 			struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp;
-			ebdp->cbd_esc = BD_ENET_TX_INT;
+			ebdp->cbd_esc = cpu_to_fec32(BD_ENET_TX_INT);
 		}
 
 		bdp = fec_enet_get_nextdesc(bdp, fep, queue);
@@ -2825,7 +2832,7 @@
 
 	/* Set the last buffer to wrap. */
 	bdp = fec_enet_get_prevdesc(bdp, fep, queue);
-	bdp->cbd_sc |= BD_SC_WRAP;
+	bdp->cbd_sc |= cpu_to_fec16(BD_SC_WRAP);
 
 	return 0;
 
diff --git a/drivers/net/ethernet/freescale/fman/fman.c b/drivers/net/ethernet/freescale/fman/fman.c
index 623aa1c..79a210a 100644
--- a/drivers/net/ethernet/freescale/fman/fman.c
+++ b/drivers/net/ethernet/freescale/fman/fman.c
@@ -2791,6 +2791,8 @@
 		goto fman_free;
 	}
 
+	fman->dev = &of_dev->dev;
+
 	return fman;
 
 fman_node_put:
@@ -2845,8 +2847,6 @@
 
 	dev_set_drvdata(dev, fman);
 
-	fman->dev = dev;
-
 	dev_dbg(dev, "FMan%d probed\n", fman->dts_params.id);
 
 	return 0;
diff --git a/drivers/net/ethernet/freescale/fs_enet/mac-fcc.c b/drivers/net/ethernet/freescale/fs_enet/mac-fcc.c
index 52e0091..1ba359f 100644
--- a/drivers/net/ethernet/freescale/fs_enet/mac-fcc.c
+++ b/drivers/net/ethernet/freescale/fs_enet/mac-fcc.c
@@ -552,7 +552,7 @@
 	cbd_t __iomem *prev_bd;
 	cbd_t __iomem *last_tx_bd;
 
-	last_tx_bd = fep->tx_bd_base + ((fpi->tx_ring - 1) * sizeof(cbd_t));
+	last_tx_bd = fep->tx_bd_base + (fpi->tx_ring - 1);
 
 	/* get the current bd held in TBPTR  and scan back from this point */
 	recheck_bd = curr_tbptr = (cbd_t __iomem *)
diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c
index 2aa7b40..b9ecf19 100644
--- a/drivers/net/ethernet/freescale/gianfar.c
+++ b/drivers/net/ethernet/freescale/gianfar.c
@@ -1111,8 +1111,10 @@
 
 	if ((SVR_SOC_VER(svr) == SVR_8548) && (SVR_REV(svr) == 0x20))
 		priv->errata |= GFAR_ERRATA_12;
+	/* P2020/P1010 Rev 1; MPC8548 Rev 2 */
 	if (((SVR_SOC_VER(svr) == SVR_P2020) && (SVR_REV(svr) < 0x20)) ||
-	    ((SVR_SOC_VER(svr) == SVR_P2010) && (SVR_REV(svr) < 0x20)))
+	    ((SVR_SOC_VER(svr) == SVR_P2010) && (SVR_REV(svr) < 0x20)) ||
+	    ((SVR_SOC_VER(svr) == SVR_8548) && (SVR_REV(svr) < 0x31)))
 		priv->errata |= GFAR_ERRATA_76; /* aka eTSEC 20 */
 }
 #endif
diff --git a/drivers/net/ethernet/fujitsu/fmvj18x_cs.c b/drivers/net/ethernet/fujitsu/fmvj18x_cs.c
index a7139f5..678f501 100644
--- a/drivers/net/ethernet/fujitsu/fmvj18x_cs.c
+++ b/drivers/net/ethernet/fujitsu/fmvj18x_cs.c
@@ -469,8 +469,8 @@
 		    goto failed;
 	    }
 	    /* Read MACID from CIS */
-	    for (i = 5; i < 11; i++)
-		    dev->dev_addr[i] = buf[i];
+	    for (i = 0; i < 6; i++)
+		    dev->dev_addr[i] = buf[i + 5];
 	    kfree(buf);
 	} else {
 	    if (pcmcia_get_mac_from_cis(link, dev))
diff --git a/drivers/net/ethernet/hisilicon/Kconfig b/drivers/net/ethernet/hisilicon/Kconfig
index 74beb18..4ccc032 100644
--- a/drivers/net/ethernet/hisilicon/Kconfig
+++ b/drivers/net/ethernet/hisilicon/Kconfig
@@ -25,6 +25,7 @@
 
 config HIP04_ETH
 	tristate "HISILICON P04 Ethernet support"
+	depends on HAS_IOMEM	# For MFD_SYSCON
 	select MARVELL_PHY
 	select MFD_SYSCON
 	select HNS_MDIO
diff --git a/drivers/net/ethernet/hisilicon/hns/hnae.c b/drivers/net/ethernet/hisilicon/hns/hnae.c
index b364529..3bfe36f 100644
--- a/drivers/net/ethernet/hisilicon/hns/hnae.c
+++ b/drivers/net/ethernet/hisilicon/hns/hnae.c
@@ -95,21 +95,17 @@
 static int __ae_match(struct device *dev, const void *data)
 {
 	struct hnae_ae_dev *hdev = cls_to_ae_dev(dev);
-	const char *ae_id = data;
 
-	if (!strncmp(ae_id, hdev->name, AE_NAME_SIZE))
-		return 1;
-
-	return 0;
+	return hdev->dev->of_node == data;
 }
 
-static struct hnae_ae_dev *find_ae(const char *ae_id)
+static struct hnae_ae_dev *find_ae(const struct device_node *ae_node)
 {
 	struct device *dev;
 
-	WARN_ON(!ae_id);
+	WARN_ON(!ae_node);
 
-	dev = class_find_device(hnae_class, NULL, ae_id, __ae_match);
+	dev = class_find_device(hnae_class, NULL, ae_node, __ae_match);
 
 	return dev ? cls_to_ae_dev(dev) : NULL;
 }
@@ -316,7 +312,8 @@
  * return handle ptr or ERR_PTR
  */
 struct hnae_handle *hnae_get_handle(struct device *owner_dev,
-				    const char *ae_id, u32 port_id,
+				    const struct device_node *ae_node,
+				    u32 port_id,
 				    struct hnae_buf_ops *bops)
 {
 	struct hnae_ae_dev *dev;
@@ -324,7 +321,7 @@
 	int i, j;
 	int ret;
 
-	dev = find_ae(ae_id);
+	dev = find_ae(ae_node);
 	if (!dev)
 		return ERR_PTR(-ENODEV);
 
diff --git a/drivers/net/ethernet/hisilicon/hns/hnae.h b/drivers/net/ethernet/hisilicon/hns/hnae.h
index 6ca94dc..1cbcb9f 100644
--- a/drivers/net/ethernet/hisilicon/hns/hnae.h
+++ b/drivers/net/ethernet/hisilicon/hns/hnae.h
@@ -524,8 +524,11 @@
 
 #define ring_to_dev(ring) ((ring)->q->dev->dev)
 
-struct hnae_handle *hnae_get_handle(struct device *owner_dev, const char *ae_id,
-				    u32 port_id, struct hnae_buf_ops *bops);
+struct hnae_handle *hnae_get_handle(struct device *owner_dev,
+				    const struct device_node *ae_node,
+				    u32 port_id,
+				    struct hnae_buf_ops *bops);
+
 void hnae_put_handle(struct hnae_handle *handle);
 int hnae_ae_register(struct hnae_ae_dev *dev, struct module *owner);
 void hnae_ae_unregister(struct hnae_ae_dev *dev);
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c
index 522b264..d4f92ed 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c
@@ -675,8 +675,12 @@
 {
 	int ret;
 	struct hnae_vf_cb *vf_cb = hns_ae_get_vf_cb(handle);
+	struct hns_mac_cb *mac_cb = hns_get_mac_cb(handle);
 
 	switch (loop) {
+	case MAC_INTERNALLOOP_PHY:
+		ret = 0;
+		break;
 	case MAC_INTERNALLOOP_SERDES:
 		ret = hns_mac_config_sds_loopback(vf_cb->mac_cb, en);
 		break;
@@ -686,6 +690,10 @@
 	default:
 		ret = -EINVAL;
 	}
+
+	if (!ret)
+		hns_dsaf_set_inner_lb(mac_cb->dsaf_dev, mac_cb->mac_id, en);
+
 	return ret;
 }
 
@@ -847,6 +855,7 @@
 int hns_dsaf_ae_init(struct dsaf_device *dsaf_dev)
 {
 	struct hnae_ae_dev *ae_dev = &dsaf_dev->ae_dev;
+	static atomic_t id = ATOMIC_INIT(-1);
 
 	switch (dsaf_dev->dsaf_ver) {
 	case AE_VERSION_1:
@@ -858,6 +867,9 @@
 	default:
 		break;
 	}
+
+	snprintf(ae_dev->name, AE_NAME_SIZE, "%s%d", DSAF_DEVICE_NAME,
+		 (int)atomic_inc_return(&id));
 	ae_dev->ops = &hns_dsaf_ops;
 	ae_dev->dev = dsaf_dev->dev;
 
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
index 1c33bd0..38fc5be 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
@@ -35,7 +35,7 @@
 	int ret, i;
 	u32 desc_num;
 	u32 buf_size;
-	const char *name, *mode_str;
+	const char *mode_str;
 	struct device_node *np = dsaf_dev->dev->of_node;
 
 	if (of_device_is_compatible(np, "hisilicon,hns-dsaf-v1"))
@@ -43,14 +43,6 @@
 	else
 		dsaf_dev->dsaf_ver = AE_VERSION_2;
 
-	ret = of_property_read_string(np, "dsa_name", &name);
-	if (ret) {
-		dev_err(dsaf_dev->dev, "get dsaf name fail, ret=%d!\n", ret);
-		return ret;
-	}
-	strncpy(dsaf_dev->ae_dev.name, name, AE_NAME_SIZE);
-	dsaf_dev->ae_dev.name[AE_NAME_SIZE - 1] = '\0';
-
 	ret = of_property_read_string(np, "mode", &mode_str);
 	if (ret) {
 		dev_err(dsaf_dev->dev, "get dsaf mode fail, ret=%d!\n", ret);
@@ -238,6 +230,30 @@
 	}
 }
 
+static void hns_dsaf_inner_qid_cfg(struct dsaf_device *dsaf_dev)
+{
+	u16 max_q_per_vf, max_vfn;
+	u32 q_id, q_num_per_port;
+	u32 mac_id;
+
+	if (AE_IS_VER1(dsaf_dev->dsaf_ver))
+		return;
+
+	hns_rcb_get_queue_mode(dsaf_dev->dsaf_mode,
+			       HNS_DSAF_COMM_SERVICE_NW_IDX,
+			       &max_vfn, &max_q_per_vf);
+	q_num_per_port = max_vfn * max_q_per_vf;
+
+	for (mac_id = 0, q_id = 0; mac_id < DSAF_SERVICE_NW_NUM; mac_id++) {
+		dsaf_set_dev_field(dsaf_dev,
+				   DSAFV2_SERDES_LBK_0_REG + 4 * mac_id,
+				   DSAFV2_SERDES_LBK_QID_M,
+				   DSAFV2_SERDES_LBK_QID_S,
+				   q_id);
+		q_id += q_num_per_port;
+	}
+}
+
 /**
  * hns_dsaf_sw_port_type_cfg - cfg sw type
  * @dsaf_id: dsa fabric id
@@ -699,6 +715,16 @@
 	dsaf_set_dev_bit(dsaf_dev, DSAF_CFG_0_REG, DSAF_CFG_MIX_MODE_S, !!en);
 }
 
+void hns_dsaf_set_inner_lb(struct dsaf_device *dsaf_dev, u32 mac_id, u32 en)
+{
+	if (AE_IS_VER1(dsaf_dev->dsaf_ver) ||
+	    dsaf_dev->mac_cb[mac_id].mac_type == HNAE_PORT_DEBUG)
+		return;
+
+	dsaf_set_dev_bit(dsaf_dev, DSAFV2_SERDES_LBK_0_REG + 4 * mac_id,
+			 DSAFV2_SERDES_LBK_EN_B, !!en);
+}
+
 /**
  * hns_dsaf_tbl_stat_en - tbl
  * @dsaf_id: dsa fabric id
@@ -1030,6 +1056,9 @@
 	/* set promisc def queue id */
 	hns_dsaf_mix_def_qid_cfg(dsaf_dev);
 
+	/* set inner loopback queue id */
+	hns_dsaf_inner_qid_cfg(dsaf_dev);
+
 	/* in non switch mode, set all port to access mode */
 	hns_dsaf_sw_port_type_cfg(dsaf_dev, DSAF_SW_PORT_TYPE_NON_VLAN);
 
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h
index 31c312f..5fea226 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h
@@ -18,6 +18,7 @@
 
 #define DSAF_DRV_NAME "hns_dsaf"
 #define DSAF_MOD_VERSION "v1.0"
+#define DSAF_DEVICE_NAME "dsaf"
 
 #define HNS_DSAF_DEBUG_NW_REG_OFFSET 0x100000
 
@@ -416,5 +417,6 @@
 void hns_dsaf_get_regs(struct dsaf_device *ddev, u32 port, void *data);
 int hns_dsaf_get_regs_count(void);
 void hns_dsaf_set_promisc_mode(struct dsaf_device *dsaf_dev, u32 en);
+void hns_dsaf_set_inner_lb(struct dsaf_device *dsaf_dev, u32 mac_id, u32 en);
 
 #endif /* __HNS_DSAF_MAIN_H__ */
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h
index f0c4f9b0..60d695d 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h
@@ -134,6 +134,7 @@
 #define DSAF_XGE_INT_STS_0_REG		0x1C0
 #define DSAF_PPE_INT_STS_0_REG		0x1E0
 #define DSAF_ROCEE_INT_STS_0_REG	0x200
+#define DSAFV2_SERDES_LBK_0_REG         0x220
 #define DSAF_PPE_QID_CFG_0_REG		0x300
 #define DSAF_SW_PORT_TYPE_0_REG		0x320
 #define DSAF_STP_PORT_TYPE_0_REG	0x340
@@ -857,6 +858,10 @@
 #define PPEV2_CFG_RSS_TBL_4N3_S	24
 #define PPEV2_CFG_RSS_TBL_4N3_M	(((1UL << 5) - 1) << PPEV2_CFG_RSS_TBL_4N3_S)
 
+#define DSAFV2_SERDES_LBK_EN_B  8
+#define DSAFV2_SERDES_LBK_QID_S 0
+#define DSAFV2_SERDES_LBK_QID_M	(((1UL << 8) - 1) << DSAFV2_SERDES_LBK_QID_S)
+
 #define PPE_CNT_CLR_CE_B	0
 #define PPE_CNT_CLR_SNAP_EN_B	1
 
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
index 0e30846..3f77ff7 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
@@ -1802,7 +1802,7 @@
 	int ret;
 
 	h = hnae_get_handle(&priv->netdev->dev,
-			    priv->ae_name, priv->port_id, NULL);
+			    priv->ae_node, priv->port_id, NULL);
 	if (IS_ERR_OR_NULL(h)) {
 		ret = PTR_ERR(h);
 		dev_dbg(priv->dev, "has not handle, register notifier!\n");
@@ -1880,13 +1880,16 @@
 	else
 		priv->enet_ver = AE_VERSION_2;
 
-	ret = of_property_read_string(node, "ae-name", &priv->ae_name);
-	if (ret)
-		goto out_read_string_fail;
+	priv->ae_node = (void *)of_parse_phandle(node, "ae-handle", 0);
+	if (IS_ERR_OR_NULL(priv->ae_node)) {
+		ret = PTR_ERR(priv->ae_node);
+		dev_err(dev, "not find ae-handle\n");
+		goto out_read_prop_fail;
+	}
 
 	ret = of_property_read_u32(node, "port-id", &priv->port_id);
 	if (ret)
-		goto out_read_string_fail;
+		goto out_read_prop_fail;
 
 	hns_init_mac_addr(ndev);
 
@@ -1945,7 +1948,7 @@
 
 out_notify_fail:
 	(void)cancel_work_sync(&priv->service_task);
-out_read_string_fail:
+out_read_prop_fail:
 	free_netdev(ndev);
 	return ret;
 }
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.h b/drivers/net/ethernet/hisilicon/hns/hns_enet.h
index 4b75270..c68ab3d 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_enet.h
+++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.h
@@ -51,7 +51,7 @@
 };
 
 struct hns_nic_priv {
-	const char *ae_name;
+	const struct device_node *ae_node;
 	u32 enet_ver;
 	u32 port_id;
 	int phy_mode;
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
index 3df2284..3c4a3bc 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
@@ -295,8 +295,10 @@
 
 	switch (loop) {
 	case MAC_INTERNALLOOP_PHY:
-		if ((phy_dev) && (!phy_dev->is_c45))
+		if ((phy_dev) && (!phy_dev->is_c45)) {
 			ret = hns_nic_config_phy_loopback(phy_dev, 0x1);
+			ret |= h->dev->ops->set_loopback(h, loop, 0x1);
+		}
 		break;
 	case MAC_INTERNALLOOP_MAC:
 		if ((h->dev->ops->set_loopback) &&
@@ -376,6 +378,7 @@
 			       struct sk_buff *skb)
 {
 	struct net_device *ndev;
+	struct hns_nic_priv *priv;
 	struct hnae_ring *ring;
 	struct netdev_queue *dev_queue;
 	struct sk_buff *new_skb;
@@ -385,8 +388,17 @@
 	char buff[33]; /* 32B data and the last character '\0' */
 
 	if (!ring_data) { /* Just for doing create frame*/
+		ndev = skb->dev;
+		priv = netdev_priv(ndev);
+
 		frame_size = skb->len;
 		memset(skb->data, 0xFF, frame_size);
+		if ((!AE_IS_VER1(priv->enet_ver)) &&
+		    (priv->ae_handle->port_type == HNAE_PORT_SERVICE)) {
+			memcpy(skb->data, ndev->dev_addr, 6);
+			skb->data[5] += 0x1f;
+		}
+
 		frame_size &= ~1ul;
 		memset(&skb->data[frame_size / 2], 0xAA, frame_size / 2 - 1);
 		memset(&skb->data[frame_size / 2 + 10], 0xBE,
@@ -486,6 +498,7 @@
 
 	/* place data into test skb */
 	(void)skb_put(skb, size);
+	skb->dev = ndev;
 	__lb_other_process(NULL, skb);
 	skb->queue_mapping = NIC_LB_TEST_RING_ID;
 
diff --git a/drivers/net/ethernet/hp/hp100.c b/drivers/net/ethernet/hp/hp100.c
index 1d5c3e1..3daf2d4 100644
--- a/drivers/net/ethernet/hp/hp100.c
+++ b/drivers/net/ethernet/hp/hp100.c
@@ -194,7 +194,6 @@
 };
 #endif
 
-#ifdef CONFIG_EISA
 static struct eisa_device_id hp100_eisa_tbl[] = {
 	{ "HWPF180" }, /* HP J2577 rev A */
 	{ "HWP1920" }, /* HP 27248B */
@@ -205,9 +204,7 @@
 	{ "" }	       /* Mandatory final entry ! */
 };
 MODULE_DEVICE_TABLE(eisa, hp100_eisa_tbl);
-#endif
 
-#ifdef CONFIG_PCI
 static const struct pci_device_id hp100_pci_tbl[] = {
 	{PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_J2585A, PCI_ANY_ID, PCI_ANY_ID,},
 	{PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_J2585B, PCI_ANY_ID, PCI_ANY_ID,},
@@ -219,7 +216,6 @@
 	{}			/* Terminating entry */
 };
 MODULE_DEVICE_TABLE(pci, hp100_pci_tbl);
-#endif
 
 static int hp100_rx_ratio = HP100_DEFAULT_RX_RATIO;
 static int hp100_priority_tx = HP100_DEFAULT_PRIORITY_TX;
@@ -2842,7 +2838,6 @@
 	free_netdev(d);
 }
 
-#ifdef CONFIG_EISA
 static int hp100_eisa_probe(struct device *gendev)
 {
 	struct net_device *dev = alloc_etherdev(sizeof(struct hp100_private));
@@ -2884,9 +2879,7 @@
 		.remove  = hp100_eisa_remove,
         }
 };
-#endif
 
-#ifdef CONFIG_PCI
 static int hp100_pci_probe(struct pci_dev *pdev,
 			   const struct pci_device_id *ent)
 {
@@ -2955,7 +2948,6 @@
 	.probe		= hp100_pci_probe,
 	.remove		= hp100_pci_remove,
 };
-#endif
 
 /*
  *  module section
@@ -3032,23 +3024,17 @@
 	err = hp100_isa_init();
 	if (err && err != -ENODEV)
 		goto out;
-#ifdef CONFIG_EISA
 	err = eisa_driver_register(&hp100_eisa_driver);
 	if (err && err != -ENODEV)
 		goto out2;
-#endif
-#ifdef CONFIG_PCI
 	err = pci_register_driver(&hp100_pci_driver);
 	if (err && err != -ENODEV)
 		goto out3;
-#endif
  out:
 	return err;
  out3:
-#ifdef CONFIG_EISA
 	eisa_driver_unregister (&hp100_eisa_driver);
  out2:
-#endif
 	hp100_isa_cleanup();
 	goto out;
 }
@@ -3057,12 +3043,8 @@
 static void __exit hp100_module_exit(void)
 {
 	hp100_isa_cleanup();
-#ifdef CONFIG_EISA
 	eisa_driver_unregister (&hp100_eisa_driver);
-#endif
-#ifdef CONFIG_PCI
 	pci_unregister_driver (&hp100_pci_driver);
-#endif
 }
 
 module_init(hp100_module_init)
diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c
index 335417b..ebe6071 100644
--- a/drivers/net/ethernet/ibm/ibmveth.c
+++ b/drivers/net/ethernet/ibm/ibmveth.c
@@ -1166,7 +1166,10 @@
 	if (!firmware_has_feature(FW_FEATURE_CMO))
 		netdev_err(netdev, "tx: unable to map xmit buffer\n");
 	adapter->tx_map_failed++;
-	skb_linearize(skb);
+	if (skb_linearize(skb)) {
+		netdev->stats.tx_dropped++;
+		goto out;
+	}
 	force_bounce = 1;
 	goto retry_bounce;
 }
diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 7d657084..6e9e16ee 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -1348,44 +1348,44 @@
 	crq.request_capability.cmd = REQUEST_CAPABILITY;
 
 	crq.request_capability.capability = cpu_to_be16(REQ_TX_QUEUES);
-	crq.request_capability.number = cpu_to_be32(adapter->req_tx_queues);
+	crq.request_capability.number = cpu_to_be64(adapter->req_tx_queues);
 	ibmvnic_send_crq(adapter, &crq);
 
 	crq.request_capability.capability = cpu_to_be16(REQ_RX_QUEUES);
-	crq.request_capability.number = cpu_to_be32(adapter->req_rx_queues);
+	crq.request_capability.number = cpu_to_be64(adapter->req_rx_queues);
 	ibmvnic_send_crq(adapter, &crq);
 
 	crq.request_capability.capability = cpu_to_be16(REQ_RX_ADD_QUEUES);
-	crq.request_capability.number = cpu_to_be32(adapter->req_rx_add_queues);
+	crq.request_capability.number = cpu_to_be64(adapter->req_rx_add_queues);
 	ibmvnic_send_crq(adapter, &crq);
 
 	crq.request_capability.capability =
 	    cpu_to_be16(REQ_TX_ENTRIES_PER_SUBCRQ);
 	crq.request_capability.number =
-	    cpu_to_be32(adapter->req_tx_entries_per_subcrq);
+	    cpu_to_be64(adapter->req_tx_entries_per_subcrq);
 	ibmvnic_send_crq(adapter, &crq);
 
 	crq.request_capability.capability =
 	    cpu_to_be16(REQ_RX_ADD_ENTRIES_PER_SUBCRQ);
 	crq.request_capability.number =
-	    cpu_to_be32(adapter->req_rx_add_entries_per_subcrq);
+	    cpu_to_be64(adapter->req_rx_add_entries_per_subcrq);
 	ibmvnic_send_crq(adapter, &crq);
 
 	crq.request_capability.capability = cpu_to_be16(REQ_MTU);
-	crq.request_capability.number = cpu_to_be32(adapter->req_mtu);
+	crq.request_capability.number = cpu_to_be64(adapter->req_mtu);
 	ibmvnic_send_crq(adapter, &crq);
 
 	if (adapter->netdev->flags & IFF_PROMISC) {
 		if (adapter->promisc_supported) {
 			crq.request_capability.capability =
 			    cpu_to_be16(PROMISC_REQUESTED);
-			crq.request_capability.number = cpu_to_be32(1);
+			crq.request_capability.number = cpu_to_be64(1);
 			ibmvnic_send_crq(adapter, &crq);
 		}
 	} else {
 		crq.request_capability.capability =
 		    cpu_to_be16(PROMISC_REQUESTED);
-		crq.request_capability.number = cpu_to_be32(0);
+		crq.request_capability.number = cpu_to_be64(0);
 		ibmvnic_send_crq(adapter, &crq);
 	}
 
@@ -2312,93 +2312,93 @@
 	switch (be16_to_cpu(crq->query_capability.capability)) {
 	case MIN_TX_QUEUES:
 		adapter->min_tx_queues =
-		    be32_to_cpu(crq->query_capability.number);
+		    be64_to_cpu(crq->query_capability.number);
 		netdev_dbg(netdev, "min_tx_queues = %lld\n",
 			   adapter->min_tx_queues);
 		break;
 	case MIN_RX_QUEUES:
 		adapter->min_rx_queues =
-		    be32_to_cpu(crq->query_capability.number);
+		    be64_to_cpu(crq->query_capability.number);
 		netdev_dbg(netdev, "min_rx_queues = %lld\n",
 			   adapter->min_rx_queues);
 		break;
 	case MIN_RX_ADD_QUEUES:
 		adapter->min_rx_add_queues =
-		    be32_to_cpu(crq->query_capability.number);
+		    be64_to_cpu(crq->query_capability.number);
 		netdev_dbg(netdev, "min_rx_add_queues = %lld\n",
 			   adapter->min_rx_add_queues);
 		break;
 	case MAX_TX_QUEUES:
 		adapter->max_tx_queues =
-		    be32_to_cpu(crq->query_capability.number);
+		    be64_to_cpu(crq->query_capability.number);
 		netdev_dbg(netdev, "max_tx_queues = %lld\n",
 			   adapter->max_tx_queues);
 		break;
 	case MAX_RX_QUEUES:
 		adapter->max_rx_queues =
-		    be32_to_cpu(crq->query_capability.number);
+		    be64_to_cpu(crq->query_capability.number);
 		netdev_dbg(netdev, "max_rx_queues = %lld\n",
 			   adapter->max_rx_queues);
 		break;
 	case MAX_RX_ADD_QUEUES:
 		adapter->max_rx_add_queues =
-		    be32_to_cpu(crq->query_capability.number);
+		    be64_to_cpu(crq->query_capability.number);
 		netdev_dbg(netdev, "max_rx_add_queues = %lld\n",
 			   adapter->max_rx_add_queues);
 		break;
 	case MIN_TX_ENTRIES_PER_SUBCRQ:
 		adapter->min_tx_entries_per_subcrq =
-		    be32_to_cpu(crq->query_capability.number);
+		    be64_to_cpu(crq->query_capability.number);
 		netdev_dbg(netdev, "min_tx_entries_per_subcrq = %lld\n",
 			   adapter->min_tx_entries_per_subcrq);
 		break;
 	case MIN_RX_ADD_ENTRIES_PER_SUBCRQ:
 		adapter->min_rx_add_entries_per_subcrq =
-		    be32_to_cpu(crq->query_capability.number);
+		    be64_to_cpu(crq->query_capability.number);
 		netdev_dbg(netdev, "min_rx_add_entrs_per_subcrq = %lld\n",
 			   adapter->min_rx_add_entries_per_subcrq);
 		break;
 	case MAX_TX_ENTRIES_PER_SUBCRQ:
 		adapter->max_tx_entries_per_subcrq =
-		    be32_to_cpu(crq->query_capability.number);
+		    be64_to_cpu(crq->query_capability.number);
 		netdev_dbg(netdev, "max_tx_entries_per_subcrq = %lld\n",
 			   adapter->max_tx_entries_per_subcrq);
 		break;
 	case MAX_RX_ADD_ENTRIES_PER_SUBCRQ:
 		adapter->max_rx_add_entries_per_subcrq =
-		    be32_to_cpu(crq->query_capability.number);
+		    be64_to_cpu(crq->query_capability.number);
 		netdev_dbg(netdev, "max_rx_add_entrs_per_subcrq = %lld\n",
 			   adapter->max_rx_add_entries_per_subcrq);
 		break;
 	case TCP_IP_OFFLOAD:
 		adapter->tcp_ip_offload =
-		    be32_to_cpu(crq->query_capability.number);
+		    be64_to_cpu(crq->query_capability.number);
 		netdev_dbg(netdev, "tcp_ip_offload = %lld\n",
 			   adapter->tcp_ip_offload);
 		break;
 	case PROMISC_SUPPORTED:
 		adapter->promisc_supported =
-		    be32_to_cpu(crq->query_capability.number);
+		    be64_to_cpu(crq->query_capability.number);
 		netdev_dbg(netdev, "promisc_supported = %lld\n",
 			   adapter->promisc_supported);
 		break;
 	case MIN_MTU:
-		adapter->min_mtu = be32_to_cpu(crq->query_capability.number);
+		adapter->min_mtu = be64_to_cpu(crq->query_capability.number);
 		netdev_dbg(netdev, "min_mtu = %lld\n", adapter->min_mtu);
 		break;
 	case MAX_MTU:
-		adapter->max_mtu = be32_to_cpu(crq->query_capability.number);
+		adapter->max_mtu = be64_to_cpu(crq->query_capability.number);
 		netdev_dbg(netdev, "max_mtu = %lld\n", adapter->max_mtu);
 		break;
 	case MAX_MULTICAST_FILTERS:
 		adapter->max_multicast_filters =
-		    be32_to_cpu(crq->query_capability.number);
+		    be64_to_cpu(crq->query_capability.number);
 		netdev_dbg(netdev, "max_multicast_filters = %lld\n",
 			   adapter->max_multicast_filters);
 		break;
 	case VLAN_HEADER_INSERTION:
 		adapter->vlan_header_insertion =
-		    be32_to_cpu(crq->query_capability.number);
+		    be64_to_cpu(crq->query_capability.number);
 		if (adapter->vlan_header_insertion)
 			netdev->features |= NETIF_F_HW_VLAN_STAG_TX;
 		netdev_dbg(netdev, "vlan_header_insertion = %lld\n",
@@ -2406,43 +2406,43 @@
 		break;
 	case MAX_TX_SG_ENTRIES:
 		adapter->max_tx_sg_entries =
-		    be32_to_cpu(crq->query_capability.number);
+		    be64_to_cpu(crq->query_capability.number);
 		netdev_dbg(netdev, "max_tx_sg_entries = %lld\n",
 			   adapter->max_tx_sg_entries);
 		break;
 	case RX_SG_SUPPORTED:
 		adapter->rx_sg_supported =
-		    be32_to_cpu(crq->query_capability.number);
+		    be64_to_cpu(crq->query_capability.number);
 		netdev_dbg(netdev, "rx_sg_supported = %lld\n",
 			   adapter->rx_sg_supported);
 		break;
 	case OPT_TX_COMP_SUB_QUEUES:
 		adapter->opt_tx_comp_sub_queues =
-		    be32_to_cpu(crq->query_capability.number);
+		    be64_to_cpu(crq->query_capability.number);
 		netdev_dbg(netdev, "opt_tx_comp_sub_queues = %lld\n",
 			   adapter->opt_tx_comp_sub_queues);
 		break;
 	case OPT_RX_COMP_QUEUES:
 		adapter->opt_rx_comp_queues =
-		    be32_to_cpu(crq->query_capability.number);
+		    be64_to_cpu(crq->query_capability.number);
 		netdev_dbg(netdev, "opt_rx_comp_queues = %lld\n",
 			   adapter->opt_rx_comp_queues);
 		break;
 	case OPT_RX_BUFADD_Q_PER_RX_COMP_Q:
 		adapter->opt_rx_bufadd_q_per_rx_comp_q =
-		    be32_to_cpu(crq->query_capability.number);
+		    be64_to_cpu(crq->query_capability.number);
 		netdev_dbg(netdev, "opt_rx_bufadd_q_per_rx_comp_q = %lld\n",
 			   adapter->opt_rx_bufadd_q_per_rx_comp_q);
 		break;
 	case OPT_TX_ENTRIES_PER_SUBCRQ:
 		adapter->opt_tx_entries_per_subcrq =
-		    be32_to_cpu(crq->query_capability.number);
+		    be64_to_cpu(crq->query_capability.number);
 		netdev_dbg(netdev, "opt_tx_entries_per_subcrq = %lld\n",
 			   adapter->opt_tx_entries_per_subcrq);
 		break;
 	case OPT_RXBA_ENTRIES_PER_SUBCRQ:
 		adapter->opt_rxba_entries_per_subcrq =
-		    be32_to_cpu(crq->query_capability.number);
+		    be64_to_cpu(crq->query_capability.number);
 		netdev_dbg(netdev, "opt_rxba_entries_per_subcrq = %lld\n",
 			   adapter->opt_rxba_entries_per_subcrq);
 		break;
diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h
index 1242925..1a9993c 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.h
+++ b/drivers/net/ethernet/ibm/ibmvnic.h
@@ -319,10 +319,8 @@
 	u8 first;
 	u8 cmd;
 	__be16 capability; /* one of ibmvnic_capabilities */
+	__be64 number;
 	struct ibmvnic_rc rc;
-	__be32 number; /*FIX: should be __be64, but I'm getting the least
-			* significant word first
-			*/
 } __packed __aligned(8);
 
 struct ibmvnic_login {
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index bb4612c..8f3b53e 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -7117,9 +7117,7 @@
 	i40e_watchdog_subtask(pf);
 	i40e_fdir_reinit_subtask(pf);
 	i40e_sync_filters_subtask(pf);
-#if IS_ENABLED(CONFIG_VXLAN) || IS_ENABLED(CONFIG_GENEVE)
 	i40e_sync_udp_filters_subtask(pf);
-#endif
 	i40e_clean_adminq_subtask(pf);
 
 	i40e_service_event_complete(pf);
@@ -8515,6 +8513,8 @@
 }
 
 #endif
+
+#if IS_ENABLED(CONFIG_VXLAN)
 /**
  * i40e_add_vxlan_port - Get notifications about VXLAN ports that come up
  * @netdev: This physical port's netdev
@@ -8524,7 +8524,6 @@
 static void i40e_add_vxlan_port(struct net_device *netdev,
 				sa_family_t sa_family, __be16 port)
 {
-#if IS_ENABLED(CONFIG_VXLAN)
 	struct i40e_netdev_priv *np = netdev_priv(netdev);
 	struct i40e_vsi *vsi = np->vsi;
 	struct i40e_pf *pf = vsi->back;
@@ -8557,7 +8556,6 @@
 	pf->udp_ports[next_idx].type = I40E_AQC_TUNNEL_TYPE_VXLAN;
 	pf->pending_udp_bitmap |= BIT_ULL(next_idx);
 	pf->flags |= I40E_FLAG_UDP_FILTER_SYNC;
-#endif
 }
 
 /**
@@ -8569,7 +8567,6 @@
 static void i40e_del_vxlan_port(struct net_device *netdev,
 				sa_family_t sa_family, __be16 port)
 {
-#if IS_ENABLED(CONFIG_VXLAN)
 	struct i40e_netdev_priv *np = netdev_priv(netdev);
 	struct i40e_vsi *vsi = np->vsi;
 	struct i40e_pf *pf = vsi->back;
@@ -8592,9 +8589,10 @@
 		netdev_warn(netdev, "vxlan port %d was not found, not deleting\n",
 			    ntohs(port));
 	}
-#endif
 }
+#endif
 
+#if IS_ENABLED(CONFIG_GENEVE)
 /**
  * i40e_add_geneve_port - Get notifications about GENEVE ports that come up
  * @netdev: This physical port's netdev
@@ -8604,7 +8602,6 @@
 static void i40e_add_geneve_port(struct net_device *netdev,
 				 sa_family_t sa_family, __be16 port)
 {
-#if IS_ENABLED(CONFIG_GENEVE)
 	struct i40e_netdev_priv *np = netdev_priv(netdev);
 	struct i40e_vsi *vsi = np->vsi;
 	struct i40e_pf *pf = vsi->back;
@@ -8639,7 +8636,6 @@
 	pf->flags |= I40E_FLAG_UDP_FILTER_SYNC;
 
 	dev_info(&pf->pdev->dev, "adding geneve port %d\n", ntohs(port));
-#endif
 }
 
 /**
@@ -8651,7 +8647,6 @@
 static void i40e_del_geneve_port(struct net_device *netdev,
 				 sa_family_t sa_family, __be16 port)
 {
-#if IS_ENABLED(CONFIG_GENEVE)
 	struct i40e_netdev_priv *np = netdev_priv(netdev);
 	struct i40e_vsi *vsi = np->vsi;
 	struct i40e_pf *pf = vsi->back;
@@ -8677,8 +8672,8 @@
 		netdev_warn(netdev, "geneve port %d was not found, not deleting\n",
 			    ntohs(port));
 	}
-#endif
 }
+#endif
 
 static int i40e_get_phys_port_id(struct net_device *netdev,
 				 struct netdev_phys_item_id *ppid)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 720516b..47bd8b3 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -2313,8 +2313,8 @@
 	struct iphdr *this_ip_hdr;
 	u32 network_hdr_len;
 	u8 l4_hdr = 0;
-	struct udphdr *oudph;
-	struct iphdr *oiph;
+	struct udphdr *oudph = NULL;
+	struct iphdr *oiph = NULL;
 	u32 l4_tunnel = 0;
 
 	if (skb->encapsulation) {
diff --git a/drivers/net/ethernet/jme.c b/drivers/net/ethernet/jme.c
index b1de7af..3ddf657 100644
--- a/drivers/net/ethernet/jme.c
+++ b/drivers/net/ethernet/jme.c
@@ -270,11 +270,17 @@
 }
 
 static inline void
-jme_clear_pm(struct jme_adapter *jme)
+jme_clear_pm_enable_wol(struct jme_adapter *jme)
 {
 	jwrite32(jme, JME_PMCS, PMCS_STMASK | jme->reg_pmcs);
 }
 
+static inline void
+jme_clear_pm_disable_wol(struct jme_adapter *jme)
+{
+	jwrite32(jme, JME_PMCS, PMCS_STMASK);
+}
+
 static int
 jme_reload_eeprom(struct jme_adapter *jme)
 {
@@ -1853,7 +1859,7 @@
 	struct jme_adapter *jme = netdev_priv(netdev);
 	int rc;
 
-	jme_clear_pm(jme);
+	jme_clear_pm_disable_wol(jme);
 	JME_NAPI_ENABLE(jme);
 
 	tasklet_init(&jme->linkch_task, jme_link_change_tasklet,
@@ -1925,11 +1931,11 @@
 static void
 jme_powersave_phy(struct jme_adapter *jme)
 {
-	if (jme->reg_pmcs) {
+	if (jme->reg_pmcs && device_may_wakeup(&jme->pdev->dev)) {
 		jme_set_100m_half(jme);
 		if (jme->reg_pmcs & (PMCS_LFEN | PMCS_LREN))
 			jme_wait_link(jme);
-		jme_clear_pm(jme);
+		jme_clear_pm_enable_wol(jme);
 	} else {
 		jme_phy_off(jme);
 	}
@@ -2646,9 +2652,6 @@
 	if (wol->wolopts & WAKE_MAGIC)
 		jme->reg_pmcs |= PMCS_MFEN;
 
-	jwrite32(jme, JME_PMCS, jme->reg_pmcs);
-	device_set_wakeup_enable(&jme->pdev->dev, !!(jme->reg_pmcs));
-
 	return 0;
 }
 
@@ -3172,8 +3175,8 @@
 	jme->mii_if.mdio_read = jme_mdio_read;
 	jme->mii_if.mdio_write = jme_mdio_write;
 
-	jme_clear_pm(jme);
-	device_set_wakeup_enable(&pdev->dev, true);
+	jme_clear_pm_disable_wol(jme);
+	device_init_wakeup(&pdev->dev, true);
 
 	jme_set_phyfifo_5level(jme);
 	jme->pcirev = pdev->revision;
@@ -3304,7 +3307,7 @@
 	if (!netif_running(netdev))
 		return 0;
 
-	jme_clear_pm(jme);
+	jme_clear_pm_disable_wol(jme);
 	jme_phy_on(jme);
 	if (test_bit(JME_FLAG_SSET, &jme->flags))
 		jme_set_settings(netdev, &jme->old_ecmd);
@@ -3312,13 +3315,14 @@
 		jme_reset_phy_processor(jme);
 	jme_phy_calibration(jme);
 	jme_phy_setEA(jme);
-	jme_start_irq(jme);
 	netif_device_attach(netdev);
 
 	atomic_inc(&jme->link_changing);
 
 	jme_reset_link(jme);
 
+	jme_start_irq(jme);
+
 	return 0;
 }
 
diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c
index a0c0383..5583118 100644
--- a/drivers/net/ethernet/marvell/mv643xx_eth.c
+++ b/drivers/net/ethernet/marvell/mv643xx_eth.c
@@ -762,10 +762,10 @@
 
 	if (length <= 8 && (uintptr_t)data & 0x7) {
 		/* Copy unaligned small data fragment to TSO header data area */
-		memcpy(txq->tso_hdrs + txq->tx_curr_desc * TSO_HEADER_SIZE,
+		memcpy(txq->tso_hdrs + tx_index * TSO_HEADER_SIZE,
 		       data, length);
 		desc->buf_ptr = txq->tso_hdrs_dma
-			+ txq->tx_curr_desc * TSO_HEADER_SIZE;
+			+ tx_index * TSO_HEADER_SIZE;
 	} else {
 		/* Alignment is okay, map buffer and hand off to hardware */
 		txq->tx_desc_mapping[tx_index] = DESC_DMA_MAP_SINGLE;
diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index fabc8df..b0ae69f 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -11,28 +11,28 @@
  * warranty of any kind, whether express or implied.
  */
 
-#include <linux/kernel.h>
-#include <linux/netdevice.h>
+#include <linux/clk.h>
+#include <linux/cpu.h>
 #include <linux/etherdevice.h>
-#include <linux/platform_device.h>
-#include <linux/skbuff.h>
+#include <linux/if_vlan.h>
 #include <linux/inetdevice.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
 #include <linux/mbus.h>
 #include <linux/module.h>
-#include <linux/interrupt.h>
-#include <linux/if_vlan.h>
-#include <net/ip.h>
-#include <net/ipv6.h>
-#include <linux/io.h>
-#include <net/tso.h>
+#include <linux/netdevice.h>
 #include <linux/of.h>
+#include <linux/of_address.h>
 #include <linux/of_irq.h>
 #include <linux/of_mdio.h>
 #include <linux/of_net.h>
-#include <linux/of_address.h>
 #include <linux/phy.h>
-#include <linux/clk.h>
-#include <linux/cpu.h>
+#include <linux/platform_device.h>
+#include <linux/skbuff.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/tso.h>
 
 /* Registers */
 #define MVNETA_RXQ_CONFIG_REG(q)                (0x1400 + ((q) << 2))
@@ -370,9 +370,16 @@
 	struct net_device *dev;
 	struct notifier_block cpu_notifier;
 	int rxq_def;
+	/* Protect the access to the percpu interrupt registers,
+	 * ensuring that the configuration remains coherent.
+	 */
+	spinlock_t lock;
+	bool is_stopped;
 
 	/* Core clock */
 	struct clk *clk;
+	/* AXI clock */
+	struct clk *clk_bus;
 	u8 mcast_count[256];
 	u16 tx_ring_size;
 	u16 rx_ring_size;
@@ -1036,6 +1043,43 @@
 	}
 }
 
+static void mvneta_percpu_unmask_interrupt(void *arg)
+{
+	struct mvneta_port *pp = arg;
+
+	/* All the queue are unmasked, but actually only the ones
+	 * mapped to this CPU will be unmasked
+	 */
+	mvreg_write(pp, MVNETA_INTR_NEW_MASK,
+		    MVNETA_RX_INTR_MASK_ALL |
+		    MVNETA_TX_INTR_MASK_ALL |
+		    MVNETA_MISCINTR_INTR_MASK);
+}
+
+static void mvneta_percpu_mask_interrupt(void *arg)
+{
+	struct mvneta_port *pp = arg;
+
+	/* All the queue are masked, but actually only the ones
+	 * mapped to this CPU will be masked
+	 */
+	mvreg_write(pp, MVNETA_INTR_NEW_MASK, 0);
+	mvreg_write(pp, MVNETA_INTR_OLD_MASK, 0);
+	mvreg_write(pp, MVNETA_INTR_MISC_MASK, 0);
+}
+
+static void mvneta_percpu_clear_intr_cause(void *arg)
+{
+	struct mvneta_port *pp = arg;
+
+	/* All the queue are cleared, but actually only the ones
+	 * mapped to this CPU will be cleared
+	 */
+	mvreg_write(pp, MVNETA_INTR_NEW_CAUSE, 0);
+	mvreg_write(pp, MVNETA_INTR_MISC_CAUSE, 0);
+	mvreg_write(pp, MVNETA_INTR_OLD_CAUSE, 0);
+}
+
 /* This method sets defaults to the NETA port:
  *	Clears interrupt Cause and Mask registers.
  *	Clears all MAC tables.
@@ -1053,14 +1097,10 @@
 	int max_cpu = num_present_cpus();
 
 	/* Clear all Cause registers */
-	mvreg_write(pp, MVNETA_INTR_NEW_CAUSE, 0);
-	mvreg_write(pp, MVNETA_INTR_OLD_CAUSE, 0);
-	mvreg_write(pp, MVNETA_INTR_MISC_CAUSE, 0);
+	on_each_cpu(mvneta_percpu_clear_intr_cause, pp, true);
 
 	/* Mask all interrupts */
-	mvreg_write(pp, MVNETA_INTR_NEW_MASK, 0);
-	mvreg_write(pp, MVNETA_INTR_OLD_MASK, 0);
-	mvreg_write(pp, MVNETA_INTR_MISC_MASK, 0);
+	on_each_cpu(mvneta_percpu_mask_interrupt, pp, true);
 	mvreg_write(pp, MVNETA_INTR_ENABLE, 0);
 
 	/* Enable MBUS Retry bit16 */
@@ -2526,34 +2566,9 @@
 	return 0;
 }
 
-static void mvneta_percpu_unmask_interrupt(void *arg)
-{
-	struct mvneta_port *pp = arg;
-
-	/* All the queue are unmasked, but actually only the ones
-	 * maped to this CPU will be unmasked
-	 */
-	mvreg_write(pp, MVNETA_INTR_NEW_MASK,
-		    MVNETA_RX_INTR_MASK_ALL |
-		    MVNETA_TX_INTR_MASK_ALL |
-		    MVNETA_MISCINTR_INTR_MASK);
-}
-
-static void mvneta_percpu_mask_interrupt(void *arg)
-{
-	struct mvneta_port *pp = arg;
-
-	/* All the queue are masked, but actually only the ones
-	 * maped to this CPU will be masked
-	 */
-	mvreg_write(pp, MVNETA_INTR_NEW_MASK, 0);
-	mvreg_write(pp, MVNETA_INTR_OLD_MASK, 0);
-	mvreg_write(pp, MVNETA_INTR_MISC_MASK, 0);
-}
-
 static void mvneta_start_dev(struct mvneta_port *pp)
 {
-	unsigned int cpu;
+	int cpu;
 
 	mvneta_max_rx_size_set(pp, pp->pkt_size);
 	mvneta_txq_max_tx_size_set(pp, pp->pkt_size);
@@ -2562,16 +2577,15 @@
 	mvneta_port_enable(pp);
 
 	/* Enable polling on the port */
-	for_each_present_cpu(cpu) {
+	for_each_online_cpu(cpu) {
 		struct mvneta_pcpu_port *port = per_cpu_ptr(pp->ports, cpu);
 
 		napi_enable(&port->napi);
 	}
 
 	/* Unmask interrupts. It has to be done from each CPU */
-	for_each_online_cpu(cpu)
-		smp_call_function_single(cpu, mvneta_percpu_unmask_interrupt,
-					 pp, true);
+	on_each_cpu(mvneta_percpu_unmask_interrupt, pp, true);
+
 	mvreg_write(pp, MVNETA_INTR_MISC_MASK,
 		    MVNETA_CAUSE_PHY_STATUS_CHANGE |
 		    MVNETA_CAUSE_LINK_CHANGE |
@@ -2587,7 +2601,7 @@
 
 	phy_stop(pp->phy_dev);
 
-	for_each_present_cpu(cpu) {
+	for_each_online_cpu(cpu) {
 		struct mvneta_pcpu_port *port = per_cpu_ptr(pp->ports, cpu);
 
 		napi_disable(&port->napi);
@@ -2602,13 +2616,10 @@
 	mvneta_port_disable(pp);
 
 	/* Clear all ethernet port interrupts */
-	mvreg_write(pp, MVNETA_INTR_MISC_CAUSE, 0);
-	mvreg_write(pp, MVNETA_INTR_OLD_CAUSE, 0);
+	on_each_cpu(mvneta_percpu_clear_intr_cause, pp, true);
 
 	/* Mask all ethernet port interrupts */
-	mvreg_write(pp, MVNETA_INTR_NEW_MASK, 0);
-	mvreg_write(pp, MVNETA_INTR_OLD_MASK, 0);
-	mvreg_write(pp, MVNETA_INTR_MISC_MASK, 0);
+	on_each_cpu(mvneta_percpu_mask_interrupt, pp, true);
 
 	mvneta_tx_reset(pp);
 	mvneta_rx_reset(pp);
@@ -2845,11 +2856,20 @@
 	disable_percpu_irq(pp->dev->irq);
 }
 
+/* Electing a CPU must be done in an atomic way: it should be done
+ * after or before the removal/insertion of a CPU and this function is
+ * not reentrant.
+ */
 static void mvneta_percpu_elect(struct mvneta_port *pp)
 {
-	int online_cpu_idx, max_cpu, cpu, i = 0;
+	int elected_cpu = 0, max_cpu, cpu, i = 0;
 
-	online_cpu_idx = pp->rxq_def % num_online_cpus();
+	/* Use the cpu associated to the rxq when it is online, in all
+	 * the other cases, use the cpu 0 which can't be offline.
+	 */
+	if (cpu_online(pp->rxq_def))
+		elected_cpu = pp->rxq_def;
+
 	max_cpu = num_present_cpus();
 
 	for_each_online_cpu(cpu) {
@@ -2860,7 +2880,7 @@
 			if ((rxq % max_cpu) == cpu)
 				rxq_map |= MVNETA_CPU_RXQ_ACCESS(rxq);
 
-		if (i == online_cpu_idx)
+		if (cpu == elected_cpu)
 			/* Map the default receive queue queue to the
 			 * elected CPU
 			 */
@@ -2871,7 +2891,7 @@
 		 * the CPU bound to the default RX queue
 		 */
 		if (txq_number == 1)
-			txq_map = (i == online_cpu_idx) ?
+			txq_map = (cpu == elected_cpu) ?
 				MVNETA_CPU_TXQ_ACCESS(1) : 0;
 		else
 			txq_map = mvreg_read(pp, MVNETA_CPU_MAP(cpu)) &
@@ -2900,6 +2920,14 @@
 	switch (action) {
 	case CPU_ONLINE:
 	case CPU_ONLINE_FROZEN:
+		spin_lock(&pp->lock);
+		/* Configuring the driver for a new CPU while the
+		 * driver is stopping is racy, so just avoid it.
+		 */
+		if (pp->is_stopped) {
+			spin_unlock(&pp->lock);
+			break;
+		}
 		netif_tx_stop_all_queues(pp->dev);
 
 		/* We have to synchronise on tha napi of each CPU
@@ -2915,9 +2943,7 @@
 		}
 
 		/* Mask all ethernet port interrupts */
-		mvreg_write(pp, MVNETA_INTR_NEW_MASK, 0);
-		mvreg_write(pp, MVNETA_INTR_OLD_MASK, 0);
-		mvreg_write(pp, MVNETA_INTR_MISC_MASK, 0);
+		on_each_cpu(mvneta_percpu_mask_interrupt, pp, true);
 		napi_enable(&port->napi);
 
 
@@ -2932,27 +2958,25 @@
 		 */
 		mvneta_percpu_elect(pp);
 
-		/* Unmask all ethernet port interrupts, as this
-		 * notifier is called for each CPU then the CPU to
-		 * Queue mapping is applied
-		 */
-		mvreg_write(pp, MVNETA_INTR_NEW_MASK,
-			MVNETA_RX_INTR_MASK(rxq_number) |
-			MVNETA_TX_INTR_MASK(txq_number) |
-			MVNETA_MISCINTR_INTR_MASK);
+		/* Unmask all ethernet port interrupts */
+		on_each_cpu(mvneta_percpu_unmask_interrupt, pp, true);
 		mvreg_write(pp, MVNETA_INTR_MISC_MASK,
 			MVNETA_CAUSE_PHY_STATUS_CHANGE |
 			MVNETA_CAUSE_LINK_CHANGE |
 			MVNETA_CAUSE_PSC_SYNC_CHANGE);
 		netif_tx_start_all_queues(pp->dev);
+		spin_unlock(&pp->lock);
 		break;
 	case CPU_DOWN_PREPARE:
 	case CPU_DOWN_PREPARE_FROZEN:
 		netif_tx_stop_all_queues(pp->dev);
+		/* Thanks to this lock we are sure that any pending
+		 * cpu election is done
+		 */
+		spin_lock(&pp->lock);
 		/* Mask all ethernet port interrupts */
-		mvreg_write(pp, MVNETA_INTR_NEW_MASK, 0);
-		mvreg_write(pp, MVNETA_INTR_OLD_MASK, 0);
-		mvreg_write(pp, MVNETA_INTR_MISC_MASK, 0);
+		on_each_cpu(mvneta_percpu_mask_interrupt, pp, true);
+		spin_unlock(&pp->lock);
 
 		napi_synchronize(&port->napi);
 		napi_disable(&port->napi);
@@ -2966,12 +2990,11 @@
 	case CPU_DEAD:
 	case CPU_DEAD_FROZEN:
 		/* Check if a new CPU must be elected now this on is down */
+		spin_lock(&pp->lock);
 		mvneta_percpu_elect(pp);
+		spin_unlock(&pp->lock);
 		/* Unmask all ethernet port interrupts */
-		mvreg_write(pp, MVNETA_INTR_NEW_MASK,
-			MVNETA_RX_INTR_MASK(rxq_number) |
-			MVNETA_TX_INTR_MASK(txq_number) |
-			MVNETA_MISCINTR_INTR_MASK);
+		on_each_cpu(mvneta_percpu_unmask_interrupt, pp, true);
 		mvreg_write(pp, MVNETA_INTR_MISC_MASK,
 			MVNETA_CAUSE_PHY_STATUS_CHANGE |
 			MVNETA_CAUSE_LINK_CHANGE |
@@ -2986,7 +3009,7 @@
 static int mvneta_open(struct net_device *dev)
 {
 	struct mvneta_port *pp = netdev_priv(dev);
-	int ret, cpu;
+	int ret;
 
 	pp->pkt_size = MVNETA_RX_PKT_SIZE(pp->dev->mtu);
 	pp->frag_size = SKB_DATA_ALIGN(MVNETA_RX_BUF_SIZE(pp->pkt_size)) +
@@ -3008,22 +3031,12 @@
 		goto err_cleanup_txqs;
 	}
 
-	/* Even though the documentation says that request_percpu_irq
-	 * doesn't enable the interrupts automatically, it actually
-	 * does so on the local CPU.
-	 *
-	 * Make sure it's disabled.
-	 */
-	mvneta_percpu_disable(pp);
-
 	/* Enable per-CPU interrupt on all the CPU to handle our RX
 	 * queue interrupts
 	 */
-	for_each_online_cpu(cpu)
-		smp_call_function_single(cpu, mvneta_percpu_enable,
-					 pp, true);
+	on_each_cpu(mvneta_percpu_enable, pp, true);
 
-
+	pp->is_stopped = false;
 	/* Register a CPU notifier to handle the case where our CPU
 	 * might be taken offline.
 	 */
@@ -3055,13 +3068,20 @@
 static int mvneta_stop(struct net_device *dev)
 {
 	struct mvneta_port *pp = netdev_priv(dev);
-	int cpu;
 
+	/* Inform that we are stopping so we don't want to setup the
+	 * driver for new CPUs in the notifiers
+	 */
+	spin_lock(&pp->lock);
+	pp->is_stopped = true;
 	mvneta_stop_dev(pp);
 	mvneta_mdio_remove(pp);
 	unregister_cpu_notifier(&pp->cpu_notifier);
-	for_each_present_cpu(cpu)
-		smp_call_function_single(cpu, mvneta_percpu_disable, pp, true);
+	/* Now that the notifier are unregistered, we can release le
+	 * lock
+	 */
+	spin_unlock(&pp->lock);
+	on_each_cpu(mvneta_percpu_disable, pp, true);
 	free_percpu_irq(dev->irq, pp->ports);
 	mvneta_cleanup_rxqs(pp);
 	mvneta_cleanup_txqs(pp);
@@ -3242,26 +3262,25 @@
 	const struct mvneta_statistic *s;
 	void __iomem *base = pp->base;
 	u32 high, low, val;
+	u64 val64;
 	int i;
 
 	for (i = 0, s = mvneta_statistics;
 	     s < mvneta_statistics + ARRAY_SIZE(mvneta_statistics);
 	     s++, i++) {
-		val = 0;
-
 		switch (s->type) {
 		case T_REG_32:
 			val = readl_relaxed(base + s->offset);
+			pp->ethtool_stats[i] += val;
 			break;
 		case T_REG_64:
 			/* Docs say to read low 32-bit then high */
 			low = readl_relaxed(base + s->offset);
 			high = readl_relaxed(base + s->offset + 4);
-			val = (u64)high << 32 | low;
+			val64 = (u64)high << 32 | low;
+			pp->ethtool_stats[i] += val64;
 			break;
 		}
-
-		pp->ethtool_stats[i] += val;
 	}
 }
 
@@ -3311,9 +3330,7 @@
 
 	netif_tx_stop_all_queues(pp->dev);
 
-	for_each_online_cpu(cpu)
-		smp_call_function_single(cpu, mvneta_percpu_mask_interrupt,
-					 pp, true);
+	on_each_cpu(mvneta_percpu_mask_interrupt, pp, true);
 
 	/* We have to synchronise on the napi of each CPU */
 	for_each_online_cpu(cpu) {
@@ -3334,7 +3351,9 @@
 	mvreg_write(pp, MVNETA_PORT_CONFIG, val);
 
 	/* Update the elected CPU matching the new rxq_def */
+	spin_lock(&pp->lock);
 	mvneta_percpu_elect(pp);
+	spin_unlock(&pp->lock);
 
 	/* We have to synchronise on the napi of each CPU */
 	for_each_online_cpu(cpu) {
@@ -3605,7 +3624,9 @@
 
 	pp->indir[0] = rxq_def;
 
-	pp->clk = devm_clk_get(&pdev->dev, NULL);
+	pp->clk = devm_clk_get(&pdev->dev, "core");
+	if (IS_ERR(pp->clk))
+		pp->clk = devm_clk_get(&pdev->dev, NULL);
 	if (IS_ERR(pp->clk)) {
 		err = PTR_ERR(pp->clk);
 		goto err_put_phy_node;
@@ -3613,6 +3634,10 @@
 
 	clk_prepare_enable(pp->clk);
 
+	pp->clk_bus = devm_clk_get(&pdev->dev, "bus");
+	if (!IS_ERR(pp->clk_bus))
+		clk_prepare_enable(pp->clk_bus);
+
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	pp->base = devm_ioremap_resource(&pdev->dev, res);
 	if (IS_ERR(pp->base)) {
@@ -3724,6 +3749,7 @@
 err_free_ports:
 	free_percpu(pp->ports);
 err_clk:
+	clk_disable_unprepare(pp->clk_bus);
 	clk_disable_unprepare(pp->clk);
 err_put_phy_node:
 	of_node_put(phy_node);
@@ -3741,6 +3767,7 @@
 	struct mvneta_port *pp = netdev_priv(dev);
 
 	unregister_netdev(dev);
+	clk_disable_unprepare(pp->clk_bus);
 	clk_disable_unprepare(pp->clk);
 	free_percpu(pp->ports);
 	free_percpu(pp->stats);
diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c
index a4beccf..c797971a 100644
--- a/drivers/net/ethernet/marvell/mvpp2.c
+++ b/drivers/net/ethernet/marvell/mvpp2.c
@@ -3061,7 +3061,7 @@
 
 		pe = kzalloc(sizeof(*pe), GFP_KERNEL);
 		if (!pe)
-			return -1;
+			return -ENOMEM;
 		mvpp2_prs_tcam_lu_set(pe, MVPP2_PRS_LU_MAC);
 		pe->index = tid;
 
@@ -3077,7 +3077,7 @@
 	if (pmap == 0) {
 		if (add) {
 			kfree(pe);
-			return -1;
+			return -EINVAL;
 		}
 		mvpp2_prs_hw_inv(priv, pe->index);
 		priv->prs_shadow[pe->index].valid = false;
diff --git a/drivers/net/ethernet/mellanox/mlx4/catas.c b/drivers/net/ethernet/mellanox/mlx4/catas.c
index 715de8a..c7e9399 100644
--- a/drivers/net/ethernet/mellanox/mlx4/catas.c
+++ b/drivers/net/ethernet/mellanox/mlx4/catas.c
@@ -182,10 +182,17 @@
 		err = mlx4_reset_slave(dev);
 	else
 		err = mlx4_reset_master(dev);
-	BUG_ON(err != 0);
 
+	if (!err) {
+		mlx4_err(dev, "device was reset successfully\n");
+	} else {
+		/* EEH could have disabled the PCI channel during reset. That's
+		 * recoverable and the PCI error flow will handle it.
+		 */
+		if (!pci_channel_offline(dev->persist->pdev))
+			BUG_ON(1);
+	}
 	dev->persist->state |= MLX4_DEVICE_STATE_INTERNAL_ERROR;
-	mlx4_err(dev, "device was reset successfully\n");
 	mutex_unlock(&persist->device_state_mutex);
 
 	/* At that step HW was already reset, now notify clients */
diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c
index d48d579..e94ca1c 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
@@ -2429,7 +2429,7 @@
 	flush_workqueue(priv->mfunc.master.comm_wq);
 	destroy_workqueue(priv->mfunc.master.comm_wq);
 err_slaves:
-	while (--i) {
+	while (i--) {
 		for (port = 1; port <= MLX4_MAX_PORTS; port++)
 			kfree(priv->mfunc.master.slave_state[i].vlan_filter[port]);
 	}
diff --git a/drivers/net/ethernet/mellanox/mlx4/cq.c b/drivers/net/ethernet/mellanox/mlx4/cq.c
index 3348e64..a849da9 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cq.c
@@ -318,7 +318,9 @@
 	if (timestamp_en)
 		cq_context->flags  |= cpu_to_be32(1 << 19);
 
-	cq_context->logsize_usrpage = cpu_to_be32((ilog2(nent) << 24) | uar->index);
+	cq_context->logsize_usrpage =
+		cpu_to_be32((ilog2(nent) << 24) |
+			    mlx4_to_hw_uar_index(dev, uar->index));
 	cq_context->comp_eqn	    = priv->eq_table.eq[MLX4_CQ_TO_EQ_VECTOR(vector)].eqn;
 	cq_context->log_page_size   = mtt->page_shift - MLX4_ICM_PAGE_SHIFT;
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_clock.c b/drivers/net/ethernet/mellanox/mlx4/en_clock.c
index 038f9ce..1494997 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_clock.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_clock.c
@@ -236,6 +236,24 @@
 	.enable		= mlx4_en_phc_enable,
 };
 
+#define MLX4_EN_WRAP_AROUND_SEC	10ULL
+
+/* This function calculates the max shift that enables the user range
+ * of MLX4_EN_WRAP_AROUND_SEC values in the cycles register.
+ */
+static u32 freq_to_shift(u16 freq)
+{
+	u32 freq_khz = freq * 1000;
+	u64 max_val_cycles = freq_khz * 1000 * MLX4_EN_WRAP_AROUND_SEC;
+	u64 max_val_cycles_rounded = is_power_of_2(max_val_cycles + 1) ?
+		max_val_cycles : roundup_pow_of_two(max_val_cycles) - 1;
+	/* calculate max possible multiplier in order to fit in 64bit */
+	u64 max_mul = div_u64(0xffffffffffffffffULL, max_val_cycles_rounded);
+
+	/* This comes from the reverse of clocksource_khz2mult */
+	return ilog2(div_u64(max_mul * freq_khz, 1000000));
+}
+
 void mlx4_en_init_timestamp(struct mlx4_en_dev *mdev)
 {
 	struct mlx4_dev *dev = mdev->dev;
@@ -254,12 +272,7 @@
 	memset(&mdev->cycles, 0, sizeof(mdev->cycles));
 	mdev->cycles.read = mlx4_en_read_clock;
 	mdev->cycles.mask = CLOCKSOURCE_MASK(48);
-	/* Using shift to make calculation more accurate. Since current HW
-	 * clock frequency is 427 MHz, and cycles are given using a 48 bits
-	 * register, the biggest shift when calculating using u64, is 14
-	 * (max_cycles * multiplier < 2^64)
-	 */
-	mdev->cycles.shift = 14;
+	mdev->cycles.shift = freq_to_shift(dev->caps.hca_core_clock);
 	mdev->cycles.mult =
 		clocksource_khz2mult(1000 * dev->caps.hca_core_clock, mdev->cycles.shift);
 	mdev->nominal_c_mult = mdev->cycles.mult;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 0c7e3f6..21e2c09 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -2245,7 +2245,7 @@
 	struct mlx4_en_dev *mdev = en_priv->mdev;
 	u64 mac_u64 = mlx4_mac_to_u64(mac);
 
-	if (!is_valid_ether_addr(mac))
+	if (is_multicast_ether_addr(mac))
 		return -EINVAL;
 
 	return mlx4_set_vf_mac(mdev->dev, en_priv->port, queue, mac_u64);
@@ -2344,8 +2344,6 @@
 	/* set offloads */
 	priv->dev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_RXCSUM |
 				      NETIF_F_TSO | NETIF_F_GSO_UDP_TUNNEL;
-	priv->dev->hw_features |= NETIF_F_GSO_UDP_TUNNEL;
-	priv->dev->features    |= NETIF_F_GSO_UDP_TUNNEL;
 }
 
 static void mlx4_en_del_vxlan_offloads(struct work_struct *work)
@@ -2356,8 +2354,6 @@
 	/* unset offloads */
 	priv->dev->hw_enc_features &= ~(NETIF_F_IP_CSUM | NETIF_F_RXCSUM |
 				      NETIF_F_TSO | NETIF_F_GSO_UDP_TUNNEL);
-	priv->dev->hw_features &= ~NETIF_F_GSO_UDP_TUNNEL;
-	priv->dev->features    &= ~NETIF_F_GSO_UDP_TUNNEL;
 
 	ret = mlx4_SET_PORT_VXLAN(priv->mdev->dev, priv->port,
 				  VXLAN_STEER_BY_OUTER_MAC, 0);
@@ -2980,6 +2976,11 @@
 		priv->rss_hash_fn = ETH_RSS_HASH_TOP;
 	}
 
+	if (mdev->dev->caps.tunnel_offload_mode == MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) {
+		dev->hw_features |= NETIF_F_GSO_UDP_TUNNEL;
+		dev->features    |= NETIF_F_GSO_UDP_TUNNEL;
+	}
+
 	mdev->pndev[port] = dev;
 	mdev->upper[port] = NULL;
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_port.c b/drivers/net/ethernet/mellanox/mlx4/en_port.c
index ee99e67..3904b5f 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_port.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_port.c
@@ -238,11 +238,11 @@
 	stats->collisions = 0;
 	stats->rx_dropped = be32_to_cpu(mlx4_en_stats->RDROP);
 	stats->rx_length_errors = be32_to_cpu(mlx4_en_stats->RdropLength);
-	stats->rx_over_errors = be32_to_cpu(mlx4_en_stats->RdropOvflw);
+	stats->rx_over_errors = 0;
 	stats->rx_crc_errors = be32_to_cpu(mlx4_en_stats->RCRC);
 	stats->rx_frame_errors = 0;
 	stats->rx_fifo_errors = be32_to_cpu(mlx4_en_stats->RdropOvflw);
-	stats->rx_missed_errors = be32_to_cpu(mlx4_en_stats->RdropOvflw);
+	stats->rx_missed_errors = 0;
 	stats->tx_aborted_errors = 0;
 	stats->tx_carrier_errors = 0;
 	stats->tx_fifo_errors = 0;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_resources.c b/drivers/net/ethernet/mellanox/mlx4/en_resources.c
index 12aab5a..02e925d 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_resources.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_resources.c
@@ -58,7 +58,8 @@
 	} else {
 		context->sq_size_stride = ilog2(TXBB_SIZE) - 4;
 	}
-	context->usr_page = cpu_to_be32(mdev->priv_uar.index);
+	context->usr_page = cpu_to_be32(mlx4_to_hw_uar_index(mdev->dev,
+					mdev->priv_uar.index));
 	context->local_qpn = cpu_to_be32(qpn);
 	context->pri_path.ackto = 1 & 0x07;
 	context->pri_path.sched_queue = 0x83 | (priv->port - 1) << 6;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index 4421bf5..e0946ab 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -213,7 +213,9 @@
 	mlx4_en_fill_qp_context(priv, ring->size, ring->stride, 1, 0, ring->qpn,
 				ring->cqn, user_prio, &ring->context);
 	if (ring->bf_alloced)
-		ring->context.usr_page = cpu_to_be32(ring->bf.uar->index);
+		ring->context.usr_page =
+			cpu_to_be32(mlx4_to_hw_uar_index(mdev->dev,
+							 ring->bf.uar->index));
 
 	err = mlx4_qp_to_ready(mdev->dev, &ring->wqres.mtt, &ring->context,
 			       &ring->qp, &ring->qp_state);
diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c
index 4696053..f613977 100644
--- a/drivers/net/ethernet/mellanox/mlx4/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/eq.c
@@ -940,9 +940,10 @@
 
 	if (!priv->eq_table.uar_map[index]) {
 		priv->eq_table.uar_map[index] =
-			ioremap(pci_resource_start(dev->persist->pdev, 2) +
-				((eq->eqn / 4) << PAGE_SHIFT),
-				PAGE_SIZE);
+			ioremap(
+				pci_resource_start(dev->persist->pdev, 2) +
+				((eq->eqn / 4) << (dev->uar_page_shift)),
+				(1 << (dev->uar_page_shift)));
 		if (!priv->eq_table.uar_map[index]) {
 			mlx4_err(dev, "Couldn't map EQ doorbell for EQN 0x%06x\n",
 				 eq->eqn);
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index f1b6d21..f8674ae 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -168,6 +168,20 @@
 
 static atomic_t pf_loading = ATOMIC_INIT(0);
 
+static inline void mlx4_set_num_reserved_uars(struct mlx4_dev *dev,
+					      struct mlx4_dev_cap *dev_cap)
+{
+	/* The reserved_uars is calculated by system page size unit.
+	 * Therefore, adjustment is added when the uar page size is less
+	 * than the system page size
+	 */
+	dev->caps.reserved_uars	=
+		max_t(int,
+		      mlx4_get_num_reserved_uar(dev),
+		      dev_cap->reserved_uars /
+			(1 << (PAGE_SHIFT - dev->uar_page_shift)));
+}
+
 int mlx4_check_port_params(struct mlx4_dev *dev,
 			   enum mlx4_port_type *port_type)
 {
@@ -386,8 +400,6 @@
 	dev->caps.reserved_mtts      = dev_cap->reserved_mtts;
 	dev->caps.reserved_mrws	     = dev_cap->reserved_mrws;
 
-	/* The first 128 UARs are used for EQ doorbells */
-	dev->caps.reserved_uars	     = max_t(int, 128, dev_cap->reserved_uars);
 	dev->caps.reserved_pds	     = dev_cap->reserved_pds;
 	dev->caps.reserved_xrcds     = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ?
 					dev_cap->reserved_xrcds : 0;
@@ -405,6 +417,15 @@
 	dev->caps.max_gso_sz	     = dev_cap->max_gso_sz;
 	dev->caps.max_rss_tbl_sz     = dev_cap->max_rss_tbl_sz;
 
+	/* Save uar page shift */
+	if (!mlx4_is_slave(dev)) {
+		/* Virtual PCI function needs to determine UAR page size from
+		 * firmware. Only master PCI function can set the uar page size
+		 */
+		dev->uar_page_shift = DEFAULT_UAR_PAGE_SHIFT;
+		mlx4_set_num_reserved_uars(dev, dev_cap);
+	}
+
 	if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_PHV_EN) {
 		struct mlx4_init_hca_param hca_param;
 
@@ -815,16 +836,25 @@
 		return -ENODEV;
 	}
 
-	/* slave gets uar page size from QUERY_HCA fw command */
-	dev->caps.uar_page_size = 1 << (hca_param.uar_page_sz + 12);
+	/* Set uar_page_shift for VF */
+	dev->uar_page_shift = hca_param.uar_page_sz + 12;
 
-	/* TODO: relax this assumption */
-	if (dev->caps.uar_page_size != PAGE_SIZE) {
-		mlx4_err(dev, "UAR size:%d != kernel PAGE_SIZE of %ld\n",
-			 dev->caps.uar_page_size, PAGE_SIZE);
-		return -ENODEV;
+	/* Make sure the master uar page size is valid */
+	if (dev->uar_page_shift > PAGE_SHIFT) {
+		mlx4_err(dev,
+			 "Invalid configuration: uar page size is larger than system page size\n");
+		return  -ENODEV;
 	}
 
+	/* Set reserved_uars based on the uar_page_shift */
+	mlx4_set_num_reserved_uars(dev, &dev_cap);
+
+	/* Although uar page size in FW differs from system page size,
+	 * upper software layers (mlx4_ib, mlx4_en and part of mlx4_core)
+	 * still works with assumption that uar page size == system page size
+	 */
+	dev->caps.uar_page_size = PAGE_SIZE;
+
 	memset(&func_cap, 0, sizeof(func_cap));
 	err = mlx4_QUERY_FUNC_CAP(dev, 0, &func_cap);
 	if (err) {
@@ -1226,6 +1256,7 @@
 static int mlx4_mf_bond(struct mlx4_dev *dev)
 {
 	int err = 0;
+	int nvfs;
 	struct mlx4_slaves_pport slaves_port1;
 	struct mlx4_slaves_pport slaves_port2;
 	DECLARE_BITMAP(slaves_port_1_2, MLX4_MFUNC_MAX);
@@ -1242,11 +1273,18 @@
 		return -EINVAL;
 	}
 
+	/* number of virtual functions is number of total functions minus one
+	 * physical function for each port.
+	 */
+	nvfs = bitmap_weight(slaves_port1.slaves, dev->persist->num_vfs + 1) +
+		bitmap_weight(slaves_port2.slaves, dev->persist->num_vfs + 1) - 2;
+
 	/* limit on maximum allowed VFs */
-	if ((bitmap_weight(slaves_port1.slaves, dev->persist->num_vfs + 1) +
-	    bitmap_weight(slaves_port2.slaves, dev->persist->num_vfs + 1)) >
-	    MAX_MF_BOND_ALLOWED_SLAVES)
+	if (nvfs > MAX_MF_BOND_ALLOWED_SLAVES) {
+		mlx4_warn(dev, "HA mode is not supported for %d VFs (max %d are allowed)\n",
+			  nvfs, MAX_MF_BOND_ALLOWED_SLAVES);
 		return -EINVAL;
+	}
 
 	if (dev->caps.steering_mode != MLX4_STEERING_MODE_DEVICE_MANAGED) {
 		mlx4_warn(dev, "HA mode unsupported for NON DMFS steering\n");
@@ -2179,8 +2217,12 @@
 
 		dev->caps.max_fmr_maps = (1 << (32 - ilog2(dev->caps.num_mpts))) - 1;
 
-		init_hca.log_uar_sz = ilog2(dev->caps.num_uars);
-		init_hca.uar_page_sz = PAGE_SHIFT - 12;
+		/* Always set UAR page size 4KB, set log_uar_sz accordingly */
+		init_hca.log_uar_sz = ilog2(dev->caps.num_uars) +
+				      PAGE_SHIFT -
+				      DEFAULT_UAR_PAGE_SHIFT;
+		init_hca.uar_page_sz = DEFAULT_UAR_PAGE_SHIFT - 12;
+
 		init_hca.mw_enabled = 0;
 		if (dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW ||
 		    dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN)
diff --git a/drivers/net/ethernet/mellanox/mlx4/pd.c b/drivers/net/ethernet/mellanox/mlx4/pd.c
index 609c59d..b3cc3ab 100644
--- a/drivers/net/ethernet/mellanox/mlx4/pd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/pd.c
@@ -269,9 +269,15 @@
 
 int mlx4_init_uar_table(struct mlx4_dev *dev)
 {
-	if (dev->caps.num_uars <= 128) {
-		mlx4_err(dev, "Only %d UAR pages (need more than 128)\n",
-			 dev->caps.num_uars);
+	int num_reserved_uar = mlx4_get_num_reserved_uar(dev);
+
+	mlx4_dbg(dev, "uar_page_shift = %d", dev->uar_page_shift);
+	mlx4_dbg(dev, "Effective reserved_uars=%d", dev->caps.reserved_uars);
+
+	if (dev->caps.num_uars <= num_reserved_uar) {
+		mlx4_err(
+			dev, "Only %d UAR pages (need more than %d)\n",
+			dev->caps.num_uars, num_reserved_uar);
 		mlx4_err(dev, "Increase firmware log2_uar_bar_megabytes?\n");
 		return -ENODEV;
 	}
diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c
index 787b7bb..211c650 100644
--- a/drivers/net/ethernet/mellanox/mlx4/port.c
+++ b/drivers/net/ethernet/mellanox/mlx4/port.c
@@ -193,10 +193,10 @@
 	if (need_mf_bond) {
 		if (port == 1) {
 			mutex_lock(&table->mutex);
-			mutex_lock(&dup_table->mutex);
+			mutex_lock_nested(&dup_table->mutex, SINGLE_DEPTH_NESTING);
 		} else {
 			mutex_lock(&dup_table->mutex);
-			mutex_lock(&table->mutex);
+			mutex_lock_nested(&table->mutex, SINGLE_DEPTH_NESTING);
 		}
 	} else {
 		mutex_lock(&table->mutex);
@@ -389,10 +389,10 @@
 	if (dup) {
 		if (port == 1) {
 			mutex_lock(&table->mutex);
-			mutex_lock(&dup_table->mutex);
+			mutex_lock_nested(&dup_table->mutex, SINGLE_DEPTH_NESTING);
 		} else {
 			mutex_lock(&dup_table->mutex);
-			mutex_lock(&table->mutex);
+			mutex_lock_nested(&table->mutex, SINGLE_DEPTH_NESTING);
 		}
 	} else {
 		mutex_lock(&table->mutex);
@@ -479,10 +479,10 @@
 	if (dup) {
 		if (port == 1) {
 			mutex_lock(&table->mutex);
-			mutex_lock(&dup_table->mutex);
+			mutex_lock_nested(&dup_table->mutex, SINGLE_DEPTH_NESTING);
 		} else {
 			mutex_lock(&dup_table->mutex);
-			mutex_lock(&table->mutex);
+			mutex_lock_nested(&table->mutex, SINGLE_DEPTH_NESTING);
 		}
 	} else {
 		mutex_lock(&table->mutex);
@@ -588,10 +588,10 @@
 	if (need_mf_bond) {
 		if (port == 1) {
 			mutex_lock(&table->mutex);
-			mutex_lock(&dup_table->mutex);
+			mutex_lock_nested(&dup_table->mutex, SINGLE_DEPTH_NESTING);
 		} else {
 			mutex_lock(&dup_table->mutex);
-			mutex_lock(&table->mutex);
+			mutex_lock_nested(&table->mutex, SINGLE_DEPTH_NESTING);
 		}
 	} else {
 		mutex_lock(&table->mutex);
@@ -764,10 +764,10 @@
 	if (dup) {
 		if (port == 1) {
 			mutex_lock(&table->mutex);
-			mutex_lock(&dup_table->mutex);
+			mutex_lock_nested(&dup_table->mutex, SINGLE_DEPTH_NESTING);
 		} else {
 			mutex_lock(&dup_table->mutex);
-			mutex_lock(&table->mutex);
+			mutex_lock_nested(&table->mutex, SINGLE_DEPTH_NESTING);
 		}
 	} else {
 		mutex_lock(&table->mutex);
diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
index b46dbe2..25ce1b0 100644
--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
@@ -915,11 +915,13 @@
 
 	spin_lock_irq(mlx4_tlock(dev));
 	r = find_res(dev, counter_index, RES_COUNTER);
-	if (!r || r->owner != slave)
+	if (!r || r->owner != slave) {
 		ret = -EINVAL;
-	counter = container_of(r, struct res_counter, com);
-	if (!counter->port)
-		counter->port = port;
+	} else {
+		counter = container_of(r, struct res_counter, com);
+		if (!counter->port)
+			counter->port = port;
+	}
 
 	spin_unlock_irq(mlx4_tlock(dev));
 	return ret;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index aac071a..5b17532 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -223,6 +223,7 @@
 
 static const char rq_stats_strings[][ETH_GSTRING_LEN] = {
 	"packets",
+	"bytes",
 	"csum_none",
 	"csum_sw",
 	"lro_packets",
@@ -232,16 +233,18 @@
 
 struct mlx5e_rq_stats {
 	u64 packets;
+	u64 bytes;
 	u64 csum_none;
 	u64 csum_sw;
 	u64 lro_packets;
 	u64 lro_bytes;
 	u64 wqe_err;
-#define NUM_RQ_STATS 6
+#define NUM_RQ_STATS 7
 };
 
 static const char sq_stats_strings[][ETH_GSTRING_LEN] = {
 	"packets",
+	"bytes",
 	"tso_packets",
 	"tso_bytes",
 	"csum_offload_none",
@@ -253,6 +256,7 @@
 
 struct mlx5e_sq_stats {
 	u64 packets;
+	u64 bytes;
 	u64 tso_packets;
 	u64 tso_bytes;
 	u64 csum_offload_none;
@@ -260,7 +264,7 @@
 	u64 wake;
 	u64 dropped;
 	u64 nop;
-#define NUM_SQ_STATS 8
+#define NUM_SQ_STATS 9
 };
 
 struct mlx5e_stats {
@@ -304,14 +308,9 @@
 	MLX5E_RQ_STATE_POST_WQES_ENABLE,
 };
 
-enum cq_flags {
-	MLX5E_CQ_HAS_CQES = 1,
-};
-
 struct mlx5e_cq {
 	/* data path - accessed per cqe */
 	struct mlx5_cqwq           wq;
-	unsigned long              flags;
 
 	/* data path - accessed per napi poll */
 	struct napi_struct        *napi;
@@ -452,6 +451,8 @@
 	MLX5E_NUM_TT,
 };
 
+#define IS_HASHING_TT(tt) (tt != MLX5E_TT_ANY)
+
 enum mlx5e_rqt_ix {
 	MLX5E_INDIRECTION_RQT,
 	MLX5E_SINGLE_RQ_RQT,
@@ -618,9 +619,12 @@
 void mlx5e_disable_vlan_filter(struct mlx5e_priv *priv);
 
 int mlx5e_redirect_rqt(struct mlx5e_priv *priv, enum mlx5e_rqt_ix rqt_ix);
+void mlx5e_build_tir_ctx_hash(void *tirc, struct mlx5e_priv *priv);
 
 int mlx5e_open_locked(struct net_device *netdev);
 int mlx5e_close_locked(struct net_device *netdev);
+void mlx5e_build_default_indir_rqt(u32 *indirection_rqt, int len,
+				   int num_channels);
 
 static inline void mlx5e_tx_notify_hw(struct mlx5e_sq *sq,
 				      struct mlx5e_tx_wqe *wqe, int bf_sz)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c
index be65435..2018eeb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c
@@ -62,10 +62,11 @@
 	struct delayed_work *dwork = to_delayed_work(work);
 	struct mlx5e_tstamp *tstamp = container_of(dwork, struct mlx5e_tstamp,
 						   overflow_work);
+	unsigned long flags;
 
-	write_lock(&tstamp->lock);
+	write_lock_irqsave(&tstamp->lock, flags);
 	timecounter_read(&tstamp->clock);
-	write_unlock(&tstamp->lock);
+	write_unlock_irqrestore(&tstamp->lock, flags);
 	schedule_delayed_work(&tstamp->overflow_work, tstamp->overflow_period);
 }
 
@@ -136,10 +137,11 @@
 	struct mlx5e_tstamp *tstamp = container_of(ptp, struct mlx5e_tstamp,
 						   ptp_info);
 	u64 ns = timespec64_to_ns(ts);
+	unsigned long flags;
 
-	write_lock(&tstamp->lock);
+	write_lock_irqsave(&tstamp->lock, flags);
 	timecounter_init(&tstamp->clock, &tstamp->cycles, ns);
-	write_unlock(&tstamp->lock);
+	write_unlock_irqrestore(&tstamp->lock, flags);
 
 	return 0;
 }
@@ -150,10 +152,11 @@
 	struct mlx5e_tstamp *tstamp = container_of(ptp, struct mlx5e_tstamp,
 						   ptp_info);
 	u64 ns;
+	unsigned long flags;
 
-	write_lock(&tstamp->lock);
+	write_lock_irqsave(&tstamp->lock, flags);
 	ns = timecounter_read(&tstamp->clock);
-	write_unlock(&tstamp->lock);
+	write_unlock_irqrestore(&tstamp->lock, flags);
 
 	*ts = ns_to_timespec64(ns);
 
@@ -164,10 +167,11 @@
 {
 	struct mlx5e_tstamp *tstamp = container_of(ptp, struct mlx5e_tstamp,
 						   ptp_info);
+	unsigned long flags;
 
-	write_lock(&tstamp->lock);
+	write_lock_irqsave(&tstamp->lock, flags);
 	timecounter_adjtime(&tstamp->clock, delta);
-	write_unlock(&tstamp->lock);
+	write_unlock_irqrestore(&tstamp->lock, flags);
 
 	return 0;
 }
@@ -176,6 +180,7 @@
 {
 	u64 adj;
 	u32 diff;
+	unsigned long flags;
 	int neg_adj = 0;
 	struct mlx5e_tstamp *tstamp = container_of(ptp, struct mlx5e_tstamp,
 						  ptp_info);
@@ -189,11 +194,11 @@
 	adj *= delta;
 	diff = div_u64(adj, 1000000000ULL);
 
-	write_lock(&tstamp->lock);
+	write_lock_irqsave(&tstamp->lock, flags);
 	timecounter_read(&tstamp->clock);
 	tstamp->cycles.mult = neg_adj ? tstamp->nominal_c_mult - diff :
 					tstamp->nominal_c_mult + diff;
-	write_unlock(&tstamp->lock);
+	write_unlock_irqrestore(&tstamp->lock, flags);
 
 	return 0;
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index 65624ac..5abeb00 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -385,6 +385,8 @@
 		mlx5e_close_locked(dev);
 
 	priv->params.num_channels = count;
+	mlx5e_build_default_indir_rqt(priv->params.indirection_rqt,
+				      MLX5E_INDIR_RQT_SIZE, count);
 
 	if (was_opened)
 		err = mlx5e_open_locked(dev);
@@ -703,18 +705,36 @@
 	return 0;
 }
 
+static void mlx5e_modify_tirs_hash(struct mlx5e_priv *priv, void *in, int inlen)
+{
+	struct mlx5_core_dev *mdev = priv->mdev;
+	void *tirc = MLX5_ADDR_OF(modify_tir_in, in, ctx);
+	int i;
+
+	MLX5_SET(modify_tir_in, in, bitmask.hash, 1);
+	mlx5e_build_tir_ctx_hash(tirc, priv);
+
+	for (i = 0; i < MLX5E_NUM_TT; i++)
+		if (IS_HASHING_TT(i))
+			mlx5_core_modify_tir(mdev, priv->tirn[i], in, inlen);
+}
+
 static int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir,
 			  const u8 *key, const u8 hfunc)
 {
 	struct mlx5e_priv *priv = netdev_priv(dev);
-	bool close_open;
-	int err = 0;
+	int inlen = MLX5_ST_SZ_BYTES(modify_tir_in);
+	void *in;
 
 	if ((hfunc != ETH_RSS_HASH_NO_CHANGE) &&
 	    (hfunc != ETH_RSS_HASH_XOR) &&
 	    (hfunc != ETH_RSS_HASH_TOP))
 		return -EINVAL;
 
+	in = mlx5_vzalloc(inlen);
+	if (!in)
+		return -ENOMEM;
+
 	mutex_lock(&priv->state_lock);
 
 	if (indir) {
@@ -723,11 +743,6 @@
 		mlx5e_redirect_rqt(priv, MLX5E_INDIRECTION_RQT);
 	}
 
-	close_open = (key || (hfunc != ETH_RSS_HASH_NO_CHANGE)) &&
-		     test_bit(MLX5E_STATE_OPENED, &priv->state);
-	if (close_open)
-		mlx5e_close_locked(dev);
-
 	if (key)
 		memcpy(priv->params.toeplitz_hash_key, key,
 		       sizeof(priv->params.toeplitz_hash_key));
@@ -735,12 +750,13 @@
 	if (hfunc != ETH_RSS_HASH_NO_CHANGE)
 		priv->params.rss_hfunc = hfunc;
 
-	if (close_open)
-		err = mlx5e_open_locked(priv->netdev);
+	mlx5e_modify_tirs_hash(priv, in, inlen);
 
 	mutex_unlock(&priv->state_lock);
 
-	return err;
+	kvfree(in);
+
+	return 0;
 }
 
 static int mlx5e_get_rxnfc(struct net_device *netdev,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 6a3e430..402994b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -141,6 +141,10 @@
 		return;
 
 	/* Collect firts the SW counters and then HW for consistency */
+	s->rx_packets		= 0;
+	s->rx_bytes		= 0;
+	s->tx_packets		= 0;
+	s->tx_bytes		= 0;
 	s->tso_packets		= 0;
 	s->tso_bytes		= 0;
 	s->tx_queue_stopped	= 0;
@@ -155,6 +159,8 @@
 	for (i = 0; i < priv->params.num_channels; i++) {
 		rq_stats = &priv->channel[i]->rq.stats;
 
+		s->rx_packets	+= rq_stats->packets;
+		s->rx_bytes	+= rq_stats->bytes;
 		s->lro_packets	+= rq_stats->lro_packets;
 		s->lro_bytes	+= rq_stats->lro_bytes;
 		s->rx_csum_none	+= rq_stats->csum_none;
@@ -164,6 +170,8 @@
 		for (j = 0; j < priv->params.num_tc; j++) {
 			sq_stats = &priv->channel[i]->sq[j].stats;
 
+			s->tx_packets		+= sq_stats->packets;
+			s->tx_bytes		+= sq_stats->bytes;
 			s->tso_packets		+= sq_stats->tso_packets;
 			s->tso_bytes		+= sq_stats->tso_bytes;
 			s->tx_queue_stopped	+= sq_stats->stopped;
@@ -225,23 +233,6 @@
 	s->tx_broadcast_bytes   =
 		MLX5_GET_CTR(out, transmitted_eth_broadcast.octets);
 
-	s->rx_packets =
-		s->rx_unicast_packets +
-		s->rx_multicast_packets +
-		s->rx_broadcast_packets;
-	s->rx_bytes =
-		s->rx_unicast_bytes +
-		s->rx_multicast_bytes +
-		s->rx_broadcast_bytes;
-	s->tx_packets =
-		s->tx_unicast_packets +
-		s->tx_multicast_packets +
-		s->tx_broadcast_packets;
-	s->tx_bytes =
-		s->tx_unicast_bytes +
-		s->tx_multicast_bytes +
-		s->tx_broadcast_bytes;
-
 	/* Update calculated offload counters */
 	s->tx_csum_offload = s->tx_packets - tx_offload_none;
 	s->rx_csum_good    = s->rx_packets - s->rx_csum_none -
@@ -1199,7 +1190,6 @@
 			ix = mlx5e_bits_invert(i, MLX5E_LOG_INDIR_RQT_SIZE);
 
 		ix = priv->params.indirection_rqt[ix];
-		ix = ix % priv->params.num_channels;
 		MLX5_SET(rqtc, rqtc, rq_num[i],
 			 test_bit(MLX5E_STATE_OPENED, &priv->state) ?
 			 priv->channel[ix]->rq.rqn :
@@ -1317,7 +1307,22 @@
 			      lro_timer_supported_periods[2]));
 }
 
-static int mlx5e_modify_tir_lro(struct mlx5e_priv *priv, int tt)
+void mlx5e_build_tir_ctx_hash(void *tirc, struct mlx5e_priv *priv)
+{
+	MLX5_SET(tirc, tirc, rx_hash_fn,
+		 mlx5e_rx_hash_fn(priv->params.rss_hfunc));
+	if (priv->params.rss_hfunc == ETH_RSS_HASH_TOP) {
+		void *rss_key = MLX5_ADDR_OF(tirc, tirc,
+					     rx_hash_toeplitz_key);
+		size_t len = MLX5_FLD_SZ_BYTES(tirc,
+					       rx_hash_toeplitz_key);
+
+		MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
+		memcpy(rss_key, priv->params.toeplitz_hash_key, len);
+	}
+}
+
+static int mlx5e_modify_tirs_lro(struct mlx5e_priv *priv)
 {
 	struct mlx5_core_dev *mdev = priv->mdev;
 
@@ -1325,6 +1330,7 @@
 	void *tirc;
 	int inlen;
 	int err;
+	int tt;
 
 	inlen = MLX5_ST_SZ_BYTES(modify_tir_in);
 	in = mlx5_vzalloc(inlen);
@@ -1336,7 +1342,11 @@
 
 	mlx5e_build_tir_ctx_lro(tirc, priv);
 
-	err = mlx5_core_modify_tir(mdev, priv->tirn[tt], in, inlen);
+	for (tt = 0; tt < MLX5E_NUM_TT; tt++) {
+		err = mlx5_core_modify_tir(mdev, priv->tirn[tt], in, inlen);
+		if (err)
+			break;
+	}
 
 	kvfree(in);
 
@@ -1672,17 +1682,7 @@
 	default:
 		MLX5_SET(tirc, tirc, indirect_table,
 			 priv->rqtn[MLX5E_INDIRECTION_RQT]);
-		MLX5_SET(tirc, tirc, rx_hash_fn,
-			 mlx5e_rx_hash_fn(priv->params.rss_hfunc));
-		if (priv->params.rss_hfunc == ETH_RSS_HASH_TOP) {
-			void *rss_key = MLX5_ADDR_OF(tirc, tirc,
-						     rx_hash_toeplitz_key);
-			size_t len = MLX5_FLD_SZ_BYTES(tirc,
-						       rx_hash_toeplitz_key);
-
-			MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
-			memcpy(rss_key, priv->params.toeplitz_hash_key, len);
-		}
+		mlx5e_build_tir_ctx_hash(tirc, priv);
 		break;
 	}
 
@@ -1885,8 +1885,10 @@
 			mlx5e_close_locked(priv->netdev);
 
 		priv->params.lro_en = !!(features & NETIF_F_LRO);
-		mlx5e_modify_tir_lro(priv, MLX5E_TT_IPV4_TCP);
-		mlx5e_modify_tir_lro(priv, MLX5E_TT_IPV6_TCP);
+		err = mlx5e_modify_tirs_lro(priv);
+		if (err)
+			mlx5_core_warn(priv->mdev, "lro modify failed, %d\n",
+				       err);
 
 		if (was_opened)
 			err = mlx5e_open_locked(priv->netdev);
@@ -2024,18 +2026,37 @@
 					    vf_stats);
 }
 
-static struct net_device_ops mlx5e_netdev_ops = {
+static const struct net_device_ops mlx5e_netdev_ops_basic = {
 	.ndo_open                = mlx5e_open,
 	.ndo_stop                = mlx5e_close,
 	.ndo_start_xmit          = mlx5e_xmit,
 	.ndo_get_stats64         = mlx5e_get_stats,
 	.ndo_set_rx_mode         = mlx5e_set_rx_mode,
 	.ndo_set_mac_address     = mlx5e_set_mac,
-	.ndo_vlan_rx_add_vid	 = mlx5e_vlan_rx_add_vid,
-	.ndo_vlan_rx_kill_vid	 = mlx5e_vlan_rx_kill_vid,
+	.ndo_vlan_rx_add_vid     = mlx5e_vlan_rx_add_vid,
+	.ndo_vlan_rx_kill_vid    = mlx5e_vlan_rx_kill_vid,
 	.ndo_set_features        = mlx5e_set_features,
-	.ndo_change_mtu		 = mlx5e_change_mtu,
-	.ndo_do_ioctl		 = mlx5e_ioctl,
+	.ndo_change_mtu          = mlx5e_change_mtu,
+	.ndo_do_ioctl            = mlx5e_ioctl,
+};
+
+static const struct net_device_ops mlx5e_netdev_ops_sriov = {
+	.ndo_open                = mlx5e_open,
+	.ndo_stop                = mlx5e_close,
+	.ndo_start_xmit          = mlx5e_xmit,
+	.ndo_get_stats64         = mlx5e_get_stats,
+	.ndo_set_rx_mode         = mlx5e_set_rx_mode,
+	.ndo_set_mac_address     = mlx5e_set_mac,
+	.ndo_vlan_rx_add_vid     = mlx5e_vlan_rx_add_vid,
+	.ndo_vlan_rx_kill_vid    = mlx5e_vlan_rx_kill_vid,
+	.ndo_set_features        = mlx5e_set_features,
+	.ndo_change_mtu          = mlx5e_change_mtu,
+	.ndo_do_ioctl            = mlx5e_ioctl,
+	.ndo_set_vf_mac          = mlx5e_set_vf_mac,
+	.ndo_set_vf_vlan         = mlx5e_set_vf_vlan,
+	.ndo_get_vf_config       = mlx5e_get_vf_config,
+	.ndo_set_vf_link_state   = mlx5e_set_vf_link_state,
+	.ndo_get_vf_stats        = mlx5e_get_vf_stats,
 };
 
 static int mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev)
@@ -2070,12 +2091,20 @@
 	       2 /*sizeof(mlx5e_tx_wqe.inline_hdr_start)*/;
 }
 
+void mlx5e_build_default_indir_rqt(u32 *indirection_rqt, int len,
+				   int num_channels)
+{
+	int i;
+
+	for (i = 0; i < len; i++)
+		indirection_rqt[i] = i % num_channels;
+}
+
 static void mlx5e_build_netdev_priv(struct mlx5_core_dev *mdev,
 				    struct net_device *netdev,
 				    int num_channels)
 {
 	struct mlx5e_priv *priv = netdev_priv(netdev);
-	int i;
 
 	priv->params.log_sq_size           =
 		MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE;
@@ -2099,8 +2128,8 @@
 	netdev_rss_key_fill(priv->params.toeplitz_hash_key,
 			    sizeof(priv->params.toeplitz_hash_key));
 
-	for (i = 0; i < MLX5E_INDIR_RQT_SIZE; i++)
-		priv->params.indirection_rqt[i] = i % num_channels;
+	mlx5e_build_default_indir_rqt(priv->params.indirection_rqt,
+				      MLX5E_INDIR_RQT_SIZE, num_channels);
 
 	priv->params.lro_wqe_sz            =
 		MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ;
@@ -2137,18 +2166,11 @@
 
 	SET_NETDEV_DEV(netdev, &mdev->pdev->dev);
 
-	if (priv->params.num_tc > 1)
-		mlx5e_netdev_ops.ndo_select_queue = mlx5e_select_queue;
+	if (MLX5_CAP_GEN(mdev, vport_group_manager))
+		netdev->netdev_ops = &mlx5e_netdev_ops_sriov;
+	else
+		netdev->netdev_ops = &mlx5e_netdev_ops_basic;
 
-	if (MLX5_CAP_GEN(mdev, vport_group_manager)) {
-		mlx5e_netdev_ops.ndo_set_vf_mac = mlx5e_set_vf_mac;
-		mlx5e_netdev_ops.ndo_set_vf_vlan = mlx5e_set_vf_vlan;
-		mlx5e_netdev_ops.ndo_get_vf_config = mlx5e_get_vf_config;
-		mlx5e_netdev_ops.ndo_set_vf_link_state = mlx5e_set_vf_link_state;
-		mlx5e_netdev_ops.ndo_get_vf_stats = mlx5e_get_vf_stats;
-	}
-
-	netdev->netdev_ops        = &mlx5e_netdev_ops;
 	netdev->watchdog_timeo    = 15 * HZ;
 
 	netdev->ethtool_ops	  = &mlx5e_ethtool_ops;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index dd959d9..59658b9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -230,10 +230,6 @@
 	struct mlx5e_rq *rq = container_of(cq, struct mlx5e_rq, cq);
 	int work_done;
 
-	/* avoid accessing cq (dma coherent memory) if not needed */
-	if (!test_and_clear_bit(MLX5E_CQ_HAS_CQES, &cq->flags))
-		return 0;
-
 	for (work_done = 0; work_done < budget; work_done++) {
 		struct mlx5e_rx_wqe *wqe;
 		struct mlx5_cqe64 *cqe;
@@ -267,6 +263,7 @@
 
 		mlx5e_build_rx_skb(cqe, rq, skb);
 		rq->stats.packets++;
+		rq->stats.bytes += be32_to_cpu(cqe->byte_cnt);
 		napi_gro_receive(cq->napi, skb);
 
 wq_ll_pop:
@@ -279,8 +276,5 @@
 	/* ensure cq space is freed before enabling more cqes */
 	wmb();
 
-	if (work_done == budget)
-		set_bit(MLX5E_CQ_HAS_CQES, &cq->flags);
-
 	return work_done;
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index 2c3fba0..bb4eeeb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -179,6 +179,7 @@
 	unsigned int skb_len = skb->len;
 	u8  opcode = MLX5_OPCODE_SEND;
 	dma_addr_t dma_addr = 0;
+	unsigned int num_bytes;
 	bool bf = false;
 	u16 headlen;
 	u16 ds_cnt;
@@ -204,8 +205,7 @@
 		opcode       = MLX5_OPCODE_LSO;
 		ihs          = skb_transport_offset(skb) + tcp_hdrlen(skb);
 		payload_len  = skb->len - ihs;
-		wi->num_bytes = skb->len +
-				(skb_shinfo(skb)->gso_segs - 1) * ihs;
+		num_bytes = skb->len + (skb_shinfo(skb)->gso_segs - 1) * ihs;
 		sq->stats.tso_packets++;
 		sq->stats.tso_bytes += payload_len;
 	} else {
@@ -213,9 +213,11 @@
 		     !skb->xmit_more &&
 		     !skb_shinfo(skb)->nr_frags;
 		ihs = mlx5e_get_inline_hdr_size(sq, skb, bf);
-		wi->num_bytes = max_t(unsigned int, skb->len, ETH_ZLEN);
+		num_bytes = max_t(unsigned int, skb->len, ETH_ZLEN);
 	}
 
+	wi->num_bytes = num_bytes;
+
 	if (skb_vlan_tag_present(skb)) {
 		mlx5e_insert_vlan(eseg->inline_hdr_start, skb, ihs, &skb_data,
 				  &skb_len);
@@ -307,6 +309,7 @@
 	sq->bf_budget = bf ? sq->bf_budget - 1 : 0;
 
 	sq->stats.packets++;
+	sq->stats.bytes += num_bytes;
 	return NETDEV_TX_OK;
 
 dma_unmap_wqe_err:
@@ -335,10 +338,6 @@
 	u16 sqcc;
 	int i;
 
-	/* avoid accessing cq (dma coherent memory) if not needed */
-	if (!test_and_clear_bit(MLX5E_CQ_HAS_CQES, &cq->flags))
-		return false;
-
 	sq = container_of(cq, struct mlx5e_sq, cq);
 
 	npkts = 0;
@@ -422,10 +421,6 @@
 				netif_tx_wake_queue(sq->txq);
 				sq->stats.wake++;
 	}
-	if (i == MLX5E_TX_CQ_POLL_BUDGET) {
-		set_bit(MLX5E_CQ_HAS_CQES, &cq->flags);
-		return true;
-	}
 
-	return false;
+	return (i == MLX5E_TX_CQ_POLL_BUDGET);
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
index 4ac8d71..66d51a7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
@@ -88,7 +88,6 @@
 {
 	struct mlx5e_cq *cq = container_of(mcq, struct mlx5e_cq, mcq);
 
-	set_bit(MLX5E_CQ_HAS_CQES, &cq->flags);
 	set_bit(MLX5E_CHANNEL_NAPI_SCHED, &cq->channel->flags);
 	barrier();
 	napi_schedule(cq->napi);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c
index c071077..7992c55 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/pci.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c
@@ -215,7 +215,7 @@
 {
 	int index = q->producer_counter & (q->count - 1);
 
-	if ((q->producer_counter - q->consumer_counter) == q->count)
+	if ((u16) (q->producer_counter - q->consumer_counter) == q->count)
 		return NULL;
 	return mlxsw_pci_queue_elem_info_get(q, index);
 }
diff --git a/drivers/net/ethernet/mellanox/mlxsw/port.h b/drivers/net/ethernet/mellanox/mlxsw/port.h
index 726f543..ae65b99 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/port.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/port.h
@@ -49,7 +49,7 @@
 #define MLXSW_PORT_MID			0xd000
 
 #define MLXSW_PORT_MAX_PHY_PORTS	0x40
-#define MLXSW_PORT_MAX_PORTS		MLXSW_PORT_MAX_PHY_PORTS
+#define MLXSW_PORT_MAX_PORTS		(MLXSW_PORT_MAX_PHY_PORTS + 1)
 
 #define MLXSW_PORT_DEVID_BITS_OFFSET	10
 #define MLXSW_PORT_PHY_BITS_OFFSET	4
diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 0c52372..ffe4c03 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -873,6 +873,62 @@
 	}
 }
 
+/* SPAFT - Switch Port Acceptable Frame Types
+ * ------------------------------------------
+ * The Switch Port Acceptable Frame Types register configures the frame
+ * admittance of the port.
+ */
+#define MLXSW_REG_SPAFT_ID 0x2010
+#define MLXSW_REG_SPAFT_LEN 0x08
+
+static const struct mlxsw_reg_info mlxsw_reg_spaft = {
+	.id = MLXSW_REG_SPAFT_ID,
+	.len = MLXSW_REG_SPAFT_LEN,
+};
+
+/* reg_spaft_local_port
+ * Local port number.
+ * Access: Index
+ *
+ * Note: CPU port is not supported (all tag types are allowed).
+ */
+MLXSW_ITEM32(reg, spaft, local_port, 0x00, 16, 8);
+
+/* reg_spaft_sub_port
+ * Virtual port within the physical port.
+ * Should be set to 0 when virtual ports are not enabled on the port.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, spaft, sub_port, 0x00, 8, 8);
+
+/* reg_spaft_allow_untagged
+ * When set, untagged frames on the ingress are allowed (default).
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, spaft, allow_untagged, 0x04, 31, 1);
+
+/* reg_spaft_allow_prio_tagged
+ * When set, priority tagged frames on the ingress are allowed (default).
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, spaft, allow_prio_tagged, 0x04, 30, 1);
+
+/* reg_spaft_allow_tagged
+ * When set, tagged frames on the ingress are allowed (default).
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, spaft, allow_tagged, 0x04, 29, 1);
+
+static inline void mlxsw_reg_spaft_pack(char *payload, u8 local_port,
+					bool allow_untagged)
+{
+	MLXSW_REG_ZERO(spaft, payload);
+	mlxsw_reg_spaft_local_port_set(payload, local_port);
+	mlxsw_reg_spaft_allow_untagged_set(payload, allow_untagged);
+	mlxsw_reg_spaft_allow_prio_tagged_set(payload, true);
+	mlxsw_reg_spaft_allow_tagged_set(payload, true);
+}
+
 /* SFGC - Switch Flooding Group Configuration
  * ------------------------------------------
  * The following register controls the association of flooding tables and MIDs
@@ -1044,6 +1100,92 @@
 	mlxsw_reg_sftr_port_mask_set(payload, port, 1);
 }
 
+/* SFDF - Switch Filtering DB Flush
+ * --------------------------------
+ * The switch filtering DB flush register is used to flush the FDB.
+ * Note that FDB notifications are flushed as well.
+ */
+#define MLXSW_REG_SFDF_ID 0x2013
+#define MLXSW_REG_SFDF_LEN 0x14
+
+static const struct mlxsw_reg_info mlxsw_reg_sfdf = {
+	.id = MLXSW_REG_SFDF_ID,
+	.len = MLXSW_REG_SFDF_LEN,
+};
+
+/* reg_sfdf_swid
+ * Switch partition ID.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, sfdf, swid, 0x00, 24, 8);
+
+enum mlxsw_reg_sfdf_flush_type {
+	MLXSW_REG_SFDF_FLUSH_PER_SWID,
+	MLXSW_REG_SFDF_FLUSH_PER_FID,
+	MLXSW_REG_SFDF_FLUSH_PER_PORT,
+	MLXSW_REG_SFDF_FLUSH_PER_PORT_AND_FID,
+	MLXSW_REG_SFDF_FLUSH_PER_LAG,
+	MLXSW_REG_SFDF_FLUSH_PER_LAG_AND_FID,
+};
+
+/* reg_sfdf_flush_type
+ * Flush type.
+ * 0 - All SWID dynamic entries are flushed.
+ * 1 - All FID dynamic entries are flushed.
+ * 2 - All dynamic entries pointing to port are flushed.
+ * 3 - All FID dynamic entries pointing to port are flushed.
+ * 4 - All dynamic entries pointing to LAG are flushed.
+ * 5 - All FID dynamic entries pointing to LAG are flushed.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, sfdf, flush_type, 0x04, 28, 4);
+
+/* reg_sfdf_flush_static
+ * Static.
+ * 0 - Flush only dynamic entries.
+ * 1 - Flush both dynamic and static entries.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, sfdf, flush_static, 0x04, 24, 1);
+
+static inline void mlxsw_reg_sfdf_pack(char *payload,
+				       enum mlxsw_reg_sfdf_flush_type type)
+{
+	MLXSW_REG_ZERO(sfdf, payload);
+	mlxsw_reg_sfdf_flush_type_set(payload, type);
+	mlxsw_reg_sfdf_flush_static_set(payload, true);
+}
+
+/* reg_sfdf_fid
+ * FID to flush.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, sfdf, fid, 0x0C, 0, 16);
+
+/* reg_sfdf_system_port
+ * Port to flush.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, sfdf, system_port, 0x0C, 0, 16);
+
+/* reg_sfdf_port_fid_system_port
+ * Port to flush, pointed to by FID.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, sfdf, port_fid_system_port, 0x08, 0, 16);
+
+/* reg_sfdf_lag_id
+ * LAG ID to flush.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, sfdf, lag_id, 0x0C, 0, 10);
+
+/* reg_sfdf_lag_fid_lag_id
+ * LAG ID to flush, pointed to by FID.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, sfdf, lag_fid_lag_id, 0x08, 0, 10);
+
 /* SLDR - Switch LAG Descriptor Register
  * -----------------------------------------
  * The switch LAG descriptor register is populated by LAG descriptors.
@@ -1701,20 +1843,20 @@
  * Module number.
  * Access: RW
  */
-MLXSW_ITEM32_INDEXED(reg, pmlp, module, 0x04, 0, 8, 0x04, 0, false);
+MLXSW_ITEM32_INDEXED(reg, pmlp, module, 0x04, 0, 8, 0x04, 0x00, false);
 
 /* reg_pmlp_tx_lane
  * Tx Lane. When rxtx field is cleared, this field is used for Rx as well.
  * Access: RW
  */
-MLXSW_ITEM32_INDEXED(reg, pmlp, tx_lane, 0x04, 16, 2, 0x04, 16, false);
+MLXSW_ITEM32_INDEXED(reg, pmlp, tx_lane, 0x04, 16, 2, 0x04, 0x00, false);
 
 /* reg_pmlp_rx_lane
  * Rx Lane. When rxtx field is cleared, this field is ignored and Rx lane is
  * equal to Tx lane.
  * Access: RW
  */
-MLXSW_ITEM32_INDEXED(reg, pmlp, rx_lane, 0x04, 24, 2, 0x04, 24, false);
+MLXSW_ITEM32_INDEXED(reg, pmlp, rx_lane, 0x04, 24, 2, 0x04, 0x00, false);
 
 static inline void mlxsw_reg_pmlp_pack(char *payload, u8 local_port)
 {
@@ -3117,10 +3259,14 @@
 		return "SPVID";
 	case MLXSW_REG_SPVM_ID:
 		return "SPVM";
+	case MLXSW_REG_SPAFT_ID:
+		return "SPAFT";
 	case MLXSW_REG_SFGC_ID:
 		return "SFGC";
 	case MLXSW_REG_SFTR_ID:
 		return "SFTR";
+	case MLXSW_REG_SFDF_ID:
+		return "SFDF";
 	case MLXSW_REG_SLDR_ID:
 		return "SLDR";
 	case MLXSW_REG_SLCR_ID:
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index ce6845d..a94daa8 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -1979,6 +1979,115 @@
 	.profile		= &mlxsw_sp_config_profile,
 };
 
+static int
+mlxsw_sp_port_fdb_flush_by_port(const struct mlxsw_sp_port *mlxsw_sp_port)
+{
+	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+	char sfdf_pl[MLXSW_REG_SFDF_LEN];
+
+	mlxsw_reg_sfdf_pack(sfdf_pl, MLXSW_REG_SFDF_FLUSH_PER_PORT);
+	mlxsw_reg_sfdf_system_port_set(sfdf_pl, mlxsw_sp_port->local_port);
+
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfdf), sfdf_pl);
+}
+
+static int
+mlxsw_sp_port_fdb_flush_by_port_fid(const struct mlxsw_sp_port *mlxsw_sp_port,
+				    u16 fid)
+{
+	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+	char sfdf_pl[MLXSW_REG_SFDF_LEN];
+
+	mlxsw_reg_sfdf_pack(sfdf_pl, MLXSW_REG_SFDF_FLUSH_PER_PORT_AND_FID);
+	mlxsw_reg_sfdf_fid_set(sfdf_pl, fid);
+	mlxsw_reg_sfdf_port_fid_system_port_set(sfdf_pl,
+						mlxsw_sp_port->local_port);
+
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfdf), sfdf_pl);
+}
+
+static int
+mlxsw_sp_port_fdb_flush_by_lag_id(const struct mlxsw_sp_port *mlxsw_sp_port)
+{
+	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+	char sfdf_pl[MLXSW_REG_SFDF_LEN];
+
+	mlxsw_reg_sfdf_pack(sfdf_pl, MLXSW_REG_SFDF_FLUSH_PER_LAG);
+	mlxsw_reg_sfdf_lag_id_set(sfdf_pl, mlxsw_sp_port->lag_id);
+
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfdf), sfdf_pl);
+}
+
+static int
+mlxsw_sp_port_fdb_flush_by_lag_id_fid(const struct mlxsw_sp_port *mlxsw_sp_port,
+				      u16 fid)
+{
+	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+	char sfdf_pl[MLXSW_REG_SFDF_LEN];
+
+	mlxsw_reg_sfdf_pack(sfdf_pl, MLXSW_REG_SFDF_FLUSH_PER_LAG_AND_FID);
+	mlxsw_reg_sfdf_fid_set(sfdf_pl, fid);
+	mlxsw_reg_sfdf_lag_fid_lag_id_set(sfdf_pl, mlxsw_sp_port->lag_id);
+
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfdf), sfdf_pl);
+}
+
+static int
+__mlxsw_sp_port_fdb_flush(const struct mlxsw_sp_port *mlxsw_sp_port)
+{
+	int err, last_err = 0;
+	u16 vid;
+
+	for (vid = 1; vid < VLAN_N_VID - 1; vid++) {
+		err = mlxsw_sp_port_fdb_flush_by_port_fid(mlxsw_sp_port, vid);
+		if (err)
+			last_err = err;
+	}
+
+	return last_err;
+}
+
+static int
+__mlxsw_sp_port_fdb_flush_lagged(const struct mlxsw_sp_port *mlxsw_sp_port)
+{
+	int err, last_err = 0;
+	u16 vid;
+
+	for (vid = 1; vid < VLAN_N_VID - 1; vid++) {
+		err = mlxsw_sp_port_fdb_flush_by_lag_id_fid(mlxsw_sp_port, vid);
+		if (err)
+			last_err = err;
+	}
+
+	return last_err;
+}
+
+static int mlxsw_sp_port_fdb_flush(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+	if (!list_empty(&mlxsw_sp_port->vports_list))
+		if (mlxsw_sp_port->lagged)
+			return __mlxsw_sp_port_fdb_flush_lagged(mlxsw_sp_port);
+		else
+			return __mlxsw_sp_port_fdb_flush(mlxsw_sp_port);
+	else
+		if (mlxsw_sp_port->lagged)
+			return mlxsw_sp_port_fdb_flush_by_lag_id(mlxsw_sp_port);
+		else
+			return mlxsw_sp_port_fdb_flush_by_port(mlxsw_sp_port);
+}
+
+static int mlxsw_sp_vport_fdb_flush(struct mlxsw_sp_port *mlxsw_sp_vport)
+{
+	u16 vfid = mlxsw_sp_vport_vfid_get(mlxsw_sp_vport);
+	u16 fid = mlxsw_sp_vfid_to_fid(vfid);
+
+	if (mlxsw_sp_vport->lagged)
+		return mlxsw_sp_port_fdb_flush_by_lag_id_fid(mlxsw_sp_vport,
+							     fid);
+	else
+		return mlxsw_sp_port_fdb_flush_by_port_fid(mlxsw_sp_vport, fid);
+}
+
 static bool mlxsw_sp_port_dev_check(const struct net_device *dev)
 {
 	return dev->netdev_ops == &mlxsw_sp_port_netdev_ops;
@@ -2006,10 +2115,16 @@
 	return 0;
 }
 
-static int mlxsw_sp_port_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_port)
+static int mlxsw_sp_port_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_port,
+				      bool flush_fdb)
 {
 	struct net_device *dev = mlxsw_sp_port->dev;
 
+	if (flush_fdb && mlxsw_sp_port_fdb_flush(mlxsw_sp_port))
+		netdev_err(mlxsw_sp_port->dev, "Failed to flush FDB\n");
+
+	mlxsw_sp_port_pvid_set(mlxsw_sp_port, 1);
+
 	mlxsw_sp_port->learning = 0;
 	mlxsw_sp_port->learning_sync = 0;
 	mlxsw_sp_port->uc_flood = 0;
@@ -2200,10 +2315,15 @@
 	return err;
 }
 
+static int mlxsw_sp_vport_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_vport,
+				       struct net_device *br_dev,
+				       bool flush_fdb);
+
 static int mlxsw_sp_port_lag_leave(struct mlxsw_sp_port *mlxsw_sp_port,
 				   struct net_device *lag_dev)
 {
 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+	struct mlxsw_sp_port *mlxsw_sp_vport;
 	struct mlxsw_sp_upper *lag;
 	u16 lag_id = mlxsw_sp_port->lag_id;
 	int err;
@@ -2220,7 +2340,30 @@
 	if (err)
 		return err;
 
+	/* In case we leave a LAG device that has bridges built on top,
+	 * then their teardown sequence is never issued and we need to
+	 * invoke the necessary cleanup routines ourselves.
+	 */
+	list_for_each_entry(mlxsw_sp_vport, &mlxsw_sp_port->vports_list,
+			    vport.list) {
+		struct net_device *br_dev;
+
+		if (!mlxsw_sp_vport->bridged)
+			continue;
+
+		br_dev = mlxsw_sp_vport_br_get(mlxsw_sp_vport);
+		mlxsw_sp_vport_bridge_leave(mlxsw_sp_vport, br_dev, false);
+	}
+
+	if (mlxsw_sp_port->bridged) {
+		mlxsw_sp_port_active_vlans_del(mlxsw_sp_port);
+		mlxsw_sp_port_bridge_leave(mlxsw_sp_port, false);
+		mlxsw_sp_master_bridge_dec(mlxsw_sp, NULL);
+	}
+
 	if (lag->ref_count == 1) {
+		if (mlxsw_sp_port_fdb_flush_by_lag_id(mlxsw_sp_port))
+			netdev_err(mlxsw_sp_port->dev, "Failed to flush FDB\n");
 		err = mlxsw_sp_lag_destroy(mlxsw_sp, lag_id);
 		if (err)
 			return err;
@@ -2272,9 +2415,6 @@
 	return mlxsw_sp_port_lag_tx_en_set(mlxsw_sp_port, info->tx_enabled);
 }
 
-static int mlxsw_sp_vport_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_vport,
-				       struct net_device *br_dev);
-
 static int mlxsw_sp_port_vlan_link(struct mlxsw_sp_port *mlxsw_sp_port,
 				   struct net_device *vlan_dev)
 {
@@ -2312,7 +2452,7 @@
 		struct net_device *br_dev;
 
 		br_dev = mlxsw_sp_vport_br_get(mlxsw_sp_vport);
-		mlxsw_sp_vport_bridge_leave(mlxsw_sp_vport, br_dev);
+		mlxsw_sp_vport_bridge_leave(mlxsw_sp_vport, br_dev, true);
 	}
 
 	mlxsw_sp_vport->dev = mlxsw_sp_port->dev;
@@ -2374,7 +2514,8 @@
 				}
 				mlxsw_sp_master_bridge_inc(mlxsw_sp, upper_dev);
 			} else {
-				err = mlxsw_sp_port_bridge_leave(mlxsw_sp_port);
+				err = mlxsw_sp_port_bridge_leave(mlxsw_sp_port,
+								 true);
 				mlxsw_sp_master_bridge_dec(mlxsw_sp, upper_dev);
 				if (err) {
 					netdev_err(dev, "Failed to leave bridge\n");
@@ -2541,7 +2682,8 @@
 }
 
 static int mlxsw_sp_vport_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_vport,
-				       struct net_device *br_dev)
+				       struct net_device *br_dev,
+				       bool flush_fdb)
 {
 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_vport->mlxsw_sp;
 	u16 vid = mlxsw_sp_vport_vid_get(mlxsw_sp_vport);
@@ -2604,6 +2746,16 @@
 		goto err_vport_flood_set;
 	}
 
+	err = mlxsw_sp_port_stp_state_set(mlxsw_sp_vport, vid,
+					  MLXSW_REG_SPMS_STATE_FORWARDING);
+	if (err) {
+		netdev_err(dev, "Failed to set STP state\n");
+		goto err_port_stp_state_set;
+	}
+
+	if (flush_fdb && mlxsw_sp_vport_fdb_flush(mlxsw_sp_vport))
+		netdev_err(dev, "Failed to flush FDB\n");
+
 	/* Switch between the vFIDs and destroy the old one if needed. */
 	new_vfid->nr_vports++;
 	mlxsw_sp_vport->vport.vfid = new_vfid;
@@ -2618,6 +2770,7 @@
 
 	return 0;
 
+err_port_stp_state_set:
 err_vport_flood_set:
 err_port_vid_learning_set:
 err_port_vid_to_fid_validate:
@@ -2777,7 +2930,7 @@
 			if (!mlxsw_sp_vport)
 				return NOTIFY_DONE;
 			err = mlxsw_sp_vport_bridge_leave(mlxsw_sp_vport,
-							  upper_dev);
+							  upper_dev, true);
 			if (err) {
 				netdev_err(dev, "Failed to leave bridge\n");
 				return NOTIFY_BAD;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index a23dc61..3b89ed2 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -120,7 +120,6 @@
 	} fdb_notify;
 #define MLXSW_SP_DEFAULT_AGEING_TIME 300
 	u32 ageing_time;
-	struct mutex fdb_lock;	/* Make sure FDB sessions are atomic. */
 	struct mlxsw_sp_upper master_bridge;
 	struct mlxsw_sp_upper lags[MLXSW_SP_LAG_MAX];
 };
@@ -254,5 +253,7 @@
 			   __be16 __always_unused proto, u16 vid);
 int mlxsw_sp_vport_flood_set(struct mlxsw_sp_port *mlxsw_sp_vport, u16 vfid,
 			     bool set, bool only_uc);
+void mlxsw_sp_port_active_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port);
+int mlxsw_sp_port_pvid_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid);
 
 #endif
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
index 45479ef..7b56098 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
@@ -45,6 +45,7 @@
 #include <linux/if_bridge.h>
 #include <linux/workqueue.h>
 #include <linux/jiffies.h>
+#include <linux/rtnetlink.h>
 #include <net/switchdev.h>
 
 #include "spectrum.h"
@@ -124,14 +125,14 @@
 	int err;
 
 	switch (state) {
-	case BR_STATE_DISABLED: /* fall-through */
 	case BR_STATE_FORWARDING:
 		spms_state = MLXSW_REG_SPMS_STATE_FORWARDING;
 		break;
-	case BR_STATE_LISTENING: /* fall-through */
 	case BR_STATE_LEARNING:
 		spms_state = MLXSW_REG_SPMS_STATE_LEARNING;
 		break;
+	case BR_STATE_LISTENING: /* fall-through */
+	case BR_STATE_DISABLED: /* fall-through */
 	case BR_STATE_BLOCKING:
 		spms_state = MLXSW_REG_SPMS_STATE_DISCARDING;
 		break;
@@ -369,7 +370,8 @@
 	return err;
 }
 
-static int mlxsw_sp_port_pvid_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid)
+static int __mlxsw_sp_port_pvid_set(struct mlxsw_sp_port *mlxsw_sp_port,
+				    u16 vid)
 {
 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
 	char spvid_pl[MLXSW_REG_SPVID_LEN];
@@ -378,6 +380,53 @@
 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(spvid), spvid_pl);
 }
 
+static int mlxsw_sp_port_allow_untagged_set(struct mlxsw_sp_port *mlxsw_sp_port,
+					    bool allow)
+{
+	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+	char spaft_pl[MLXSW_REG_SPAFT_LEN];
+
+	mlxsw_reg_spaft_pack(spaft_pl, mlxsw_sp_port->local_port, allow);
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(spaft), spaft_pl);
+}
+
+int mlxsw_sp_port_pvid_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid)
+{
+	struct net_device *dev = mlxsw_sp_port->dev;
+	int err;
+
+	if (!vid) {
+		err = mlxsw_sp_port_allow_untagged_set(mlxsw_sp_port, false);
+		if (err) {
+			netdev_err(dev, "Failed to disallow untagged traffic\n");
+			return err;
+		}
+	} else {
+		err = __mlxsw_sp_port_pvid_set(mlxsw_sp_port, vid);
+		if (err) {
+			netdev_err(dev, "Failed to set PVID\n");
+			return err;
+		}
+
+		/* Only allow if not already allowed. */
+		if (!mlxsw_sp_port->pvid) {
+			err = mlxsw_sp_port_allow_untagged_set(mlxsw_sp_port,
+							       true);
+			if (err) {
+				netdev_err(dev, "Failed to allow untagged traffic\n");
+				goto err_port_allow_untagged_set;
+			}
+		}
+	}
+
+	mlxsw_sp_port->pvid = vid;
+	return 0;
+
+err_port_allow_untagged_set:
+	__mlxsw_sp_port_pvid_set(mlxsw_sp_port, mlxsw_sp_port->pvid);
+	return err;
+}
+
 static int mlxsw_sp_fid_create(struct mlxsw_sp *mlxsw_sp, u16 fid)
 {
 	char sfmr_pl[MLXSW_REG_SFMR_LEN];
@@ -539,7 +588,12 @@
 			netdev_err(dev, "Unable to add PVID %d\n", vid_begin);
 			goto err_port_pvid_set;
 		}
-		mlxsw_sp_port->pvid = vid_begin;
+	} else if (!flag_pvid && old_pvid >= vid_begin && old_pvid <= vid_end) {
+		err = mlxsw_sp_port_pvid_set(mlxsw_sp_port, 0);
+		if (err) {
+			netdev_err(dev, "Unable to del PVID\n");
+			goto err_port_pvid_set;
+		}
 	}
 
 	/* Changing activity bits only if HW operation succeded */
@@ -891,20 +945,18 @@
 		return err;
 	}
 
+	if (init)
+		goto out;
+
 	pvid = mlxsw_sp_port->pvid;
-	if (pvid >= vid_begin && pvid <= vid_end && pvid != 1) {
-		/* Default VLAN is always 1 */
-		err = mlxsw_sp_port_pvid_set(mlxsw_sp_port, 1);
+	if (pvid >= vid_begin && pvid <= vid_end) {
+		err = mlxsw_sp_port_pvid_set(mlxsw_sp_port, 0);
 		if (err) {
 			netdev_err(dev, "Unable to del PVID %d\n", pvid);
 			return err;
 		}
-		mlxsw_sp_port->pvid = 1;
 	}
 
-	if (init)
-		goto out;
-
 	err = __mlxsw_sp_port_flood_set(mlxsw_sp_port, vid_begin, vid_end,
 					false, false);
 	if (err) {
@@ -936,6 +988,14 @@
 					 vlan->vid_begin, vlan->vid_end, false);
 }
 
+void mlxsw_sp_port_active_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+	u16 vid;
+
+	for_each_set_bit(vid, mlxsw_sp_port->active_vlans, VLAN_N_VID)
+		__mlxsw_sp_port_vlans_del(mlxsw_sp_port, vid, vid, false);
+}
+
 static int
 mlxsw_sp_port_fdb_static_del(struct mlxsw_sp_port *mlxsw_sp_port,
 			     const struct switchdev_obj_port_fdb *fdb)
@@ -1040,10 +1100,12 @@
 
 static int mlxsw_sp_port_fdb_dump(struct mlxsw_sp_port *mlxsw_sp_port,
 				  struct switchdev_obj_port_fdb *fdb,
-				  switchdev_obj_dump_cb_t *cb)
+				  switchdev_obj_dump_cb_t *cb,
+				  struct net_device *orig_dev)
 {
 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
-	u16 vport_vid = 0, vport_fid = 0;
+	struct mlxsw_sp_port *tmp;
+	u16 vport_fid = 0;
 	char *sfd_pl;
 	char mac[ETH_ALEN];
 	u16 fid;
@@ -1058,13 +1120,11 @@
 	if (!sfd_pl)
 		return -ENOMEM;
 
-	mutex_lock(&mlxsw_sp_port->mlxsw_sp->fdb_lock);
 	if (mlxsw_sp_port_is_vport(mlxsw_sp_port)) {
 		u16 tmp;
 
 		tmp = mlxsw_sp_vport_vfid_get(mlxsw_sp_port);
 		vport_fid = mlxsw_sp_vfid_to_fid(tmp);
-		vport_vid = mlxsw_sp_vport_vid_get(mlxsw_sp_port);
 	}
 
 	mlxsw_reg_sfd_pack(sfd_pl, MLXSW_REG_SFD_OP_QUERY_DUMP, 0);
@@ -1088,12 +1148,13 @@
 				mlxsw_reg_sfd_uc_unpack(sfd_pl, i, mac, &fid,
 							&local_port);
 				if (local_port == mlxsw_sp_port->local_port) {
-					if (vport_fid && vport_fid != fid)
-						continue;
-					else if (vport_fid)
-						fdb->vid = vport_vid;
-					else
+					if (vport_fid && vport_fid == fid)
+						fdb->vid = 0;
+					else if (!vport_fid &&
+						 !mlxsw_sp_fid_is_vfid(fid))
 						fdb->vid = fid;
+					else
+						continue;
 					ether_addr_copy(fdb->addr, mac);
 					fdb->ndm_state = NUD_REACHABLE;
 					err = cb(&fdb->obj);
@@ -1104,14 +1165,22 @@
 			case MLXSW_REG_SFD_REC_TYPE_UNICAST_LAG:
 				mlxsw_reg_sfd_uc_lag_unpack(sfd_pl, i,
 							    mac, &fid, &lag_id);
-				if (mlxsw_sp_port ==
-				    mlxsw_sp_lag_rep_port(mlxsw_sp, lag_id)) {
-					if (vport_fid && vport_fid != fid)
+				tmp = mlxsw_sp_lag_rep_port(mlxsw_sp, lag_id);
+				if (tmp && tmp->local_port ==
+				    mlxsw_sp_port->local_port) {
+					/* LAG records can only point to LAG
+					 * devices or VLAN devices on top.
+					 */
+					if (!netif_is_lag_master(orig_dev) &&
+					    !is_vlan_dev(orig_dev))
 						continue;
-					else if (vport_fid)
-						fdb->vid = vport_vid;
-					else
+					if (vport_fid && vport_fid == fid)
+						fdb->vid = 0;
+					else if (!vport_fid &&
+						 !mlxsw_sp_fid_is_vfid(fid))
 						fdb->vid = fid;
+					else
+						continue;
 					ether_addr_copy(fdb->addr, mac);
 					fdb->ndm_state = NUD_REACHABLE;
 					err = cb(&fdb->obj);
@@ -1124,7 +1193,6 @@
 	} while (num_rec == MLXSW_REG_SFD_REC_MAX_COUNT);
 
 out:
-	mutex_unlock(&mlxsw_sp_port->mlxsw_sp->fdb_lock);
 	kfree(sfd_pl);
 	return stored_err ? stored_err : err;
 }
@@ -1176,7 +1244,8 @@
 		break;
 	case SWITCHDEV_OBJ_ID_PORT_FDB:
 		err = mlxsw_sp_port_fdb_dump(mlxsw_sp_port,
-					     SWITCHDEV_OBJ_PORT_FDB(obj), cb);
+					     SWITCHDEV_OBJ_PORT_FDB(obj), cb,
+					     obj->orig_dev);
 		break;
 	default:
 		err = -EOPNOTSUPP;
@@ -1194,14 +1263,14 @@
 	.switchdev_port_obj_dump	= mlxsw_sp_port_obj_dump,
 };
 
-static void mlxsw_sp_fdb_call_notifiers(bool learning, bool learning_sync,
-					bool adding, char *mac, u16 vid,
+static void mlxsw_sp_fdb_call_notifiers(bool learning_sync, bool adding,
+					char *mac, u16 vid,
 					struct net_device *dev)
 {
 	struct switchdev_notifier_fdb_info info;
 	unsigned long notifier_type;
 
-	if (learning && learning_sync) {
+	if (learning_sync) {
 		info.addr = mac;
 		info.vid = vid;
 		notifier_type = adding ? SWITCHDEV_FDB_ADD : SWITCHDEV_FDB_DEL;
@@ -1237,7 +1306,7 @@
 			netdev_err(mlxsw_sp_port->dev, "Failed to find a matching vPort following FDB notification\n");
 			goto just_remove;
 		}
-		vid = mlxsw_sp_vport_vid_get(mlxsw_sp_vport);
+		vid = 0;
 		/* Override the physical port with the vPort. */
 		mlxsw_sp_port = mlxsw_sp_vport;
 	} else {
@@ -1257,8 +1326,7 @@
 
 	if (!do_notification)
 		return;
-	mlxsw_sp_fdb_call_notifiers(mlxsw_sp_port->learning,
-				    mlxsw_sp_port->learning_sync,
+	mlxsw_sp_fdb_call_notifiers(mlxsw_sp_port->learning_sync,
 				    adding, mac, vid, mlxsw_sp_port->dev);
 	return;
 
@@ -1273,6 +1341,7 @@
 						bool adding)
 {
 	struct mlxsw_sp_port *mlxsw_sp_port;
+	struct net_device *dev;
 	char mac[ETH_ALEN];
 	u16 lag_vid = 0;
 	u16 lag_id;
@@ -1298,11 +1367,13 @@
 			goto just_remove;
 		}
 
-		vid = mlxsw_sp_vport_vid_get(mlxsw_sp_vport);
-		lag_vid = vid;
+		lag_vid = mlxsw_sp_vport_vid_get(mlxsw_sp_vport);
+		dev = mlxsw_sp_vport->dev;
+		vid = 0;
 		/* Override the physical port with the vPort. */
 		mlxsw_sp_port = mlxsw_sp_vport;
 	} else {
+		dev = mlxsw_sp_lag_get(mlxsw_sp, lag_id)->dev;
 		vid = fid;
 	}
 
@@ -1319,10 +1390,8 @@
 
 	if (!do_notification)
 		return;
-	mlxsw_sp_fdb_call_notifiers(mlxsw_sp_port->learning,
-				    mlxsw_sp_port->learning_sync,
-				    adding, mac, vid,
-				    mlxsw_sp_lag_get(mlxsw_sp, lag_id)->dev);
+	mlxsw_sp_fdb_call_notifiers(mlxsw_sp_port->learning_sync, adding, mac,
+				    vid, dev);
 	return;
 
 just_remove:
@@ -1374,7 +1443,7 @@
 
 	mlxsw_sp = container_of(work, struct mlxsw_sp, fdb_notify.dw.work);
 
-	mutex_lock(&mlxsw_sp->fdb_lock);
+	rtnl_lock();
 	do {
 		mlxsw_reg_sfn_pack(sfn_pl);
 		err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(sfn), sfn_pl);
@@ -1387,7 +1456,7 @@
 			mlxsw_sp_fdb_notify_rec_process(mlxsw_sp, sfn_pl, i);
 
 	} while (num_rec);
-	mutex_unlock(&mlxsw_sp->fdb_lock);
+	rtnl_unlock();
 
 	kfree(sfn_pl);
 	mlxsw_sp_fdb_notify_work_schedule(mlxsw_sp);
@@ -1402,7 +1471,6 @@
 		dev_err(mlxsw_sp->bus_info->dev, "Failed to set default ageing time\n");
 		return err;
 	}
-	mutex_init(&mlxsw_sp->fdb_lock);
 	INIT_DELAYED_WORK(&mlxsw_sp->fdb_notify.dw, mlxsw_sp_fdb_notify_work);
 	mlxsw_sp->fdb_notify.interval = MLXSW_SP_DEFAULT_LEARNING_INTERVAL;
 	mlxsw_sp_fdb_notify_work_schedule(mlxsw_sp);
diff --git a/drivers/net/ethernet/moxa/moxart_ether.c b/drivers/net/ethernet/moxa/moxart_ether.c
index a10c928..3e67f45 100644
--- a/drivers/net/ethernet/moxa/moxart_ether.c
+++ b/drivers/net/ethernet/moxa/moxart_ether.c
@@ -28,6 +28,16 @@
 
 #include "moxart_ether.h"
 
+static inline void moxart_desc_write(u32 data, u32 *desc)
+{
+	*desc = cpu_to_le32(data);
+}
+
+static inline u32 moxart_desc_read(u32 *desc)
+{
+	return le32_to_cpu(*desc);
+}
+
 static inline void moxart_emac_write(struct net_device *ndev,
 				     unsigned int reg, unsigned long value)
 {
@@ -112,7 +122,7 @@
 static void moxart_mac_setup_desc_ring(struct net_device *ndev)
 {
 	struct moxart_mac_priv_t *priv = netdev_priv(ndev);
-	void __iomem *desc;
+	void *desc;
 	int i;
 
 	for (i = 0; i < TX_DESC_NUM; i++) {
@@ -121,7 +131,7 @@
 
 		priv->tx_buf[i] = priv->tx_buf_base + priv->tx_buf_size * i;
 	}
-	writel(TX_DESC1_END, desc + TX_REG_OFFSET_DESC1);
+	moxart_desc_write(TX_DESC1_END, desc + TX_REG_OFFSET_DESC1);
 
 	priv->tx_head = 0;
 	priv->tx_tail = 0;
@@ -129,8 +139,8 @@
 	for (i = 0; i < RX_DESC_NUM; i++) {
 		desc = priv->rx_desc_base + i * RX_REG_DESC_SIZE;
 		memset(desc, 0, RX_REG_DESC_SIZE);
-		writel(RX_DESC0_DMA_OWN, desc + RX_REG_OFFSET_DESC0);
-		writel(RX_BUF_SIZE & RX_DESC1_BUF_SIZE_MASK,
+		moxart_desc_write(RX_DESC0_DMA_OWN, desc + RX_REG_OFFSET_DESC0);
+		moxart_desc_write(RX_BUF_SIZE & RX_DESC1_BUF_SIZE_MASK,
 		       desc + RX_REG_OFFSET_DESC1);
 
 		priv->rx_buf[i] = priv->rx_buf_base + priv->rx_buf_size * i;
@@ -141,12 +151,12 @@
 		if (dma_mapping_error(&ndev->dev, priv->rx_mapping[i]))
 			netdev_err(ndev, "DMA mapping error\n");
 
-		writel(priv->rx_mapping[i],
+		moxart_desc_write(priv->rx_mapping[i],
 		       desc + RX_REG_OFFSET_DESC2 + RX_DESC2_ADDRESS_PHYS);
-		writel(priv->rx_buf[i],
+		moxart_desc_write((uintptr_t)priv->rx_buf[i],
 		       desc + RX_REG_OFFSET_DESC2 + RX_DESC2_ADDRESS_VIRT);
 	}
-	writel(RX_DESC1_END, desc + RX_REG_OFFSET_DESC1);
+	moxart_desc_write(RX_DESC1_END, desc + RX_REG_OFFSET_DESC1);
 
 	priv->rx_head = 0;
 
@@ -201,14 +211,15 @@
 						      napi);
 	struct net_device *ndev = priv->ndev;
 	struct sk_buff *skb;
-	void __iomem *desc;
+	void *desc;
 	unsigned int desc0, len;
 	int rx_head = priv->rx_head;
 	int rx = 0;
 
 	while (rx < budget) {
 		desc = priv->rx_desc_base + (RX_REG_DESC_SIZE * rx_head);
-		desc0 = readl(desc + RX_REG_OFFSET_DESC0);
+		desc0 = moxart_desc_read(desc + RX_REG_OFFSET_DESC0);
+		rmb(); /* ensure desc0 is up to date */
 
 		if (desc0 & RX_DESC0_DMA_OWN)
 			break;
@@ -250,7 +261,8 @@
 			priv->stats.multicast++;
 
 rx_next:
-		writel(RX_DESC0_DMA_OWN, desc + RX_REG_OFFSET_DESC0);
+		wmb(); /* prevent setting ownership back too early */
+		moxart_desc_write(RX_DESC0_DMA_OWN, desc + RX_REG_OFFSET_DESC0);
 
 		rx_head = RX_NEXT(rx_head);
 		priv->rx_head = rx_head;
@@ -310,7 +322,7 @@
 static int moxart_mac_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 {
 	struct moxart_mac_priv_t *priv = netdev_priv(ndev);
-	void __iomem *desc;
+	void *desc;
 	unsigned int len;
 	unsigned int tx_head = priv->tx_head;
 	u32 txdes1;
@@ -319,11 +331,12 @@
 	desc = priv->tx_desc_base + (TX_REG_DESC_SIZE * tx_head);
 
 	spin_lock_irq(&priv->txlock);
-	if (readl(desc + TX_REG_OFFSET_DESC0) & TX_DESC0_DMA_OWN) {
+	if (moxart_desc_read(desc + TX_REG_OFFSET_DESC0) & TX_DESC0_DMA_OWN) {
 		net_dbg_ratelimited("no TX space for packet\n");
 		priv->stats.tx_dropped++;
 		goto out_unlock;
 	}
+	rmb(); /* ensure data is only read that had TX_DESC0_DMA_OWN cleared */
 
 	len = skb->len > TX_BUF_SIZE ? TX_BUF_SIZE : skb->len;
 
@@ -337,9 +350,9 @@
 	priv->tx_len[tx_head] = len;
 	priv->tx_skb[tx_head] = skb;
 
-	writel(priv->tx_mapping[tx_head],
+	moxart_desc_write(priv->tx_mapping[tx_head],
 	       desc + TX_REG_OFFSET_DESC2 + TX_DESC2_ADDRESS_PHYS);
-	writel(skb->data,
+	moxart_desc_write((uintptr_t)skb->data,
 	       desc + TX_REG_OFFSET_DESC2 + TX_DESC2_ADDRESS_VIRT);
 
 	if (skb->len < ETH_ZLEN) {
@@ -354,8 +367,9 @@
 	txdes1 = TX_DESC1_LTS | TX_DESC1_FTS | (len & TX_DESC1_BUF_SIZE_MASK);
 	if (tx_head == TX_DESC_NUM_MASK)
 		txdes1 |= TX_DESC1_END;
-	writel(txdes1, desc + TX_REG_OFFSET_DESC1);
-	writel(TX_DESC0_DMA_OWN, desc + TX_REG_OFFSET_DESC0);
+	moxart_desc_write(txdes1, desc + TX_REG_OFFSET_DESC1);
+	wmb(); /* flush descriptor before transferring ownership */
+	moxart_desc_write(TX_DESC0_DMA_OWN, desc + TX_REG_OFFSET_DESC0);
 
 	/* start to send packet */
 	writel(0xffffffff, priv->base + REG_TX_POLL_DEMAND);
@@ -460,9 +474,9 @@
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	ndev->base_addr = res->start;
 	priv->base = devm_ioremap_resource(p_dev, res);
-	ret = IS_ERR(priv->base);
-	if (ret) {
+	if (IS_ERR(priv->base)) {
 		dev_err(p_dev, "devm_ioremap_resource failed\n");
+		ret = PTR_ERR(priv->base);
 		goto init_fail;
 	}
 
diff --git a/drivers/net/ethernet/moxa/moxart_ether.h b/drivers/net/ethernet/moxa/moxart_ether.h
index 2be9280..93a9563 100644
--- a/drivers/net/ethernet/moxa/moxart_ether.h
+++ b/drivers/net/ethernet/moxa/moxart_ether.h
@@ -300,7 +300,7 @@
 
 	dma_addr_t rx_base;
 	dma_addr_t rx_mapping[RX_DESC_NUM];
-	void __iomem *rx_desc_base;
+	void *rx_desc_base;
 	unsigned char *rx_buf_base;
 	unsigned char *rx_buf[RX_DESC_NUM];
 	unsigned int rx_head;
@@ -308,7 +308,7 @@
 
 	dma_addr_t tx_base;
 	dma_addr_t tx_mapping[TX_DESC_NUM];
-	void __iomem *tx_desc_base;
+	void *tx_desc_base;
 	unsigned char *tx_buf_base;
 	unsigned char *tx_buf[RX_DESC_NUM];
 	unsigned int tx_head;
diff --git a/drivers/net/ethernet/neterion/vxge/vxge-main.c b/drivers/net/ethernet/neterion/vxge/vxge-main.c
index 50d5604..e0993eb 100644
--- a/drivers/net/ethernet/neterion/vxge/vxge-main.c
+++ b/drivers/net/ethernet/neterion/vxge/vxge-main.c
@@ -2223,8 +2223,6 @@
 	return IRQ_NONE;
 }
 
-#ifdef CONFIG_PCI_MSI
-
 static irqreturn_t vxge_tx_msix_handle(int irq, void *dev_id)
 {
 	struct vxge_fifo *fifo = (struct vxge_fifo *)dev_id;
@@ -2442,16 +2440,13 @@
 	if (vdev->config.intr_type == MSI_X)
 		pci_disable_msix(vdev->pdev);
 }
-#endif
 
 static void vxge_rem_isr(struct vxgedev *vdev)
 {
-#ifdef CONFIG_PCI_MSI
-	if (vdev->config.intr_type == MSI_X) {
+	if (IS_ENABLED(CONFIG_PCI_MSI) &&
+	    vdev->config.intr_type == MSI_X) {
 		vxge_rem_msix_isr(vdev);
-	} else
-#endif
-	if (vdev->config.intr_type == INTA) {
+	} else if (vdev->config.intr_type == INTA) {
 			synchronize_irq(vdev->pdev->irq);
 			free_irq(vdev->pdev->irq, vdev);
 	}
@@ -2460,11 +2455,10 @@
 static int vxge_add_isr(struct vxgedev *vdev)
 {
 	int ret = 0;
-#ifdef CONFIG_PCI_MSI
 	int vp_idx = 0, intr_idx = 0, intr_cnt = 0, msix_idx = 0, irq_req = 0;
 	int pci_fun = PCI_FUNC(vdev->pdev->devfn);
 
-	if (vdev->config.intr_type == MSI_X)
+	if (IS_ENABLED(CONFIG_PCI_MSI) && vdev->config.intr_type == MSI_X)
 		ret = vxge_enable_msix(vdev);
 
 	if (ret) {
@@ -2475,7 +2469,7 @@
 		vdev->config.intr_type = INTA;
 	}
 
-	if (vdev->config.intr_type == MSI_X) {
+	if (IS_ENABLED(CONFIG_PCI_MSI) && vdev->config.intr_type == MSI_X) {
 		for (intr_idx = 0;
 		     intr_idx < (vdev->no_of_vpath *
 			VXGE_HW_VPATH_MSIX_ACTIVE); intr_idx++) {
@@ -2576,9 +2570,8 @@
 		vdev->vxge_entries[intr_cnt].in_use = 1;
 		vdev->vxge_entries[intr_cnt].arg = &vdev->vpaths[0];
 	}
-INTA_MODE:
-#endif
 
+INTA_MODE:
 	if (vdev->config.intr_type == INTA) {
 		snprintf(vdev->desc[0], VXGE_INTR_STRLEN,
 			"%s:vxge:INTA", vdev->ndev->name);
@@ -3889,12 +3882,12 @@
 	if (max_mac_vpath > VXGE_MAX_MAC_ADDR_COUNT)
 		max_mac_vpath = VXGE_MAX_MAC_ADDR_COUNT;
 
-#ifndef CONFIG_PCI_MSI
-	vxge_debug_init(VXGE_ERR,
-		"%s: This Kernel does not support "
-		"MSI-X. Defaulting to INTA", VXGE_DRIVER_NAME);
-	*intr_type = INTA;
-#endif
+	if (!IS_ENABLED(CONFIG_PCI_MSI)) {
+		vxge_debug_init(VXGE_ERR,
+			"%s: This Kernel does not support "
+			"MSI-X. Defaulting to INTA", VXGE_DRIVER_NAME);
+		*intr_type = INTA;
+	}
 
 	/* Configure whether MSI-X or IRQL. */
 	switch (*intr_type) {
diff --git a/drivers/net/ethernet/qualcomm/qca_spi.c b/drivers/net/ethernet/qualcomm/qca_spi.c
index 689a4a5..1ef0393 100644
--- a/drivers/net/ethernet/qualcomm/qca_spi.c
+++ b/drivers/net/ethernet/qualcomm/qca_spi.c
@@ -811,7 +811,7 @@
 	dev->netdev_ops = &qcaspi_netdev_ops;
 	qcaspi_set_ethtool_ops(dev);
 	dev->watchdog_timeo = QCASPI_TX_TIMEOUT;
-	dev->flags = IFF_MULTICAST;
+	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
 	dev->tx_queue_len = 100;
 
 	qca = netdev_priv(dev);
diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index 17d5571..dd2cf37 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -4933,8 +4933,6 @@
 		RTL_W32(RxConfig, RX128_INT_EN | RX_MULTI_EN | RX_DMA_BURST);
 		break;
 	case RTL_GIGA_MAC_VER_40:
-		RTL_W32(RxConfig, RX128_INT_EN | RX_MULTI_EN | RX_DMA_BURST | RX_EARLY_OFF);
-		break;
 	case RTL_GIGA_MAC_VER_41:
 	case RTL_GIGA_MAC_VER_42:
 	case RTL_GIGA_MAC_VER_43:
@@ -4943,8 +4941,6 @@
 	case RTL_GIGA_MAC_VER_46:
 	case RTL_GIGA_MAC_VER_47:
 	case RTL_GIGA_MAC_VER_48:
-		RTL_W32(RxConfig, RX128_INT_EN | RX_DMA_BURST | RX_EARLY_OFF);
-		break;
 	case RTL_GIGA_MAC_VER_49:
 	case RTL_GIGA_MAC_VER_50:
 	case RTL_GIGA_MAC_VER_51:
@@ -6137,28 +6133,28 @@
 		sw_cnt_1ms_ini = 16000000/rg_saw_cnt;
 		sw_cnt_1ms_ini &= 0x0fff;
 		data = r8168_mac_ocp_read(tp, 0xd412);
-		data &= 0x0fff;
+		data &= ~0x0fff;
 		data |= sw_cnt_1ms_ini;
 		r8168_mac_ocp_write(tp, 0xd412, data);
 	}
 
 	data = r8168_mac_ocp_read(tp, 0xe056);
-	data &= 0xf0;
-	data |= 0x07;
+	data &= ~0xf0;
+	data |= 0x70;
 	r8168_mac_ocp_write(tp, 0xe056, data);
 
 	data = r8168_mac_ocp_read(tp, 0xe052);
-	data &= 0x8008;
-	data |= 0x6000;
+	data &= ~0x6000;
+	data |= 0x8008;
 	r8168_mac_ocp_write(tp, 0xe052, data);
 
 	data = r8168_mac_ocp_read(tp, 0xe0d6);
-	data &= 0x01ff;
+	data &= ~0x01ff;
 	data |= 0x017f;
 	r8168_mac_ocp_write(tp, 0xe0d6, data);
 
 	data = r8168_mac_ocp_read(tp, 0xd420);
-	data &= 0x0fff;
+	data &= ~0x0fff;
 	data |= 0x047f;
 	r8168_mac_ocp_write(tp, 0xd420, data);
 
@@ -7730,10 +7726,13 @@
 {
 	struct rtl8169_private *tp = netdev_priv(dev);
 	void __iomem *ioaddr = tp->mmio_addr;
+	struct pci_dev *pdev = tp->pci_dev;
 	struct rtl8169_counters *counters = tp->counters;
 	unsigned int start;
 
-	if (netif_running(dev))
+	pm_runtime_get_noresume(&pdev->dev);
+
+	if (netif_running(dev) && pm_runtime_active(&pdev->dev))
 		rtl8169_rx_missed(dev, ioaddr);
 
 	do {
@@ -7761,7 +7760,8 @@
 	 * Fetch additonal counter values missing in stats collected by driver
 	 * from tally counters.
 	 */
-	rtl8169_update_counters(dev);
+	if (pm_runtime_active(&pdev->dev))
+		rtl8169_update_counters(dev);
 
 	/*
 	 * Subtract values fetched during initalization.
@@ -7774,6 +7774,8 @@
 	stats->tx_aborted_errors = le16_to_cpu(counters->tx_aborted) -
 		le16_to_cpu(tp->tc_offset.tx_aborted);
 
+	pm_runtime_put_noidle(&pdev->dev);
+
 	return stats;
 }
 
@@ -7853,6 +7855,10 @@
 
 	rtl8169_net_suspend(dev);
 
+	/* Update counters before going runtime suspend */
+	rtl8169_rx_missed(dev, tp->mmio_addr);
+	rtl8169_update_counters(dev);
+
 	return 0;
 }
 
diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
index ac43ed9..86449c3 100644
--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c
@@ -1139,7 +1139,8 @@
 	if (netif_running(ndev)) {
 		netif_device_detach(ndev);
 		/* Stop PTP Clock driver */
-		ravb_ptp_stop(ndev);
+		if (priv->chip_id == RCAR_GEN2)
+			ravb_ptp_stop(ndev);
 		/* Wait for DMA stopping */
 		error = ravb_stop_dma(ndev);
 		if (error) {
@@ -1170,7 +1171,8 @@
 		ravb_emac_init(ndev);
 
 		/* Initialise PTP Clock driver */
-		ravb_ptp_init(ndev, priv->pdev);
+		if (priv->chip_id == RCAR_GEN2)
+			ravb_ptp_init(ndev, priv->pdev);
 
 		netif_device_attach(ndev);
 	}
@@ -1298,7 +1300,8 @@
 	netif_tx_stop_all_queues(ndev);
 
 	/* Stop PTP Clock driver */
-	ravb_ptp_stop(ndev);
+	if (priv->chip_id == RCAR_GEN2)
+		ravb_ptp_stop(ndev);
 
 	/* Wait for DMA stopping */
 	ravb_stop_dma(ndev);
@@ -1311,7 +1314,8 @@
 	ravb_emac_init(ndev);
 
 	/* Initialise PTP Clock driver */
-	ravb_ptp_init(ndev, priv->pdev);
+	if (priv->chip_id == RCAR_GEN2)
+		ravb_ptp_init(ndev, priv->pdev);
 
 	netif_tx_start_all_queues(ndev);
 }
@@ -1718,7 +1722,6 @@
 static int ravb_probe(struct platform_device *pdev)
 {
 	struct device_node *np = pdev->dev.of_node;
-	const struct of_device_id *match;
 	struct ravb_private *priv;
 	enum ravb_chip_id chip_id;
 	struct net_device *ndev;
@@ -1750,8 +1753,7 @@
 	ndev->base_addr = res->start;
 	ndev->dma = -1;
 
-	match = of_match_device(of_match_ptr(ravb_match_table), &pdev->dev);
-	chip_id = (enum ravb_chip_id)match->data;
+	chip_id = (enum ravb_chip_id)of_device_get_match_data(&pdev->dev);
 
 	if (chip_id == RCAR_GEN3)
 		irq = platform_get_irq_byname(pdev, "ch22");
@@ -1814,10 +1816,6 @@
 			   CCC_OPC_CONFIG | CCC_GAC | CCC_CSEL_HPB, CCC);
 	}
 
-	/* Set CSEL value */
-	ravb_write(ndev, (ravb_read(ndev, CCC) & ~CCC_CSEL) | CCC_CSEL_HPB,
-		   CCC);
-
 	/* Set GTI value */
 	error = ravb_set_gti(ndev);
 	if (error)
diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index dfa9e59..7384499 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c
@@ -3061,15 +3061,11 @@
 	mdp->ether_link_active_low = pd->ether_link_active_low;
 
 	/* set cpu data */
-	if (id) {
+	if (id)
 		mdp->cd = (struct sh_eth_cpu_data *)id->driver_data;
-	} else	{
-		const struct of_device_id *match;
+	else
+		mdp->cd = (struct sh_eth_cpu_data *)of_device_get_match_data(&pdev->dev);
 
-		match = of_match_device(of_match_ptr(sh_eth_match_table),
-					&pdev->dev);
-		mdp->cd = (struct sh_eth_cpu_data *)match->data;
-	}
 	mdp->reg_offset = sh_eth_get_register_offset(mdp->cd->register_type);
 	if (!mdp->reg_offset) {
 		dev_err(&pdev->dev, "Unknown register type (%d)\n",
diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c
index a4ab71d..166a7fc 100644
--- a/drivers/net/ethernet/rocker/rocker.c
+++ b/drivers/net/ethernet/rocker/rocker.c
@@ -3531,12 +3531,14 @@
 	info.addr = lw->addr;
 	info.vid = lw->vid;
 
+	rtnl_lock();
 	if (learned && removing)
 		call_switchdev_notifiers(SWITCHDEV_FDB_DEL,
 					 lw->rocker_port->dev, &info.info);
 	else if (learned && !removing)
 		call_switchdev_notifiers(SWITCHDEV_FDB_ADD,
 					 lw->rocker_port->dev, &info.info);
+	rtnl_unlock();
 
 	rocker_port_kfree(lw->trans, work);
 }
diff --git a/drivers/net/ethernet/smsc/smc91x.c b/drivers/net/ethernet/smsc/smc91x.c
index 0e2fc1a..db7db8a 100644
--- a/drivers/net/ethernet/smsc/smc91x.c
+++ b/drivers/net/ethernet/smsc/smc91x.c
@@ -2342,8 +2342,8 @@
 	}
 
 	ndev->irq = platform_get_irq(pdev, 0);
-	if (ndev->irq <= 0) {
-		ret = -ENODEV;
+	if (ndev->irq < 0) {
+		ret = ndev->irq;
 		goto out_release_io;
 	}
 	/*
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
index 0faf163..efb54f3 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
@@ -199,21 +199,12 @@
 	struct stmmac_priv *priv = netdev_priv(ndev);
 	struct stmmac_mdio_bus_data *mdio_bus_data = priv->plat->mdio_bus_data;
 	int addr, found;
-	struct device_node *mdio_node = NULL;
-	struct device_node *child_node = NULL;
+	struct device_node *mdio_node = priv->plat->mdio_node;
 
 	if (!mdio_bus_data)
 		return 0;
 
 	if (IS_ENABLED(CONFIG_OF)) {
-		for_each_child_of_node(priv->device->of_node, child_node) {
-			if (of_device_is_compatible(child_node,
-						    "snps,dwmac-mdio")) {
-				mdio_node = child_node;
-				break;
-			}
-		}
-
 		if (mdio_node) {
 			netdev_dbg(ndev, "FOUND MDIO subnode\n");
 		} else {
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
index 6a52fa1..4514ba7 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
@@ -110,6 +110,7 @@
 	struct device_node *np = pdev->dev.of_node;
 	struct plat_stmmacenet_data *plat;
 	struct stmmac_dma_cfg *dma_cfg;
+	struct device_node *child_node = NULL;
 
 	plat = devm_kzalloc(&pdev->dev, sizeof(*plat), GFP_KERNEL);
 	if (!plat)
@@ -140,13 +141,19 @@
 		plat->phy_node = of_node_get(np);
 	}
 
+	for_each_child_of_node(np, child_node)
+		if (of_device_is_compatible(child_node,	"snps,dwmac-mdio")) {
+			plat->mdio_node = child_node;
+			break;
+		}
+
 	/* "snps,phy-addr" is not a standard property. Mark it as deprecated
 	 * and warn of its use. Remove this when phy node support is added.
 	 */
 	if (of_property_read_u32(np, "snps,phy-addr", &plat->phy_addr) == 0)
 		dev_warn(&pdev->dev, "snps,phy-addr property is deprecated\n");
 
-	if ((plat->phy_node && !of_phy_is_fixed_link(np)) || plat->phy_bus_name)
+	if ((plat->phy_node && !of_phy_is_fixed_link(np)) || !plat->mdio_node)
 		plat->mdio_bus_data = NULL;
 	else
 		plat->mdio_bus_data =
diff --git a/drivers/net/ethernet/sun/sunvnet.c b/drivers/net/ethernet/sun/sunvnet.c
index cc106d8..23fa298 100644
--- a/drivers/net/ethernet/sun/sunvnet.c
+++ b/drivers/net/ethernet/sun/sunvnet.c
@@ -389,17 +389,27 @@
 	if (vio_version_after_eq(&port->vio, 1, 8)) {
 		struct vio_net_dext *dext = vio_net_ext(desc);
 
+		skb_reset_network_header(skb);
+
 		if (dext->flags & VNET_PKT_HCK_IPV4_HDRCKSUM) {
 			if (skb->protocol == ETH_P_IP) {
-				struct iphdr *iph = (struct iphdr *)skb->data;
+				struct iphdr *iph = ip_hdr(skb);
 
 				iph->check = 0;
 				ip_send_check(iph);
 			}
 		}
 		if ((dext->flags & VNET_PKT_HCK_FULLCKSUM) &&
-		    skb->ip_summed == CHECKSUM_NONE)
-			vnet_fullcsum(skb);
+		    skb->ip_summed == CHECKSUM_NONE) {
+			if (skb->protocol == htons(ETH_P_IP)) {
+				struct iphdr *iph = ip_hdr(skb);
+				int ihl = iph->ihl * 4;
+
+				skb_reset_transport_header(skb);
+				skb_set_transport_header(skb, ihl);
+				vnet_fullcsum(skb);
+			}
+		}
 		if (dext->flags & VNET_PKT_HCK_IPV4_HDRCKSUM_OK) {
 			skb->ip_summed = CHECKSUM_PARTIAL;
 			skb->csum_level = 0;
diff --git a/drivers/net/ethernet/synopsys/dwc_eth_qos.c b/drivers/net/ethernet/synopsys/dwc_eth_qos.c
index 70814b7..af11ed1 100644
--- a/drivers/net/ethernet/synopsys/dwc_eth_qos.c
+++ b/drivers/net/ethernet/synopsys/dwc_eth_qos.c
@@ -426,7 +426,7 @@
 #define DWC_MMC_RXOCTETCOUNT_GB          0x0784
 #define DWC_MMC_RXPACKETCOUNT_GB         0x0780
 
-static int debug = 3;
+static int debug = -1;
 module_param(debug, int, 0);
 MODULE_PARM_DESC(debug, "DWC_eth_qos debug level (0=none,...,16=all)");
 
@@ -650,6 +650,11 @@
 	u32 mmc_tx_counters_mask;
 
 	struct dwceqos_flowcontrol flowcontrol;
+
+	/* Tracks the intermediate state of phy started but hardware
+	 * init not finished yet.
+	 */
+	bool phy_defer;
 };
 
 static void dwceqos_read_mmc_counters(struct net_local *lp, u32 rx_mask,
@@ -901,6 +906,9 @@
 	struct phy_device *phydev = lp->phy_dev;
 	int status_change = 0;
 
+	if (lp->phy_defer)
+		return;
+
 	if (phydev->link) {
 		if ((lp->speed != phydev->speed) ||
 		    (lp->duplex != phydev->duplex)) {
@@ -1113,7 +1121,7 @@
 	/* Allocate DMA descriptors */
 	size = DWCEQOS_RX_DCNT * sizeof(struct dwceqos_dma_desc);
 	lp->rx_descs = dma_alloc_coherent(lp->ndev->dev.parent, size,
-			&lp->rx_descs_addr, 0);
+			&lp->rx_descs_addr, GFP_KERNEL);
 	if (!lp->rx_descs)
 		goto err_out;
 	lp->rx_descs_tail_addr = lp->rx_descs_addr +
@@ -1121,7 +1129,7 @@
 
 	size = DWCEQOS_TX_DCNT * sizeof(struct dwceqos_dma_desc);
 	lp->tx_descs = dma_alloc_coherent(lp->ndev->dev.parent, size,
-			&lp->tx_descs_addr, 0);
+			&lp->tx_descs_addr, GFP_KERNEL);
 	if (!lp->tx_descs)
 		goto err_out;
 	lp->tx_descs_tail_addr = lp->tx_descs_addr +
@@ -1635,6 +1643,12 @@
 	regval = dwceqos_read(lp, REG_DWCEQOS_MAC_CFG);
 	dwceqos_write(lp, REG_DWCEQOS_MAC_CFG,
 		      regval | DWCEQOS_MAC_CFG_TE | DWCEQOS_MAC_CFG_RE);
+
+	lp->phy_defer = false;
+	mutex_lock(&lp->phy_dev->lock);
+	phy_read_status(lp->phy_dev);
+	dwceqos_adjust_link(lp->ndev);
+	mutex_unlock(&lp->phy_dev->lock);
 }
 
 static void dwceqos_tx_reclaim(unsigned long data)
@@ -1880,9 +1894,13 @@
 	}
 	netdev_reset_queue(ndev);
 
-	napi_enable(&lp->napi);
+	/* The dwceqos reset state machine requires all phy clocks to complete,
+	 * hence the unusual init order with phy_start first.
+	 */
+	lp->phy_defer = true;
 	phy_start(lp->phy_dev);
 	dwceqos_init_hw(lp);
+	napi_enable(&lp->napi);
 
 	netif_start_queue(ndev);
 	tasklet_enable(&lp->tx_bdreclaim_tasklet);
@@ -1915,18 +1933,19 @@
 {
 	struct net_local *lp = netdev_priv(ndev);
 
-	phy_stop(lp->phy_dev);
-
 	tasklet_disable(&lp->tx_bdreclaim_tasklet);
-	netif_stop_queue(ndev);
 	napi_disable(&lp->napi);
 
-	dwceqos_drain_dma(lp);
+	/* Stop all tx before we drain the tx dma. */
+	netif_tx_lock_bh(lp->ndev);
+	netif_stop_queue(ndev);
+	netif_tx_unlock_bh(lp->ndev);
 
-	netif_tx_lock(lp->ndev);
+	dwceqos_drain_dma(lp);
 	dwceqos_reset_hw(lp);
+	phy_stop(lp->phy_dev);
+
 	dwceqos_descriptor_free(lp);
-	netif_tx_unlock(lp->ndev);
 
 	return 0;
 }
@@ -2178,12 +2197,10 @@
 		((trans.initial_descriptor + trans.nr_descriptors) %
 		 DWCEQOS_TX_DCNT));
 
-	dwceqos_tx_finalize(skb, lp, &trans);
-
-	netdev_sent_queue(ndev, skb->len);
-
 	spin_lock_bh(&lp->tx_lock);
 	lp->tx_free -= trans.nr_descriptors;
+	dwceqos_tx_finalize(skb, lp, &trans);
+	netdev_sent_queue(ndev, skb->len);
 	spin_unlock_bh(&lp->tx_lock);
 
 	ndev->trans_start = jiffies;
diff --git a/drivers/net/ethernet/ti/cpsw-phy-sel.c b/drivers/net/ethernet/ti/cpsw-phy-sel.c
index e9cc61e..c3e85ac 100644
--- a/drivers/net/ethernet/ti/cpsw-phy-sel.c
+++ b/drivers/net/ethernet/ti/cpsw-phy-sel.c
@@ -63,8 +63,12 @@
 		mode = AM33XX_GMII_SEL_MODE_RGMII;
 		break;
 
-	case PHY_INTERFACE_MODE_MII:
 	default:
+		dev_warn(priv->dev,
+			 "Unsupported PHY mode: \"%s\". Defaulting to MII.\n",
+			phy_modes(phy_mode));
+		/* fallthrough */
+	case PHY_INTERFACE_MODE_MII:
 		mode = AM33XX_GMII_SEL_MODE_MII;
 		break;
 	};
@@ -106,8 +110,12 @@
 		mode = AM33XX_GMII_SEL_MODE_RGMII;
 		break;
 
-	case PHY_INTERFACE_MODE_MII:
 	default:
+		dev_warn(priv->dev,
+			 "Unsupported PHY mode: \"%s\". Defaulting to MII.\n",
+			phy_modes(phy_mode));
+		/* fallthrough */
+	case PHY_INTERFACE_MODE_MII:
 		mode = AM33XX_GMII_SEL_MODE_MII;
 		break;
 	};
diff --git a/drivers/net/ethernet/ti/davinci_cpdma.c b/drivers/net/ethernet/ti/davinci_cpdma.c
index 657b65b..18bf3a8 100644
--- a/drivers/net/ethernet/ti/davinci_cpdma.c
+++ b/drivers/net/ethernet/ti/davinci_cpdma.c
@@ -82,7 +82,7 @@
 
 struct cpdma_desc_pool {
 	phys_addr_t		phys;
-	u32			hw_addr;
+	dma_addr_t		hw_addr;
 	void __iomem		*iomap;		/* ioremap map */
 	void			*cpumap;	/* dma_alloc map */
 	int			desc_size, mem_size;
@@ -152,7 +152,7 @@
  * abstract out these details
  */
 static struct cpdma_desc_pool *
-cpdma_desc_pool_create(struct device *dev, u32 phys, u32 hw_addr,
+cpdma_desc_pool_create(struct device *dev, u32 phys, dma_addr_t hw_addr,
 				int size, int align)
 {
 	int bitmap_size;
@@ -176,13 +176,13 @@
 
 	if (phys) {
 		pool->phys  = phys;
-		pool->iomap = ioremap(phys, size);
+		pool->iomap = ioremap(phys, size); /* should be memremap? */
 		pool->hw_addr = hw_addr;
 	} else {
-		pool->cpumap = dma_alloc_coherent(dev, size, &pool->phys,
+		pool->cpumap = dma_alloc_coherent(dev, size, &pool->hw_addr,
 						  GFP_KERNEL);
-		pool->iomap = pool->cpumap;
-		pool->hw_addr = pool->phys;
+		pool->iomap = (void __iomem __force *)pool->cpumap;
+		pool->phys = pool->hw_addr; /* assumes no IOMMU, don't use this value */
 	}
 
 	if (pool->iomap)
diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c
index c61d66d..029841f 100644
--- a/drivers/net/ethernet/ti/netcp_core.c
+++ b/drivers/net/ethernet/ti/netcp_core.c
@@ -117,21 +117,17 @@
 	*ndesc = le32_to_cpu(desc->next_desc);
 }
 
-static void get_pad_info(u32 *pad0, u32 *pad1, u32 *pad2, struct knav_dma_desc *desc)
+static u32 get_sw_data(int index, struct knav_dma_desc *desc)
 {
-	*pad0 = le32_to_cpu(desc->pad[0]);
-	*pad1 = le32_to_cpu(desc->pad[1]);
-	*pad2 = le32_to_cpu(desc->pad[2]);
+	/* No Endian conversion needed as this data is untouched by hw */
+	return desc->sw_data[index];
 }
 
-static void get_pad_ptr(void **padptr, struct knav_dma_desc *desc)
-{
-	u64 pad64;
-
-	pad64 = le32_to_cpu(desc->pad[0]) +
-		((u64)le32_to_cpu(desc->pad[1]) << 32);
-	*padptr = (void *)(uintptr_t)pad64;
-}
+/* use these macros to get sw data */
+#define GET_SW_DATA0(desc) get_sw_data(0, desc)
+#define GET_SW_DATA1(desc) get_sw_data(1, desc)
+#define GET_SW_DATA2(desc) get_sw_data(2, desc)
+#define GET_SW_DATA3(desc) get_sw_data(3, desc)
 
 static void get_org_pkt_info(dma_addr_t *buff, u32 *buff_len,
 			     struct knav_dma_desc *desc)
@@ -163,13 +159,18 @@
 	desc->packet_info = cpu_to_le32(pkt_info);
 }
 
-static void set_pad_info(u32 pad0, u32 pad1, u32 pad2, struct knav_dma_desc *desc)
+static void set_sw_data(int index, u32 data, struct knav_dma_desc *desc)
 {
-	desc->pad[0] = cpu_to_le32(pad0);
-	desc->pad[1] = cpu_to_le32(pad1);
-	desc->pad[2] = cpu_to_le32(pad1);
+	/* No Endian conversion needed as this data is untouched by hw */
+	desc->sw_data[index] = data;
 }
 
+/* use these macros to set sw data */
+#define SET_SW_DATA0(data, desc) set_sw_data(0, data, desc)
+#define SET_SW_DATA1(data, desc) set_sw_data(1, data, desc)
+#define SET_SW_DATA2(data, desc) set_sw_data(2, data, desc)
+#define SET_SW_DATA3(data, desc) set_sw_data(3, data, desc)
+
 static void set_org_pkt_info(dma_addr_t buff, u32 buff_len,
 			     struct knav_dma_desc *desc)
 {
@@ -581,7 +582,6 @@
 	dma_addr_t dma_desc, dma_buf;
 	unsigned int buf_len, dma_sz = sizeof(*ndesc);
 	void *buf_ptr;
-	u32 pad[2];
 	u32 tmp;
 
 	get_words(&dma_desc, 1, &desc->next_desc);
@@ -593,14 +593,20 @@
 			break;
 		}
 		get_pkt_info(&dma_buf, &tmp, &dma_desc, ndesc);
-		get_pad_ptr(&buf_ptr, ndesc);
+		/* warning!!!! We are retrieving the virtual ptr in the sw_data
+		 * field as a 32bit value. Will not work on 64bit machines
+		 */
+		buf_ptr = (void *)GET_SW_DATA0(ndesc);
+		buf_len = (int)GET_SW_DATA1(desc);
 		dma_unmap_page(netcp->dev, dma_buf, PAGE_SIZE, DMA_FROM_DEVICE);
 		__free_page(buf_ptr);
 		knav_pool_desc_put(netcp->rx_pool, desc);
 	}
-
-	get_pad_info(&pad[0], &pad[1], &buf_len, desc);
-	buf_ptr = (void *)(uintptr_t)(pad[0] + ((u64)pad[1] << 32));
+	/* warning!!!! We are retrieving the virtual ptr in the sw_data
+	 * field as a 32bit value. Will not work on 64bit machines
+	 */
+	buf_ptr = (void *)GET_SW_DATA0(desc);
+	buf_len = (int)GET_SW_DATA1(desc);
 
 	if (buf_ptr)
 		netcp_frag_free(buf_len <= PAGE_SIZE, buf_ptr);
@@ -639,7 +645,6 @@
 	dma_addr_t dma_desc, dma_buff;
 	struct netcp_packet p_info;
 	struct sk_buff *skb;
-	u32 pad[2];
 	void *org_buf_ptr;
 
 	dma_desc = knav_queue_pop(netcp->rx_queue, &dma_sz);
@@ -653,8 +658,11 @@
 	}
 
 	get_pkt_info(&dma_buff, &buf_len, &dma_desc, desc);
-	get_pad_info(&pad[0], &pad[1], &org_buf_len, desc);
-	org_buf_ptr = (void *)(uintptr_t)(pad[0] + ((u64)pad[1] << 32));
+	/* warning!!!! We are retrieving the virtual ptr in the sw_data
+	 * field as a 32bit value. Will not work on 64bit machines
+	 */
+	org_buf_ptr = (void *)GET_SW_DATA0(desc);
+	org_buf_len = (int)GET_SW_DATA1(desc);
 
 	if (unlikely(!org_buf_ptr)) {
 		dev_err(netcp->ndev_dev, "NULL bufptr in desc\n");
@@ -679,7 +687,6 @@
 	/* Fill in the page fragment list */
 	while (dma_desc) {
 		struct page *page;
-		void *ptr;
 
 		ndesc = knav_pool_desc_unmap(netcp->rx_pool, dma_desc, dma_sz);
 		if (unlikely(!ndesc)) {
@@ -688,8 +695,10 @@
 		}
 
 		get_pkt_info(&dma_buff, &buf_len, &dma_desc, ndesc);
-		get_pad_ptr(&ptr, ndesc);
-		page = ptr;
+		/* warning!!!! We are retrieving the virtual ptr in the sw_data
+		 * field as a 32bit value. Will not work on 64bit machines
+		 */
+		page = (struct page *)GET_SW_DATA0(desc);
 
 		if (likely(dma_buff && buf_len && page)) {
 			dma_unmap_page(netcp->dev, dma_buff, PAGE_SIZE,
@@ -777,7 +786,10 @@
 		}
 
 		get_org_pkt_info(&dma, &buf_len, desc);
-		get_pad_ptr(&buf_ptr, desc);
+		/* warning!!!! We are retrieving the virtual ptr in the sw_data
+		 * field as a 32bit value. Will not work on 64bit machines
+		 */
+		buf_ptr = (void *)GET_SW_DATA0(desc);
 
 		if (unlikely(!dma)) {
 			dev_err(netcp->ndev_dev, "NULL orig_buff in desc\n");
@@ -829,7 +841,7 @@
 	struct page *page;
 	dma_addr_t dma;
 	void *bufptr;
-	u32 pad[3];
+	u32 sw_data[2];
 
 	/* Allocate descriptor */
 	hwdesc = knav_pool_desc_get(netcp->rx_pool);
@@ -846,7 +858,7 @@
 				SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
 
 		bufptr = netdev_alloc_frag(primary_buf_len);
-		pad[2] = primary_buf_len;
+		sw_data[1] = primary_buf_len;
 
 		if (unlikely(!bufptr)) {
 			dev_warn_ratelimited(netcp->ndev_dev,
@@ -858,9 +870,10 @@
 		if (unlikely(dma_mapping_error(netcp->dev, dma)))
 			goto fail;
 
-		pad[0] = lower_32_bits((uintptr_t)bufptr);
-		pad[1] = upper_32_bits((uintptr_t)bufptr);
-
+		/* warning!!!! We are saving the virtual ptr in the sw_data
+		 * field as a 32bit value. Will not work on 64bit machines
+		 */
+		sw_data[0] = (u32)bufptr;
 	} else {
 		/* Allocate a secondary receive queue entry */
 		page = alloc_page(GFP_ATOMIC | GFP_DMA | __GFP_COLD);
@@ -870,9 +883,11 @@
 		}
 		buf_len = PAGE_SIZE;
 		dma = dma_map_page(netcp->dev, page, 0, buf_len, DMA_TO_DEVICE);
-		pad[0] = lower_32_bits(dma);
-		pad[1] = upper_32_bits(dma);
-		pad[2] = 0;
+		/* warning!!!! We are saving the virtual ptr in the sw_data
+		 * field as a 32bit value. Will not work on 64bit machines
+		 */
+		sw_data[0] = (u32)page;
+		sw_data[1] = 0;
 	}
 
 	desc_info =  KNAV_DMA_DESC_PS_INFO_IN_DESC;
@@ -882,7 +897,8 @@
 	pkt_info |= (netcp->rx_queue_id & KNAV_DMA_DESC_RETQ_MASK) <<
 		    KNAV_DMA_DESC_RETQ_SHIFT;
 	set_org_pkt_info(dma, buf_len, hwdesc);
-	set_pad_info(pad[0], pad[1], pad[2], hwdesc);
+	SET_SW_DATA0(sw_data[0], hwdesc);
+	SET_SW_DATA1(sw_data[1], hwdesc);
 	set_desc_info(desc_info, pkt_info, hwdesc);
 
 	/* Push to FDQs */
@@ -971,7 +987,6 @@
 					  unsigned int budget)
 {
 	struct knav_dma_desc *desc;
-	void *ptr;
 	struct sk_buff *skb;
 	unsigned int dma_sz;
 	dma_addr_t dma;
@@ -988,8 +1003,10 @@
 			continue;
 		}
 
-		get_pad_ptr(&ptr, desc);
-		skb = ptr;
+		/* warning!!!! We are retrieving the virtual ptr in the sw_data
+		 * field as a 32bit value. Will not work on 64bit machines
+		 */
+		skb = (struct sk_buff *)GET_SW_DATA0(desc);
 		netcp_free_tx_desc_chain(netcp, desc, dma_sz);
 		if (!skb) {
 			dev_err(netcp->ndev_dev, "No skb in Tx desc\n");
@@ -1194,10 +1211,10 @@
 	}
 
 	set_words(&tmp, 1, &desc->packet_info);
-	tmp = lower_32_bits((uintptr_t)&skb);
-	set_words(&tmp, 1, &desc->pad[0]);
-	tmp = upper_32_bits((uintptr_t)&skb);
-	set_words(&tmp, 1, &desc->pad[1]);
+	/* warning!!!! We are saving the virtual ptr in the sw_data
+	 * field as a 32bit value. Will not work on 64bit machines
+	 */
+	SET_SW_DATA0((u32)skb, desc);
 
 	if (tx_pipe->flags & SWITCH_TO_PORT_IN_TAGINFO) {
 		tmp = tx_pipe->switch_to_port;
diff --git a/drivers/net/fddi/defxx.c b/drivers/net/fddi/defxx.c
index 7f975a2..b0de8ec 100644
--- a/drivers/net/fddi/defxx.c
+++ b/drivers/net/fddi/defxx.c
@@ -533,8 +533,8 @@
 	const char *print_name = dev_name(bdev);
 	struct net_device *dev;
 	DFX_board_t	  *bp;			/* board pointer */
-	resource_size_t bar_start[3];		/* pointers to ports */
-	resource_size_t bar_len[3];		/* resource length */
+	resource_size_t bar_start[3] = {0};	/* pointers to ports */
+	resource_size_t bar_len[3] = {0};	/* resource length */
 	int alloc_size;				/* total buffer size used */
 	struct resource *region;
 	int err = 0;
@@ -3697,8 +3697,8 @@
 	int dfx_bus_pci = dev_is_pci(bdev);
 	int dfx_bus_tc = DFX_BUS_TC(bdev);
 	int dfx_use_mmio = DFX_MMIO || dfx_bus_tc;
-	resource_size_t bar_start[3];		/* pointers to ports */
-	resource_size_t bar_len[3];		/* resource lengths */
+	resource_size_t bar_start[3] = {0};	/* pointers to ports */
+	resource_size_t bar_len[3] = {0};	/* resource lengths */
 	int		alloc_size;		/* total buffer size used */
 
 	unregister_netdev(dev);
diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index 7456569..0bf7edd 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -980,9 +980,9 @@
 			opts = ip_tunnel_info_opts(info);
 
 		if (key->tun_flags & TUNNEL_CSUM)
-			flags |= GENEVE_F_UDP_CSUM;
+			flags &= ~GENEVE_F_UDP_ZERO_CSUM6_TX;
 		else
-			flags &= ~GENEVE_F_UDP_CSUM;
+			flags |= GENEVE_F_UDP_ZERO_CSUM6_TX;
 
 		err = geneve6_build_skb(dst, skb, key->tun_flags, vni,
 					info->options_len, opts,
@@ -1039,6 +1039,34 @@
 	return geneve_xmit_skb(skb, dev, info);
 }
 
+static int __geneve_change_mtu(struct net_device *dev, int new_mtu, bool strict)
+{
+	/* The max_mtu calculation does not take account of GENEVE
+	 * options, to avoid excluding potentially valid
+	 * configurations.
+	 */
+	int max_mtu = IP_MAX_MTU - GENEVE_BASE_HLEN - sizeof(struct iphdr)
+		- dev->hard_header_len;
+
+	if (new_mtu < 68)
+		return -EINVAL;
+
+	if (new_mtu > max_mtu) {
+		if (strict)
+			return -EINVAL;
+
+		new_mtu = max_mtu;
+	}
+
+	dev->mtu = new_mtu;
+	return 0;
+}
+
+static int geneve_change_mtu(struct net_device *dev, int new_mtu)
+{
+	return __geneve_change_mtu(dev, new_mtu, true);
+}
+
 static int geneve_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
 {
 	struct ip_tunnel_info *info = skb_tunnel_info(skb);
@@ -1083,7 +1111,7 @@
 	.ndo_stop		= geneve_stop,
 	.ndo_start_xmit		= geneve_xmit,
 	.ndo_get_stats64	= ip_tunnel_get_stats64,
-	.ndo_change_mtu		= eth_change_mtu,
+	.ndo_change_mtu		= geneve_change_mtu,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_set_mac_address	= eth_mac_addr,
 	.ndo_fill_metadata_dst	= geneve_fill_metadata_dst,
@@ -1150,6 +1178,7 @@
 	dev->hw_features |= NETIF_F_GSO_SOFTWARE;
 
 	netif_keep_dst(dev);
+	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
 	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_NO_QUEUE;
 	eth_hw_addr_random(dev);
 }
@@ -1441,12 +1470,23 @@
 		return dev;
 
 	err = geneve_configure(net, dev, &geneve_remote_unspec,
-			       0, 0, 0, htons(dst_port), true, 0);
-	if (err) {
-		free_netdev(dev);
-		return ERR_PTR(err);
-	}
+			       0, 0, 0, htons(dst_port), true,
+			       GENEVE_F_UDP_ZERO_CSUM6_RX);
+	if (err)
+		goto err;
+
+	/* openvswitch users expect packet sizes to be unrestricted,
+	 * so set the largest MTU we can.
+	 */
+	err = __geneve_change_mtu(dev, IP_MAX_MTU, false);
+	if (err)
+		goto err;
+
 	return dev;
+
+ err:
+	free_netdev(dev);
+	return ERR_PTR(err);
 }
 EXPORT_SYMBOL_GPL(geneve_dev_create_fb);
 
diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index f4130af..fcb92c0 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -624,6 +624,7 @@
 #define RNDIS_PKT_ALIGN_DEFAULT 8
 
 struct multi_send_data {
+	struct sk_buff *skb; /* skb containing the pkt */
 	struct hv_netvsc_packet *pkt; /* netvsc pkt pending */
 	u32 count; /* counter of batched packets */
 };
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index 059fc52..ec313fc 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -841,6 +841,18 @@
 	return ret;
 }
 
+/* Move packet out of multi send data (msd), and clear msd */
+static inline void move_pkt_msd(struct hv_netvsc_packet **msd_send,
+				struct sk_buff **msd_skb,
+				struct multi_send_data *msdp)
+{
+	*msd_skb = msdp->skb;
+	*msd_send = msdp->pkt;
+	msdp->skb = NULL;
+	msdp->pkt = NULL;
+	msdp->count = 0;
+}
+
 int netvsc_send(struct hv_device *device,
 		struct hv_netvsc_packet *packet,
 		struct rndis_message *rndis_msg,
@@ -855,6 +867,7 @@
 	unsigned int section_index = NETVSC_INVALID_INDEX;
 	struct multi_send_data *msdp;
 	struct hv_netvsc_packet *msd_send = NULL, *cur_send = NULL;
+	struct sk_buff *msd_skb = NULL;
 	bool try_batch;
 	bool xmit_more = (skb != NULL) ? skb->xmit_more : false;
 
@@ -897,10 +910,8 @@
 		   net_device->send_section_size) {
 		section_index = netvsc_get_next_send_section(net_device);
 		if (section_index != NETVSC_INVALID_INDEX) {
-				msd_send = msdp->pkt;
-				msdp->pkt = NULL;
-				msdp->count = 0;
-				msd_len = 0;
+			move_pkt_msd(&msd_send, &msd_skb, msdp);
+			msd_len = 0;
 		}
 	}
 
@@ -919,31 +930,31 @@
 			packet->total_data_buflen += msd_len;
 		}
 
-		if (msdp->pkt)
-			dev_kfree_skb_any(skb);
+		if (msdp->skb)
+			dev_kfree_skb_any(msdp->skb);
 
 		if (xmit_more && !packet->cp_partial) {
+			msdp->skb = skb;
 			msdp->pkt = packet;
 			msdp->count++;
 		} else {
 			cur_send = packet;
+			msdp->skb = NULL;
 			msdp->pkt = NULL;
 			msdp->count = 0;
 		}
 	} else {
-		msd_send = msdp->pkt;
-		msdp->pkt = NULL;
-		msdp->count = 0;
+		move_pkt_msd(&msd_send, &msd_skb, msdp);
 		cur_send = packet;
 	}
 
 	if (msd_send) {
-		m_ret = netvsc_send_pkt(msd_send, net_device, pb, skb);
+		m_ret = netvsc_send_pkt(msd_send, net_device, NULL, msd_skb);
 
 		if (m_ret != 0) {
 			netvsc_free_send_slot(net_device,
 					      msd_send->send_buf_index);
-			dev_kfree_skb_any(skb);
+			dev_kfree_skb_any(msd_skb);
 		}
 	}
 
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 1c8db9a..98e34fe 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -196,65 +196,6 @@
 	return ppi;
 }
 
-union sub_key {
-	u64 k;
-	struct {
-		u8 pad[3];
-		u8 kb;
-		u32 ka;
-	};
-};
-
-/* Toeplitz hash function
- * data: network byte order
- * return: host byte order
- */
-static u32 comp_hash(u8 *key, int klen, void *data, int dlen)
-{
-	union sub_key subk;
-	int k_next = 4;
-	u8 dt;
-	int i, j;
-	u32 ret = 0;
-
-	subk.k = 0;
-	subk.ka = ntohl(*(u32 *)key);
-
-	for (i = 0; i < dlen; i++) {
-		subk.kb = key[k_next];
-		k_next = (k_next + 1) % klen;
-		dt = ((u8 *)data)[i];
-		for (j = 0; j < 8; j++) {
-			if (dt & 0x80)
-				ret ^= subk.ka;
-			dt <<= 1;
-			subk.k <<= 1;
-		}
-	}
-
-	return ret;
-}
-
-static bool netvsc_set_hash(u32 *hash, struct sk_buff *skb)
-{
-	struct flow_keys flow;
-	int data_len;
-
-	if (!skb_flow_dissect_flow_keys(skb, &flow, 0) ||
-	    !(flow.basic.n_proto == htons(ETH_P_IP) ||
-	      flow.basic.n_proto == htons(ETH_P_IPV6)))
-		return false;
-
-	if (flow.basic.ip_proto == IPPROTO_TCP)
-		data_len = 12;
-	else
-		data_len = 8;
-
-	*hash = comp_hash(netvsc_hash_key, HASH_KEYLEN, &flow, data_len);
-
-	return true;
-}
-
 static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb,
 			void *accel_priv, select_queue_fallback_t fallback)
 {
@@ -267,11 +208,9 @@
 	if (nvsc_dev == NULL || ndev->real_num_tx_queues <= 1)
 		return 0;
 
-	if (netvsc_set_hash(&hash, skb)) {
-		q_idx = nvsc_dev->send_table[hash % VRSS_SEND_TAB_SIZE] %
-			ndev->real_num_tx_queues;
-		skb_set_hash(skb, hash, PKT_HASH_TYPE_L3);
-	}
+	hash = skb_get_hash(skb);
+	q_idx = nvsc_dev->send_table[hash % VRSS_SEND_TAB_SIZE] %
+		ndev->real_num_tx_queues;
 
 	if (!nvsc_dev->chn_table[q_idx])
 		q_idx = 0;
@@ -1150,6 +1089,9 @@
 	net->ethtool_ops = &ethtool_ops;
 	SET_NETDEV_DEV(net, &dev->device);
 
+	/* We always need headroom for rndis header */
+	net->needed_headroom = RNDIS_AND_PPI_SIZE;
+
 	/* Notify the netvsc driver of the new device */
 	memset(&device_info, 0, sizeof(device_info));
 	device_info.ring_size = ring_size;
diff --git a/drivers/net/irda/bfin_sir.h b/drivers/net/irda/bfin_sir.h
index 29cbde8..d47cf14 100644
--- a/drivers/net/irda/bfin_sir.h
+++ b/drivers/net/irda/bfin_sir.h
@@ -82,9 +82,6 @@
 
 #define DRIVER_NAME "bfin_sir"
 
-#define port_membase(port)     (((struct bfin_sir_port *)(port))->membase)
-#define get_lsr_cache(port)    (((struct bfin_sir_port *)(port))->lsr)
-#define put_lsr_cache(port, v) (((struct bfin_sir_port *)(port))->lsr = (v))
 #include <asm/bfin_serial.h>
 
 static const unsigned short per[][4] = {
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 6a57a00..94e6888 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -1323,6 +1323,7 @@
 
 	list_add_tail_rcu(&vlan->list, &port->vlans);
 	netif_stacked_transfer_operstate(lowerdev, dev);
+	linkwatch_fire_event(dev);
 
 	return 0;
 
@@ -1522,6 +1523,7 @@
 	port = macvlan_port_get_rtnl(dev);
 
 	switch (event) {
+	case NETDEV_UP:
 	case NETDEV_CHANGE:
 		list_for_each_entry(vlan, &port->vlans, list)
 			netif_stacked_transfer_operstate(vlan->lowerdev,
diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig
index 60994a8..f0a77020 100644
--- a/drivers/net/phy/Kconfig
+++ b/drivers/net/phy/Kconfig
@@ -186,6 +186,7 @@
 config MDIO_OCTEON
 	tristate "Support for MDIO buses on Octeon and ThunderX SOCs"
 	depends on 64BIT
+	depends on HAS_IOMEM
 	help
 
 	  This module provides a driver for the Octeon and ThunderX MDIO
diff --git a/drivers/net/phy/bcm7xxx.c b/drivers/net/phy/bcm7xxx.c
index bf241a3..db507e3 100644
--- a/drivers/net/phy/bcm7xxx.c
+++ b/drivers/net/phy/bcm7xxx.c
@@ -250,10 +250,6 @@
 	phy_write(phydev, MII_BCM7XXX_AUX_MODE, MII_BCM7XX_64CLK_MDIO);
 	phy_read(phydev, MII_BCM7XXX_AUX_MODE);
 
-	/* Workaround only required for 100Mbits/sec capable PHYs */
-	if (phydev->supported & PHY_GBIT_FEATURES)
-		return 0;
-
 	/* set shadow mode 2 */
 	ret = phy_set_clr_bits(phydev, MII_BCM7XXX_TEST,
 			MII_BCM7XXX_SHD_MODE_2, MII_BCM7XXX_SHD_MODE_2);
@@ -270,7 +266,7 @@
 	phy_write(phydev, MII_BCM7XXX_100TX_FALSE_CAR, 0x7555);
 
 	/* reset shadow mode 2 */
-	ret = phy_set_clr_bits(phydev, MII_BCM7XXX_TEST, MII_BCM7XXX_SHD_MODE_2, 0);
+	ret = phy_set_clr_bits(phydev, MII_BCM7XXX_TEST, 0, MII_BCM7XXX_SHD_MODE_2);
 	if (ret < 0)
 		return ret;
 
@@ -307,11 +303,6 @@
 	return 0;
 }
 
-static int bcm7xxx_dummy_config_init(struct phy_device *phydev)
-{
-	return 0;
-}
-
 #define BCM7XXX_28NM_GPHY(_oui, _name)					\
 {									\
 	.phy_id		= (_oui),					\
@@ -337,7 +328,7 @@
 	.phy_id         = PHY_ID_BCM7425,
 	.phy_id_mask    = 0xfffffff0,
 	.name           = "Broadcom BCM7425",
-	.features       = PHY_GBIT_FEATURES |
+	.features       = PHY_BASIC_FEATURES |
 			  SUPPORTED_Pause | SUPPORTED_Asym_Pause,
 	.flags          = PHY_IS_INTERNAL,
 	.config_init    = bcm7xxx_config_init,
@@ -349,7 +340,7 @@
 	.phy_id         = PHY_ID_BCM7429,
 	.phy_id_mask    = 0xfffffff0,
 	.name           = "Broadcom BCM7429",
-	.features       = PHY_GBIT_FEATURES |
+	.features       = PHY_BASIC_FEATURES |
 			  SUPPORTED_Pause | SUPPORTED_Asym_Pause,
 	.flags          = PHY_IS_INTERNAL,
 	.config_init    = bcm7xxx_config_init,
@@ -361,7 +352,7 @@
 	.phy_id         = PHY_ID_BCM7435,
 	.phy_id_mask    = 0xfffffff0,
 	.name           = "Broadcom BCM7435",
-	.features       = PHY_GBIT_FEATURES |
+	.features       = PHY_BASIC_FEATURES |
 			  SUPPORTED_Pause | SUPPORTED_Asym_Pause,
 	.flags          = PHY_IS_INTERNAL,
 	.config_init    = bcm7xxx_config_init,
@@ -369,30 +360,6 @@
 	.read_status    = genphy_read_status,
 	.suspend        = bcm7xxx_suspend,
 	.resume         = bcm7xxx_config_init,
-}, {
-	.phy_id		= PHY_BCM_OUI_4,
-	.phy_id_mask	= 0xffff0000,
-	.name		= "Broadcom BCM7XXX 40nm",
-	.features	= PHY_GBIT_FEATURES |
-			  SUPPORTED_Pause | SUPPORTED_Asym_Pause,
-	.flags		= PHY_IS_INTERNAL,
-	.config_init	= bcm7xxx_config_init,
-	.config_aneg	= genphy_config_aneg,
-	.read_status	= genphy_read_status,
-	.suspend	= bcm7xxx_suspend,
-	.resume		= bcm7xxx_config_init,
-}, {
-	.phy_id		= PHY_BCM_OUI_5,
-	.phy_id_mask	= 0xffffff00,
-	.name		= "Broadcom BCM7XXX 65nm",
-	.features	= PHY_BASIC_FEATURES |
-			  SUPPORTED_Pause | SUPPORTED_Asym_Pause,
-	.flags		= PHY_IS_INTERNAL,
-	.config_init	= bcm7xxx_dummy_config_init,
-	.config_aneg	= genphy_config_aneg,
-	.read_status	= genphy_read_status,
-	.suspend	= bcm7xxx_suspend,
-	.resume		= bcm7xxx_config_init,
 } };
 
 static struct mdio_device_id __maybe_unused bcm7xxx_tbl[] = {
@@ -404,8 +371,6 @@
 	{ PHY_ID_BCM7439, 0xfffffff0, },
 	{ PHY_ID_BCM7435, 0xfffffff0, },
 	{ PHY_ID_BCM7445, 0xfffffff0, },
-	{ PHY_BCM_OUI_4, 0xffff0000 },
-	{ PHY_BCM_OUI_5, 0xffffff00 },
 	{ }
 };
 
diff --git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c
index 180f699..7a240fc 100644
--- a/drivers/net/phy/dp83640.c
+++ b/drivers/net/phy/dp83640.c
@@ -846,6 +846,11 @@
 	struct skb_shared_hwtstamps *shhwtstamps = NULL;
 	struct sk_buff *skb;
 	unsigned long flags;
+	u8 overflow;
+
+	overflow = (phy_rxts->ns_hi >> 14) & 0x3;
+	if (overflow)
+		pr_debug("rx timestamp queue overflow, count %d\n", overflow);
 
 	spin_lock_irqsave(&dp83640->rx_lock, flags);
 
@@ -888,6 +893,7 @@
 	struct skb_shared_hwtstamps shhwtstamps;
 	struct sk_buff *skb;
 	u64 ns;
+	u8 overflow;
 
 	/* We must already have the skb that triggered this. */
 
@@ -897,6 +903,17 @@
 		pr_debug("have timestamp but tx_queue empty\n");
 		return;
 	}
+
+	overflow = (phy_txts->ns_hi >> 14) & 0x3;
+	if (overflow) {
+		pr_debug("tx timestamp queue overflow, count %d\n", overflow);
+		while (skb) {
+			skb_complete_tx_timestamp(skb, NULL);
+			skb = skb_dequeue(&dp83640->tx_queue);
+		}
+		return;
+	}
+
 	ns = phy2txts(phy_txts);
 	memset(&shhwtstamps, 0, sizeof(shhwtstamps));
 	shhwtstamps.hwtstamp = ns_to_ktime(ns);
diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
index e3eb964..ab1d0fc 100644
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@ -446,6 +446,12 @@
 	if (err < 0)
 		return err;
 
+	return 0;
+}
+
+static int marvell_config_init(struct phy_device *phydev)
+{
+	/* Set registers from marvell,reg-init DT property */
 	return marvell_of_reg_init(phydev);
 }
 
@@ -495,7 +501,7 @@
 
 	mdelay(500);
 
-	return 0;
+	return marvell_config_init(phydev);
 }
 
 static int m88e3016_config_init(struct phy_device *phydev)
@@ -514,7 +520,7 @@
 	if (reg < 0)
 		return reg;
 
-	return 0;
+	return marvell_config_init(phydev);
 }
 
 static int m88e1111_config_init(struct phy_device *phydev)
@@ -1078,6 +1084,7 @@
 		.features = PHY_GBIT_FEATURES,
 		.probe = marvell_probe,
 		.flags = PHY_HAS_INTERRUPT,
+		.config_init = &marvell_config_init,
 		.config_aneg = &marvell_config_aneg,
 		.read_status = &genphy_read_status,
 		.ack_interrupt = &marvell_ack_interrupt,
@@ -1149,6 +1156,7 @@
 		.features = PHY_GBIT_FEATURES,
 		.flags = PHY_HAS_INTERRUPT,
 		.probe = marvell_probe,
+		.config_init = &marvell_config_init,
 		.config_aneg = &m88e1121_config_aneg,
 		.read_status = &marvell_read_status,
 		.ack_interrupt = &marvell_ack_interrupt,
@@ -1167,6 +1175,7 @@
 		.features = PHY_GBIT_FEATURES,
 		.flags = PHY_HAS_INTERRUPT,
 		.probe = marvell_probe,
+		.config_init = &marvell_config_init,
 		.config_aneg = &m88e1318_config_aneg,
 		.read_status = &marvell_read_status,
 		.ack_interrupt = &marvell_ack_interrupt,
@@ -1259,6 +1268,7 @@
 		.features = PHY_GBIT_FEATURES,
 		.flags = PHY_HAS_INTERRUPT,
 		.probe = marvell_probe,
+		.config_init = &marvell_config_init,
 		.config_aneg = &m88e1510_config_aneg,
 		.read_status = &marvell_read_status,
 		.ack_interrupt = &marvell_ack_interrupt,
@@ -1277,6 +1287,7 @@
 		.features = PHY_GBIT_FEATURES,
 		.flags = PHY_HAS_INTERRUPT,
 		.probe = marvell_probe,
+		.config_init = &marvell_config_init,
 		.config_aneg = &m88e1510_config_aneg,
 		.read_status = &marvell_read_status,
 		.ack_interrupt = &marvell_ack_interrupt,
diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index 03833db..dc85f70 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -297,6 +297,17 @@
 	if (priv->led_mode >= 0)
 		kszphy_setup_led(phydev, type->led_mode_reg, priv->led_mode);
 
+	if (phy_interrupt_is_valid(phydev)) {
+		int ctl = phy_read(phydev, MII_BMCR);
+
+		if (ctl < 0)
+			return ctl;
+
+		ret = phy_write(phydev, MII_BMCR, ctl & ~BMCR_ANENABLE);
+		if (ret < 0)
+			return ret;
+	}
+
 	return 0;
 }
 
@@ -635,6 +646,21 @@
 		data[i] = kszphy_get_stat(phydev, i);
 }
 
+static int kszphy_resume(struct phy_device *phydev)
+{
+	int value;
+
+	mutex_lock(&phydev->lock);
+
+	value = phy_read(phydev, MII_BMCR);
+	phy_write(phydev, MII_BMCR, value & ~BMCR_PDOWN);
+
+	kszphy_config_intr(phydev);
+	mutex_unlock(&phydev->lock);
+
+	return 0;
+}
+
 static int kszphy_probe(struct phy_device *phydev)
 {
 	const struct kszphy_type *type = phydev->drv->driver_data;
@@ -844,7 +870,7 @@
 	.get_strings	= kszphy_get_strings,
 	.get_stats	= kszphy_get_stats,
 	.suspend	= genphy_suspend,
-	.resume		= genphy_resume,
+	.resume		= kszphy_resume,
 }, {
 	.phy_id		= PHY_ID_KSZ8061,
 	.name		= "Micrel KSZ8061",
diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index 8763bb2..5590b9c 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -692,25 +692,29 @@
 	struct phy_device *phydev =
 		container_of(work, struct phy_device, phy_queue);
 
-	if (phydev->drv->did_interrupt &&
-	    !phydev->drv->did_interrupt(phydev))
-		goto ignore;
+	if (phy_interrupt_is_valid(phydev)) {
+		if (phydev->drv->did_interrupt &&
+		    !phydev->drv->did_interrupt(phydev))
+			goto ignore;
 
-	if (phy_disable_interrupts(phydev))
-		goto phy_err;
+		if (phy_disable_interrupts(phydev))
+			goto phy_err;
+	}
 
 	mutex_lock(&phydev->lock);
 	if ((PHY_RUNNING == phydev->state) || (PHY_NOLINK == phydev->state))
 		phydev->state = PHY_CHANGELINK;
 	mutex_unlock(&phydev->lock);
 
-	atomic_dec(&phydev->irq_disable);
-	enable_irq(phydev->irq);
+	if (phy_interrupt_is_valid(phydev)) {
+		atomic_dec(&phydev->irq_disable);
+		enable_irq(phydev->irq);
 
-	/* Reenable interrupts */
-	if (PHY_HALTED != phydev->state &&
-	    phy_config_interrupt(phydev, PHY_INTERRUPT_ENABLED))
-		goto irq_enable_err;
+		/* Reenable interrupts */
+		if (PHY_HALTED != phydev->state &&
+		    phy_config_interrupt(phydev, PHY_INTERRUPT_ENABLED))
+			goto irq_enable_err;
+	}
 
 	/* reschedule state queue work to run as soon as possible */
 	cancel_delayed_work_sync(&phydev->state_queue);
@@ -905,10 +909,10 @@
 		phydev->adjust_link(phydev->attached_dev);
 		break;
 	case PHY_RUNNING:
-		/* Only register a CHANGE if we are polling or ignoring
-		 * interrupts and link changed since latest checking.
+		/* Only register a CHANGE if we are polling and link changed
+		 * since latest checking.
 		 */
-		if (!phy_interrupt_is_valid(phydev)) {
+		if (phydev->irq == PHY_POLL) {
 			old_link = phydev->link;
 			err = phy_read_status(phydev);
 			if (err)
@@ -1000,15 +1004,21 @@
 		   phy_state_to_str(old_state),
 		   phy_state_to_str(phydev->state));
 
-	queue_delayed_work(system_power_efficient_wq, &phydev->state_queue,
-			   PHY_STATE_TIME * HZ);
+	/* Only re-schedule a PHY state machine change if we are polling the
+	 * PHY, if PHY_IGNORE_INTERRUPT is set, then we will be moving
+	 * between states from phy_mac_interrupt()
+	 */
+	if (phydev->irq == PHY_POLL)
+		queue_delayed_work(system_power_efficient_wq, &phydev->state_queue,
+				   PHY_STATE_TIME * HZ);
 }
 
 void phy_mac_interrupt(struct phy_device *phydev, int new_link)
 {
-	cancel_work_sync(&phydev->phy_queue);
 	phydev->link = new_link;
-	schedule_work(&phydev->phy_queue);
+
+	/* Trigger a state machine change */
+	queue_work(system_power_efficient_wq, &phydev->phy_queue);
 }
 EXPORT_SYMBOL(phy_mac_interrupt);
 
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index bad3f00..e551f3a 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -1410,7 +1410,7 @@
 
 	features = (SUPPORTED_TP | SUPPORTED_MII
 			| SUPPORTED_AUI | SUPPORTED_FIBRE |
-			SUPPORTED_BNC);
+			SUPPORTED_BNC | SUPPORTED_Pause | SUPPORTED_Asym_Pause);
 
 	/* Do we support autonegotiation? */
 	val = phy_read(phydev, MII_BMSR);
diff --git a/drivers/net/phy/smsc.c b/drivers/net/phy/smsc.c
index e485f26..2e21e93 100644
--- a/drivers/net/phy/smsc.c
+++ b/drivers/net/phy/smsc.c
@@ -24,6 +24,10 @@
 #include <linux/netdevice.h>
 #include <linux/smscphy.h>
 
+struct smsc_phy_priv {
+	bool energy_enable;
+};
+
 static int smsc_phy_config_intr(struct phy_device *phydev)
 {
 	int rc = phy_write (phydev, MII_LAN83C185_IM,
@@ -43,19 +47,14 @@
 
 static int smsc_phy_config_init(struct phy_device *phydev)
 {
-	int __maybe_unused len;
-	struct device *dev __maybe_unused = &phydev->mdio.dev;
-	struct device_node *of_node __maybe_unused = dev->of_node;
+	struct smsc_phy_priv *priv = phydev->priv;
+
 	int rc = phy_read(phydev, MII_LAN83C185_CTRL_STATUS);
-	int enable_energy = 1;
 
 	if (rc < 0)
 		return rc;
 
-	if (of_find_property(of_node, "smsc,disable-energy-detect", &len))
-		enable_energy = 0;
-
-	if (enable_energy) {
+	if (priv->energy_enable) {
 		/* Enable energy detect mode for this SMSC Transceivers */
 		rc = phy_write(phydev, MII_LAN83C185_CTRL_STATUS,
 			       rc | MII_LAN83C185_EDPWRDOWN);
@@ -110,10 +109,13 @@
  */
 static int lan87xx_read_status(struct phy_device *phydev)
 {
-	int err = genphy_read_status(phydev);
-	int i;
+	struct smsc_phy_priv *priv = phydev->priv;
 
-	if (!phydev->link) {
+	int err = genphy_read_status(phydev);
+
+	if (!phydev->link && priv->energy_enable) {
+		int i;
+
 		/* Disable EDPD to wake up PHY */
 		int rc = phy_read(phydev, MII_LAN83C185_CTRL_STATUS);
 		if (rc < 0)
@@ -149,6 +151,26 @@
 	return err;
 }
 
+static int smsc_phy_probe(struct phy_device *phydev)
+{
+	struct device *dev = &phydev->mdio.dev;
+	struct device_node *of_node = dev->of_node;
+	struct smsc_phy_priv *priv;
+
+	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	priv->energy_enable = true;
+
+	if (of_property_read_bool(of_node, "smsc,disable-energy-detect"))
+		priv->energy_enable = false;
+
+	phydev->priv = priv;
+
+	return 0;
+}
+
 static struct phy_driver smsc_phy_driver[] = {
 {
 	.phy_id		= 0x0007c0a0, /* OUI=0x00800f, Model#=0x0a */
@@ -159,6 +181,8 @@
 				| SUPPORTED_Asym_Pause),
 	.flags		= PHY_HAS_INTERRUPT | PHY_HAS_MAGICANEG,
 
+	.probe		= smsc_phy_probe,
+
 	/* basic functions */
 	.config_aneg	= genphy_config_aneg,
 	.read_status	= genphy_read_status,
@@ -180,6 +204,8 @@
 				| SUPPORTED_Asym_Pause),
 	.flags		= PHY_HAS_INTERRUPT | PHY_HAS_MAGICANEG,
 
+	.probe		= smsc_phy_probe,
+
 	/* basic functions */
 	.config_aneg	= genphy_config_aneg,
 	.read_status	= genphy_read_status,
@@ -201,6 +227,8 @@
 				| SUPPORTED_Asym_Pause),
 	.flags		= PHY_HAS_INTERRUPT | PHY_HAS_MAGICANEG,
 
+	.probe		= smsc_phy_probe,
+
 	/* basic functions */
 	.config_aneg	= genphy_config_aneg,
 	.read_status	= lan87xx_read_status,
@@ -222,6 +250,8 @@
 				| SUPPORTED_Asym_Pause),
 	.flags		= PHY_HAS_INTERRUPT | PHY_HAS_MAGICANEG,
 
+	.probe		= smsc_phy_probe,
+
 	/* basic functions */
 	.config_aneg	= genphy_config_aneg,
 	.read_status	= genphy_read_status,
@@ -242,6 +272,8 @@
 				| SUPPORTED_Asym_Pause),
 	.flags		= PHY_HAS_INTERRUPT | PHY_HAS_MAGICANEG,
 
+	.probe		= smsc_phy_probe,
+
 	/* basic functions */
 	.config_aneg	= genphy_config_aneg,
 	.read_status	= lan87xx_read_status,
@@ -263,6 +295,8 @@
 				| SUPPORTED_Asym_Pause),
 	.flags		= PHY_HAS_INTERRUPT | PHY_HAS_MAGICANEG,
 
+	.probe		= smsc_phy_probe,
+
 	/* basic functions */
 	.config_aneg	= genphy_config_aneg,
 	.read_status	= lan87xx_read_status,
diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
index fc8ad00..d61da9ec 100644
--- a/drivers/net/ppp/ppp_generic.c
+++ b/drivers/net/ppp/ppp_generic.c
@@ -443,9 +443,14 @@
 			 * network traffic (demand mode).
 			 */
 			struct ppp *ppp = PF_TO_PPP(pf);
+
+			ppp_recv_lock(ppp);
 			if (ppp->n_channels == 0 &&
-			    (ppp->flags & SC_LOOP_TRAFFIC) == 0)
+			    (ppp->flags & SC_LOOP_TRAFFIC) == 0) {
+				ppp_recv_unlock(ppp);
 				break;
+			}
+			ppp_recv_unlock(ppp);
 		}
 		ret = -EAGAIN;
 		if (file->f_flags & O_NONBLOCK)
@@ -532,9 +537,12 @@
 	else if (pf->kind == INTERFACE) {
 		/* see comment in ppp_read */
 		struct ppp *ppp = PF_TO_PPP(pf);
+
+		ppp_recv_lock(ppp);
 		if (ppp->n_channels == 0 &&
 		    (ppp->flags & SC_LOOP_TRAFFIC) == 0)
 			mask |= POLLIN | POLLRDNORM;
+		ppp_recv_unlock(ppp);
 	}
 
 	return mask;
@@ -2808,6 +2816,7 @@
 
 out2:
 	mutex_unlock(&pn->all_ppp_mutex);
+	rtnl_unlock();
 	free_netdev(dev);
 out1:
 	*retp = ret;
diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c
index f3c6302..4ddae81 100644
--- a/drivers/net/ppp/pppoe.c
+++ b/drivers/net/ppp/pppoe.c
@@ -395,6 +395,8 @@
 
 		if (!__pppoe_xmit(sk_pppox(relay_po), skb))
 			goto abort_put;
+
+		sock_put(sk_pppox(relay_po));
 	} else {
 		if (sock_queue_rcv_skb(sk, skb))
 			goto abort_kfree;
diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c
index 90868ca..ae0905e 100644
--- a/drivers/net/ppp/pptp.c
+++ b/drivers/net/ppp/pptp.c
@@ -129,24 +129,27 @@
 	return i < MAX_CALLID;
 }
 
-static int add_chan(struct pppox_sock *sock)
+static int add_chan(struct pppox_sock *sock,
+		    struct pptp_addr *sa)
 {
 	static int call_id;
 
 	spin_lock(&chan_lock);
-	if (!sock->proto.pptp.src_addr.call_id)	{
+	if (!sa->call_id)	{
 		call_id = find_next_zero_bit(callid_bitmap, MAX_CALLID, call_id + 1);
 		if (call_id == MAX_CALLID) {
 			call_id = find_next_zero_bit(callid_bitmap, MAX_CALLID, 1);
 			if (call_id == MAX_CALLID)
 				goto out_err;
 		}
-		sock->proto.pptp.src_addr.call_id = call_id;
-	} else if (test_bit(sock->proto.pptp.src_addr.call_id, callid_bitmap))
+		sa->call_id = call_id;
+	} else if (test_bit(sa->call_id, callid_bitmap)) {
 		goto out_err;
+	}
 
-	set_bit(sock->proto.pptp.src_addr.call_id, callid_bitmap);
-	rcu_assign_pointer(callid_sock[sock->proto.pptp.src_addr.call_id], sock);
+	sock->proto.pptp.src_addr = *sa;
+	set_bit(sa->call_id, callid_bitmap);
+	rcu_assign_pointer(callid_sock[sa->call_id], sock);
 	spin_unlock(&chan_lock);
 
 	return 0;
@@ -416,7 +419,6 @@
 	struct sock *sk = sock->sk;
 	struct sockaddr_pppox *sp = (struct sockaddr_pppox *) uservaddr;
 	struct pppox_sock *po = pppox_sk(sk);
-	struct pptp_opt *opt = &po->proto.pptp;
 	int error = 0;
 
 	if (sockaddr_len < sizeof(struct sockaddr_pppox))
@@ -424,10 +426,22 @@
 
 	lock_sock(sk);
 
-	opt->src_addr = sp->sa_addr.pptp;
-	if (add_chan(po))
-		error = -EBUSY;
+	if (sk->sk_state & PPPOX_DEAD) {
+		error = -EALREADY;
+		goto out;
+	}
 
+	if (sk->sk_state & PPPOX_BOUND) {
+		error = -EBUSY;
+		goto out;
+	}
+
+	if (add_chan(po, &sp->sa_addr.pptp))
+		error = -EBUSY;
+	else
+		sk->sk_state |= PPPOX_BOUND;
+
+out:
 	release_sock(sk);
 	return error;
 }
@@ -498,7 +512,7 @@
 	}
 
 	opt->dst_addr = sp->sa_addr.pptp;
-	sk->sk_state = PPPOX_CONNECTED;
+	sk->sk_state |= PPPOX_CONNECTED;
 
  end:
 	release_sock(sk);
diff --git a/drivers/net/usb/Kconfig b/drivers/net/usb/Kconfig
index 7f83504..cdde590 100644
--- a/drivers/net/usb/Kconfig
+++ b/drivers/net/usb/Kconfig
@@ -395,6 +395,10 @@
 	  The protocol specification is incomplete, and is controlled by
 	  (and for) Microsoft; it isn't an "Open" ecosystem or market.
 
+config USB_NET_CDC_SUBSET_ENABLE
+	tristate
+	depends on USB_NET_CDC_SUBSET
+
 config USB_NET_CDC_SUBSET
 	tristate "Simple USB Network Links (CDC Ethernet subset)"
 	depends on USB_USBNET
@@ -413,6 +417,7 @@
 config USB_ALI_M5632
 	bool "ALi M5632 based 'USB 2.0 Data Link' cables"
 	depends on USB_NET_CDC_SUBSET
+	select USB_NET_CDC_SUBSET_ENABLE
 	help
 	  Choose this option if you're using a host-to-host cable
 	  based on this design, which supports USB 2.0 high speed.
@@ -420,6 +425,7 @@
 config USB_AN2720
 	bool "AnchorChips 2720 based cables (Xircom PGUNET, ...)"
 	depends on USB_NET_CDC_SUBSET
+	select USB_NET_CDC_SUBSET_ENABLE
 	help
 	  Choose this option if you're using a host-to-host cable
 	  based on this design.  Note that AnchorChips is now a
@@ -428,6 +434,7 @@
 config USB_BELKIN
 	bool "eTEK based host-to-host cables (Advance, Belkin, ...)"
 	depends on USB_NET_CDC_SUBSET
+	select USB_NET_CDC_SUBSET_ENABLE
 	default y
 	help
 	  Choose this option if you're using a host-to-host cable
@@ -437,6 +444,7 @@
 config USB_ARMLINUX
 	bool "Embedded ARM Linux links (iPaq, ...)"
 	depends on USB_NET_CDC_SUBSET
+	select USB_NET_CDC_SUBSET_ENABLE
 	default y
 	help
 	  Choose this option to support the "usb-eth" networking driver
@@ -454,6 +462,7 @@
 config USB_EPSON2888
 	bool "Epson 2888 based firmware (DEVELOPMENT)"
 	depends on USB_NET_CDC_SUBSET
+	select USB_NET_CDC_SUBSET_ENABLE
 	help
 	  Choose this option to support the usb networking links used
 	  by some sample firmware from Epson.
@@ -461,6 +470,7 @@
 config USB_KC2190
 	bool "KT Technology KC2190 based cables (InstaNet)"
 	depends on USB_NET_CDC_SUBSET
+	select USB_NET_CDC_SUBSET_ENABLE
 	help
 	  Choose this option if you're using a host-to-host cable
 	  with one of these chips.
diff --git a/drivers/net/usb/Makefile b/drivers/net/usb/Makefile
index b5f0406..37fb46ae 100644
--- a/drivers/net/usb/Makefile
+++ b/drivers/net/usb/Makefile
@@ -23,7 +23,7 @@
 obj-$(CONFIG_USB_NET_NET1080)	+= net1080.o
 obj-$(CONFIG_USB_NET_PLUSB)	+= plusb.o
 obj-$(CONFIG_USB_NET_RNDIS_HOST)	+= rndis_host.o
-obj-$(CONFIG_USB_NET_CDC_SUBSET)	+= cdc_subset.o
+obj-$(CONFIG_USB_NET_CDC_SUBSET_ENABLE)	+= cdc_subset.o
 obj-$(CONFIG_USB_NET_ZAURUS)	+= zaurus.o
 obj-$(CONFIG_USB_NET_MCS7830)	+= mcs7830.o
 obj-$(CONFIG_USB_USBNET)	+= usbnet.o
diff --git a/drivers/net/usb/ax88172a.c b/drivers/net/usb/ax88172a.c
index 224e7d8..cf77f2d 100644
--- a/drivers/net/usb/ax88172a.c
+++ b/drivers/net/usb/ax88172a.c
@@ -134,7 +134,6 @@
 
 	netdev_info(dev->net, "deregistering mdio bus %s\n", priv->mdio->id);
 	mdiobus_unregister(priv->mdio);
-	kfree(priv->mdio->irq);
 	mdiobus_free(priv->mdio);
 }
 
diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index dc0212c..86ba30b 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c
@@ -837,7 +837,11 @@
 
 	iface_no = ctx->data->cur_altsetting->desc.bInterfaceNumber;
 
-	/* reset data interface */
+	/* Reset data interface. Some devices will not reset properly
+	 * unless they are configured first.  Toggle the altsetting to
+	 * force a reset
+	 */
+	usb_set_interface(dev->udev, iface_no, data_altsetting);
 	temp = usb_set_interface(dev->udev, iface_no, 0);
 	if (temp) {
 		dev_dbg(&intf->dev, "set interface failed\n");
@@ -984,8 +988,6 @@
 
 static int cdc_ncm_bind(struct usbnet *dev, struct usb_interface *intf)
 {
-	int ret;
-
 	/* MBIM backwards compatible function? */
 	if (cdc_ncm_select_altsetting(intf) != CDC_NCM_COMM_ALTSETTING_NCM)
 		return -ENODEV;
@@ -994,16 +996,7 @@
 	 * Additionally, generic NCM devices are assumed to accept arbitrarily
 	 * placed NDP.
 	 */
-	ret = cdc_ncm_bind_common(dev, intf, CDC_NCM_DATA_ALTSETTING_NCM, 0);
-
-	/*
-	 * We should get an event when network connection is "connected" or
-	 * "disconnected". Set network connection in "disconnected" state
-	 * (carrier is OFF) during attach, so the IP network stack does not
-	 * start IPv6 negotiation and more.
-	 */
-	usbnet_link_change(dev, 0, 0);
-	return ret;
+	return cdc_ncm_bind_common(dev, intf, CDC_NCM_DATA_ALTSETTING_NCM, 0);
 }
 
 static void cdc_ncm_align_tail(struct sk_buff *skb, size_t modulus, size_t remainder, size_t max)
@@ -1586,7 +1579,8 @@
 
 static const struct driver_info cdc_ncm_info = {
 	.description = "CDC NCM",
-	.flags = FLAG_POINTTOPOINT | FLAG_NO_SETINT | FLAG_MULTI_PACKET,
+	.flags = FLAG_POINTTOPOINT | FLAG_NO_SETINT | FLAG_MULTI_PACKET
+			| FLAG_LINK_INTR,
 	.bind = cdc_ncm_bind,
 	.unbind = cdc_ncm_unbind,
 	.manage_power = usbnet_manage_power,
@@ -1599,7 +1593,7 @@
 static const struct driver_info wwan_info = {
 	.description = "Mobile Broadband Network Device",
 	.flags = FLAG_POINTTOPOINT | FLAG_NO_SETINT | FLAG_MULTI_PACKET
-			| FLAG_WWAN,
+			| FLAG_LINK_INTR | FLAG_WWAN,
 	.bind = cdc_ncm_bind,
 	.unbind = cdc_ncm_unbind,
 	.manage_power = usbnet_manage_power,
@@ -1612,7 +1606,7 @@
 static const struct driver_info wwan_noarp_info = {
 	.description = "Mobile Broadband Network Device (NO ARP)",
 	.flags = FLAG_POINTTOPOINT | FLAG_NO_SETINT | FLAG_MULTI_PACKET
-			| FLAG_WWAN | FLAG_NOARP,
+			| FLAG_LINK_INTR | FLAG_WWAN | FLAG_NOARP,
 	.bind = cdc_ncm_bind,
 	.unbind = cdc_ncm_unbind,
 	.manage_power = usbnet_manage_power,
diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
index 2ed5333..1c299b8 100644
--- a/drivers/net/usb/lan78xx.c
+++ b/drivers/net/usb/lan78xx.c
@@ -36,7 +36,7 @@
 #define DRIVER_AUTHOR	"WOOJUNG HUH <woojung.huh@microchip.com>"
 #define DRIVER_DESC	"LAN78XX USB 3.0 Gigabit Ethernet Devices"
 #define DRIVER_NAME	"lan78xx"
-#define DRIVER_VERSION	"1.0.1"
+#define DRIVER_VERSION	"1.0.2"
 
 #define TX_TIMEOUT_JIFFIES		(5 * HZ)
 #define THROTTLE_JIFFIES		(HZ / 8)
@@ -462,32 +462,53 @@
 				   u32 length, u8 *data)
 {
 	u32 val;
+	u32 saved;
 	int i, ret;
+	int retval;
 
-	ret = lan78xx_eeprom_confirm_not_busy(dev);
-	if (ret)
-		return ret;
+	/* depends on chip, some EEPROM pins are muxed with LED function.
+	 * disable & restore LED function to access EEPROM.
+	 */
+	ret = lan78xx_read_reg(dev, HW_CFG, &val);
+	saved = val;
+	if ((dev->devid & ID_REV_CHIP_ID_MASK_) == 0x78000000) {
+		val &= ~(HW_CFG_LED1_EN_ | HW_CFG_LED0_EN_);
+		ret = lan78xx_write_reg(dev, HW_CFG, val);
+	}
+
+	retval = lan78xx_eeprom_confirm_not_busy(dev);
+	if (retval)
+		return retval;
 
 	for (i = 0; i < length; i++) {
 		val = E2P_CMD_EPC_BUSY_ | E2P_CMD_EPC_CMD_READ_;
 		val |= (offset & E2P_CMD_EPC_ADDR_MASK_);
 		ret = lan78xx_write_reg(dev, E2P_CMD, val);
-		if (unlikely(ret < 0))
-			return -EIO;
+		if (unlikely(ret < 0)) {
+			retval = -EIO;
+			goto exit;
+		}
 
-		ret = lan78xx_wait_eeprom(dev);
-		if (ret < 0)
-			return ret;
+		retval = lan78xx_wait_eeprom(dev);
+		if (retval < 0)
+			goto exit;
 
 		ret = lan78xx_read_reg(dev, E2P_DATA, &val);
-		if (unlikely(ret < 0))
-			return -EIO;
+		if (unlikely(ret < 0)) {
+			retval = -EIO;
+			goto exit;
+		}
 
 		data[i] = val & 0xFF;
 		offset++;
 	}
 
-	return 0;
+	retval = 0;
+exit:
+	if ((dev->devid & ID_REV_CHIP_ID_MASK_) == 0x78000000)
+		ret = lan78xx_write_reg(dev, HW_CFG, saved);
+
+	return retval;
 }
 
 static int lan78xx_read_eeprom(struct lan78xx_net *dev, u32 offset,
@@ -509,44 +530,67 @@
 				    u32 length, u8 *data)
 {
 	u32 val;
+	u32 saved;
 	int i, ret;
+	int retval;
 
-	ret = lan78xx_eeprom_confirm_not_busy(dev);
-	if (ret)
-		return ret;
+	/* depends on chip, some EEPROM pins are muxed with LED function.
+	 * disable & restore LED function to access EEPROM.
+	 */
+	ret = lan78xx_read_reg(dev, HW_CFG, &val);
+	saved = val;
+	if ((dev->devid & ID_REV_CHIP_ID_MASK_) == 0x78000000) {
+		val &= ~(HW_CFG_LED1_EN_ | HW_CFG_LED0_EN_);
+		ret = lan78xx_write_reg(dev, HW_CFG, val);
+	}
+
+	retval = lan78xx_eeprom_confirm_not_busy(dev);
+	if (retval)
+		goto exit;
 
 	/* Issue write/erase enable command */
 	val = E2P_CMD_EPC_BUSY_ | E2P_CMD_EPC_CMD_EWEN_;
 	ret = lan78xx_write_reg(dev, E2P_CMD, val);
-	if (unlikely(ret < 0))
-		return -EIO;
+	if (unlikely(ret < 0)) {
+		retval = -EIO;
+		goto exit;
+	}
 
-	ret = lan78xx_wait_eeprom(dev);
-	if (ret < 0)
-		return ret;
+	retval = lan78xx_wait_eeprom(dev);
+	if (retval < 0)
+		goto exit;
 
 	for (i = 0; i < length; i++) {
 		/* Fill data register */
 		val = data[i];
 		ret = lan78xx_write_reg(dev, E2P_DATA, val);
-		if (ret < 0)
-			return ret;
+		if (ret < 0) {
+			retval = -EIO;
+			goto exit;
+		}
 
 		/* Send "write" command */
 		val = E2P_CMD_EPC_BUSY_ | E2P_CMD_EPC_CMD_WRITE_;
 		val |= (offset & E2P_CMD_EPC_ADDR_MASK_);
 		ret = lan78xx_write_reg(dev, E2P_CMD, val);
-		if (ret < 0)
-			return ret;
+		if (ret < 0) {
+			retval = -EIO;
+			goto exit;
+		}
 
-		ret = lan78xx_wait_eeprom(dev);
-		if (ret < 0)
-			return ret;
+		retval = lan78xx_wait_eeprom(dev);
+		if (retval < 0)
+			goto exit;
 
 		offset++;
 	}
 
-	return 0;
+	retval = 0;
+exit:
+	if ((dev->devid & ID_REV_CHIP_ID_MASK_) == 0x78000000)
+		ret = lan78xx_write_reg(dev, HW_CFG, saved);
+
+	return retval;
 }
 
 static int lan78xx_read_raw_otp(struct lan78xx_net *dev, u32 offset,
@@ -904,7 +948,6 @@
 
 	if (!phydev->link && dev->link_on) {
 		dev->link_on = false;
-		netif_carrier_off(dev->net);
 
 		/* reset MAC */
 		ret = lan78xx_read_reg(dev, MAC_CR, &buf);
@@ -914,6 +957,8 @@
 		ret = lan78xx_write_reg(dev, MAC_CR, buf);
 		if (unlikely(ret < 0))
 			return -EIO;
+
+		phy_mac_interrupt(phydev, 0);
 	} else if (phydev->link && !dev->link_on) {
 		dev->link_on = true;
 
@@ -953,7 +998,7 @@
 			  ethtool_cmd_speed(&ecmd), ecmd.duplex, ladv, radv);
 
 		ret = lan78xx_update_flowcontrol(dev, ecmd.duplex, ladv, radv);
-		netif_carrier_on(dev->net);
+		phy_mac_interrupt(phydev, 1);
 	}
 
 	return ret;
@@ -1495,7 +1540,6 @@
 static int lan78xx_mdio_init(struct lan78xx_net *dev)
 {
 	int ret;
-	int i;
 
 	dev->mdiobus = mdiobus_alloc();
 	if (!dev->mdiobus) {
@@ -1511,10 +1555,6 @@
 	snprintf(dev->mdiobus->id, MII_BUS_ID_SIZE, "usb-%03d:%03d",
 		 dev->udev->bus->busnum, dev->udev->devnum);
 
-	/* handle our own interrupt */
-	for (i = 0; i < PHY_MAX_ADDR; i++)
-		dev->mdiobus->irq[i] = PHY_IGNORE_INTERRUPT;
-
 	switch (dev->devid & ID_REV_CHIP_ID_MASK_) {
 	case 0x78000000:
 	case 0x78500000:
@@ -1558,6 +1598,16 @@
 		return -EIO;
 	}
 
+	/* Enable PHY interrupts.
+	 * We handle our own interrupt
+	 */
+	ret = phy_read(phydev, LAN88XX_INT_STS);
+	ret = phy_write(phydev, LAN88XX_INT_MASK,
+			LAN88XX_INT_MASK_MDINTPIN_EN_ |
+			LAN88XX_INT_MASK_LINK_CHANGE_);
+
+	phydev->irq = PHY_IGNORE_INTERRUPT;
+
 	ret = phy_connect_direct(dev->net, phydev,
 				 lan78xx_link_status_change,
 				 PHY_INTERFACE_MODE_GMII);
@@ -1580,14 +1630,6 @@
 			      SUPPORTED_Pause | SUPPORTED_Asym_Pause);
 	genphy_config_aneg(phydev);
 
-	/* Workaround to enable PHY interrupt.
-	 * phy_start_interrupts() is API for requesting and enabling
-	 * PHY interrupt. However, USB-to-Ethernet device can't use
-	 * request_irq() called in phy_start_interrupts().
-	 * Set PHY to PHY_HALTED and call phy_start()
-	 * to make a call to phy_enable_interrupts()
-	 */
-	phy_stop(phydev);
 	phy_start(phydev);
 
 	netif_dbg(dev, ifup, dev->net, "phy initialised successfully");
@@ -2221,7 +2263,9 @@
 	if (skb2) {
 		skb_queue_tail(&dev->txq_pend, skb2);
 
-		if (skb_queue_len(&dev->txq_pend) > 10)
+		/* throttle TX patch at slower than SUPER SPEED USB */
+		if ((dev->udev->speed < USB_SPEED_SUPER) &&
+		    (skb_queue_len(&dev->txq_pend) > 10))
 			netif_stop_queue(net);
 	} else {
 		netif_dbg(dev, tx_err, dev->net,
diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index 23e9880..a3a4ccf 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -637,6 +637,7 @@
 
 	/* 3. Combined interface devices matching on interface number */
 	{QMI_FIXED_INTF(0x0408, 0xea42, 4)},	/* Yota / Megafon M100-1 */
+	{QMI_FIXED_INTF(0x05c6, 0x6001, 3)},	/* 4G LTE usb-modem U901 */
 	{QMI_FIXED_INTF(0x05c6, 0x7000, 0)},
 	{QMI_FIXED_INTF(0x05c6, 0x7001, 1)},
 	{QMI_FIXED_INTF(0x05c6, 0x7002, 1)},
@@ -860,8 +861,10 @@
 	{QMI_FIXED_INTF(0x1199, 0x9056, 8)},	/* Sierra Wireless Modem */
 	{QMI_FIXED_INTF(0x1199, 0x9057, 8)},
 	{QMI_FIXED_INTF(0x1199, 0x9061, 8)},	/* Sierra Wireless Modem */
-	{QMI_FIXED_INTF(0x1199, 0x9071, 8)},	/* Sierra Wireless MC74xx/EM74xx */
-	{QMI_FIXED_INTF(0x1199, 0x9071, 10)},	/* Sierra Wireless MC74xx/EM74xx */
+	{QMI_FIXED_INTF(0x1199, 0x9071, 8)},	/* Sierra Wireless MC74xx */
+	{QMI_FIXED_INTF(0x1199, 0x9071, 10)},	/* Sierra Wireless MC74xx */
+	{QMI_FIXED_INTF(0x1199, 0x9079, 8)},	/* Sierra Wireless EM74xx */
+	{QMI_FIXED_INTF(0x1199, 0x9079, 10)},	/* Sierra Wireless EM74xx */
 	{QMI_FIXED_INTF(0x1bbb, 0x011e, 4)},	/* Telekom Speedstick LTE II (Alcatel One Touch L100V LTE) */
 	{QMI_FIXED_INTF(0x1bbb, 0x0203, 2)},	/* Alcatel L800MA */
 	{QMI_FIXED_INTF(0x2357, 0x0201, 4)},	/* TP-LINK HSUPA Modem MA180 */
@@ -884,6 +887,7 @@
 	{QMI_FIXED_INTF(0x413c, 0x81a8, 8)},	/* Dell Wireless 5808 Gobi(TM) 4G LTE Mobile Broadband Card */
 	{QMI_FIXED_INTF(0x413c, 0x81a9, 8)},	/* Dell Wireless 5808e Gobi(TM) 4G LTE Mobile Broadband Card */
 	{QMI_FIXED_INTF(0x413c, 0x81b1, 8)},	/* Dell Wireless 5809e Gobi(TM) 4G LTE Mobile Broadband Card */
+	{QMI_FIXED_INTF(0x413c, 0x81b3, 8)},	/* Dell Wireless 5809e Gobi(TM) 4G LTE Mobile Broadband Card (rev3) */
 	{QMI_FIXED_INTF(0x03f0, 0x4e1d, 8)},	/* HP lt4111 LTE/EV-DO/HSPA+ Gobi 4G Module */
 	{QMI_FIXED_INTF(0x22de, 0x9061, 3)},	/* WeTelecom WPD-600N */
 	{QMI_FIXED_INTF(0x1e0e, 0x9001, 5)},	/* SIMCom 7230E */
diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c
index 0b0ba7e..1079812 100644
--- a/drivers/net/usb/usbnet.c
+++ b/drivers/net/usb/usbnet.c
@@ -1769,6 +1769,13 @@
 	if (info->unbind)
 		info->unbind (dev, udev);
 out1:
+	/* subdrivers must undo all they did in bind() if they
+	 * fail it, but we may fail later and a deferred kevent
+	 * may trigger an error resubmitting itself and, worse,
+	 * schedule a timer. So we kill it all just in case.
+	 */
+	cancel_work_sync(&dev->kevent);
+	del_timer_sync(&dev->delay);
 	free_netdev(net);
 out:
 	return status;
diff --git a/drivers/net/vmxnet3/vmxnet3_defs.h b/drivers/net/vmxnet3/vmxnet3_defs.h
index 221a530..72ba8ae 100644
--- a/drivers/net/vmxnet3/vmxnet3_defs.h
+++ b/drivers/net/vmxnet3/vmxnet3_defs.h
@@ -377,7 +377,7 @@
 #define VMXNET3_TX_RING_MAX_SIZE   4096
 #define VMXNET3_TC_RING_MAX_SIZE   4096
 #define VMXNET3_RX_RING_MAX_SIZE   4096
-#define VMXNET3_RX_RING2_MAX_SIZE  2048
+#define VMXNET3_RX_RING2_MAX_SIZE  4096
 #define VMXNET3_RC_RING_MAX_SIZE   8192
 
 /* a list of reasons for queue stop */
diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c
index 0cbf520..fc895d0 100644
--- a/drivers/net/vmxnet3/vmxnet3_drv.c
+++ b/drivers/net/vmxnet3/vmxnet3_drv.c
@@ -814,7 +814,7 @@
 
 
 /*
- *    parse and copy relevant protocol headers:
+ *    parse relevant protocol headers:
  *      For a tso pkt, relevant headers are L2/3/4 including options
  *      For a pkt requesting csum offloading, they are L2/3 and may include L4
  *      if it's a TCP/UDP pkt
@@ -827,15 +827,14 @@
  * Other effects:
  *    1. related *ctx fields are updated.
  *    2. ctx->copy_size is # of bytes copied
- *    3. the portion copied is guaranteed to be in the linear part
+ *    3. the portion to be copied is guaranteed to be in the linear part
  *
  */
 static int
-vmxnet3_parse_and_copy_hdr(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
-			   struct vmxnet3_tx_ctx *ctx,
-			   struct vmxnet3_adapter *adapter)
+vmxnet3_parse_hdr(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
+		  struct vmxnet3_tx_ctx *ctx,
+		  struct vmxnet3_adapter *adapter)
 {
-	struct Vmxnet3_TxDataDesc *tdd;
 	u8 protocol = 0;
 
 	if (ctx->mss) {	/* TSO */
@@ -892,16 +891,34 @@
 		return 0;
 	}
 
+	return 1;
+err:
+	return -1;
+}
+
+/*
+ *    copy relevant protocol headers to the transmit ring:
+ *      For a tso pkt, relevant headers are L2/3/4 including options
+ *      For a pkt requesting csum offloading, they are L2/3 and may include L4
+ *      if it's a TCP/UDP pkt
+ *
+ *
+ *    Note that this requires that vmxnet3_parse_hdr be called first to set the
+ *      appropriate bits in ctx first
+ */
+static void
+vmxnet3_copy_hdr(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
+		 struct vmxnet3_tx_ctx *ctx,
+		 struct vmxnet3_adapter *adapter)
+{
+	struct Vmxnet3_TxDataDesc *tdd;
+
 	tdd = tq->data_ring.base + tq->tx_ring.next2fill;
 
 	memcpy(tdd->data, skb->data, ctx->copy_size);
 	netdev_dbg(adapter->netdev,
 		"copy %u bytes to dataRing[%u]\n",
 		ctx->copy_size, tq->tx_ring.next2fill);
-	return 1;
-
-err:
-	return -1;
 }
 
 
@@ -998,22 +1015,7 @@
 		}
 	}
 
-	spin_lock_irqsave(&tq->tx_lock, flags);
-
-	if (count > vmxnet3_cmd_ring_desc_avail(&tq->tx_ring)) {
-		tq->stats.tx_ring_full++;
-		netdev_dbg(adapter->netdev,
-			"tx queue stopped on %s, next2comp %u"
-			" next2fill %u\n", adapter->netdev->name,
-			tq->tx_ring.next2comp, tq->tx_ring.next2fill);
-
-		vmxnet3_tq_stop(tq, adapter);
-		spin_unlock_irqrestore(&tq->tx_lock, flags);
-		return NETDEV_TX_BUSY;
-	}
-
-
-	ret = vmxnet3_parse_and_copy_hdr(skb, tq, &ctx, adapter);
+	ret = vmxnet3_parse_hdr(skb, tq, &ctx, adapter);
 	if (ret >= 0) {
 		BUG_ON(ret <= 0 && ctx.copy_size != 0);
 		/* hdrs parsed, check against other limits */
@@ -1033,9 +1035,26 @@
 		}
 	} else {
 		tq->stats.drop_hdr_inspect_err++;
-		goto unlock_drop_pkt;
+		goto drop_pkt;
 	}
 
+	spin_lock_irqsave(&tq->tx_lock, flags);
+
+	if (count > vmxnet3_cmd_ring_desc_avail(&tq->tx_ring)) {
+		tq->stats.tx_ring_full++;
+		netdev_dbg(adapter->netdev,
+			"tx queue stopped on %s, next2comp %u"
+			" next2fill %u\n", adapter->netdev->name,
+			tq->tx_ring.next2comp, tq->tx_ring.next2fill);
+
+		vmxnet3_tq_stop(tq, adapter);
+		spin_unlock_irqrestore(&tq->tx_lock, flags);
+		return NETDEV_TX_BUSY;
+	}
+
+
+	vmxnet3_copy_hdr(skb, tq, &ctx, adapter);
+
 	/* fill tx descs related to addr & len */
 	if (vmxnet3_map_pkt(skb, &ctx, tq, adapter->pdev, adapter))
 		goto unlock_drop_pkt;
diff --git a/drivers/net/vmxnet3/vmxnet3_int.h b/drivers/net/vmxnet3/vmxnet3_int.h
index bdb8a6c..729c344 100644
--- a/drivers/net/vmxnet3/vmxnet3_int.h
+++ b/drivers/net/vmxnet3/vmxnet3_int.h
@@ -69,10 +69,10 @@
 /*
  * Version numbers
  */
-#define VMXNET3_DRIVER_VERSION_STRING   "1.4.5.0-k"
+#define VMXNET3_DRIVER_VERSION_STRING   "1.4.6.0-k"
 
 /* a 32-bit int, each byte encode a verion number in VMXNET3_DRIVER_VERSION */
-#define VMXNET3_DRIVER_VERSION_NUM      0x01040500
+#define VMXNET3_DRIVER_VERSION_NUM      0x01040600
 
 #if defined(CONFIG_PCI_MSI)
 	/* RSS only makes sense if MSI-X is supported. */
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index 66addb7..bdcf617 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -104,20 +104,23 @@
 #if IS_ENABLED(CONFIG_IPV6)
 static bool check_ipv6_frame(const struct sk_buff *skb)
 {
-	const struct ipv6hdr *ipv6h = (struct ipv6hdr *)skb->data;
-	size_t hlen = sizeof(*ipv6h);
+	const struct ipv6hdr *ipv6h;
+	struct ipv6hdr _ipv6h;
 	bool rc = true;
 
-	if (skb->len < hlen)
+	ipv6h = skb_header_pointer(skb, 0, sizeof(_ipv6h), &_ipv6h);
+	if (!ipv6h)
 		goto out;
 
 	if (ipv6h->nexthdr == NEXTHDR_ICMP) {
 		const struct icmp6hdr *icmph;
+		struct icmp6hdr _icmph;
 
-		if (skb->len < hlen + sizeof(*icmph))
+		icmph = skb_header_pointer(skb, sizeof(_ipv6h),
+					   sizeof(_icmph), &_icmph);
+		if (!icmph)
 			goto out;
 
-		icmph = (struct icmp6hdr *)(skb->data + sizeof(*ipv6h));
 		switch (icmph->icmp6_type) {
 		case NDISC_ROUTER_SOLICITATION:
 		case NDISC_ROUTER_ADVERTISEMENT:
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 2d88c79..1c32bd1 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -73,7 +73,7 @@
 static int vxlan_net_id;
 static struct rtnl_link_ops vxlan_link_ops;
 
-static const u8 all_zeros_mac[ETH_ALEN];
+static const u8 all_zeros_mac[ETH_ALEN + 2];
 
 static int vxlan_sock_add(struct vxlan_dev *vxlan);
 
@@ -931,8 +931,10 @@
 						     cb->nlh->nlmsg_seq,
 						     RTM_NEWNEIGH,
 						     NLM_F_MULTI, rd);
-				if (err < 0)
+				if (err < 0) {
+					cb->args[1] = err;
 					goto out;
+				}
 skip:
 				++idx;
 			}
@@ -1306,8 +1308,10 @@
 		gbp = (struct vxlanhdr_gbp *)vxh;
 		md->gbp = ntohs(gbp->policy_id);
 
-		if (tun_dst)
+		if (tun_dst) {
 			tun_dst->u.tun_info.key.tun_flags |= TUNNEL_VXLAN_OPT;
+			tun_dst->u.tun_info.options_len = sizeof(*md);
+		}
 
 		if (gbp->dont_learn)
 			md->gbp |= VXLAN_GBP_DONT_LEARN;
@@ -1985,11 +1989,6 @@
 				     vxlan->cfg.port_max, true);
 
 	if (info) {
-		if (info->key.tun_flags & TUNNEL_CSUM)
-			flags |= VXLAN_F_UDP_CSUM;
-		else
-			flags &= ~VXLAN_F_UDP_CSUM;
-
 		ttl = info->key.ttl;
 		tos = info->key.tos;
 
@@ -2004,8 +2003,15 @@
 			goto drop;
 		sk = vxlan->vn4_sock->sock->sk;
 
-		if (info && (info->key.tun_flags & TUNNEL_DONT_FRAGMENT))
-			df = htons(IP_DF);
+		if (info) {
+			if (info->key.tun_flags & TUNNEL_DONT_FRAGMENT)
+				df = htons(IP_DF);
+
+			if (info->key.tun_flags & TUNNEL_CSUM)
+				flags |= VXLAN_F_UDP_CSUM;
+			else
+				flags &= ~VXLAN_F_UDP_CSUM;
+		}
 
 		memset(&fl4, 0, sizeof(fl4));
 		fl4.flowi4_oif = rdst ? rdst->remote_ifindex : 0;
@@ -2101,6 +2107,13 @@
 			return;
 		}
 
+		if (info) {
+			if (info->key.tun_flags & TUNNEL_CSUM)
+				flags &= ~VXLAN_F_UDP_ZERO_CSUM6_TX;
+			else
+				flags |= VXLAN_F_UDP_ZERO_CSUM6_TX;
+		}
+
 		ttl = ttl ? : ip6_dst_hoplimit(ndst);
 		err = vxlan6_xmit_skb(ndst, sk, skb, dev, &saddr, &dst->sin6.sin6_addr,
 				      0, ttl, src_port, dst_port, htonl(vni << 8), md,
@@ -2162,9 +2175,11 @@
 #endif
 	}
 
-	if (vxlan->flags & VXLAN_F_COLLECT_METADATA &&
-	    info && info->mode & IP_TUNNEL_INFO_TX) {
-		vxlan_xmit_one(skb, dev, NULL, false);
+	if (vxlan->flags & VXLAN_F_COLLECT_METADATA) {
+		if (info && info->mode & IP_TUNNEL_INFO_TX)
+			vxlan_xmit_one(skb, dev, NULL, false);
+		else
+			kfree_skb(skb);
 		return NETDEV_TX_OK;
 	}
 
@@ -2358,27 +2373,41 @@
 {
 }
 
+static int __vxlan_change_mtu(struct net_device *dev,
+			      struct net_device *lowerdev,
+			      struct vxlan_rdst *dst, int new_mtu, bool strict)
+{
+	int max_mtu = IP_MAX_MTU;
+
+	if (lowerdev)
+		max_mtu = lowerdev->mtu;
+
+	if (dst->remote_ip.sa.sa_family == AF_INET6)
+		max_mtu -= VXLAN6_HEADROOM;
+	else
+		max_mtu -= VXLAN_HEADROOM;
+
+	if (new_mtu < 68)
+		return -EINVAL;
+
+	if (new_mtu > max_mtu) {
+		if (strict)
+			return -EINVAL;
+
+		new_mtu = max_mtu;
+	}
+
+	dev->mtu = new_mtu;
+	return 0;
+}
+
 static int vxlan_change_mtu(struct net_device *dev, int new_mtu)
 {
 	struct vxlan_dev *vxlan = netdev_priv(dev);
 	struct vxlan_rdst *dst = &vxlan->default_dst;
-	struct net_device *lowerdev;
-	int max_mtu;
-
-	lowerdev = __dev_get_by_index(vxlan->net, dst->remote_ifindex);
-	if (lowerdev == NULL)
-		return eth_change_mtu(dev, new_mtu);
-
-	if (dst->remote_ip.sa.sa_family == AF_INET6)
-		max_mtu = lowerdev->mtu - VXLAN6_HEADROOM;
-	else
-		max_mtu = lowerdev->mtu - VXLAN_HEADROOM;
-
-	if (new_mtu < 68 || new_mtu > max_mtu)
-		return -EINVAL;
-
-	dev->mtu = new_mtu;
-	return 0;
+	struct net_device *lowerdev = __dev_get_by_index(vxlan->net,
+							 dst->remote_ifindex);
+	return __vxlan_change_mtu(dev, lowerdev, dst, new_mtu, true);
 }
 
 static int egress_ipv4_tun_info(struct net_device *dev, struct sk_buff *skb,
@@ -2514,6 +2543,7 @@
 	dev->hw_features |= NETIF_F_GSO_SOFTWARE;
 	dev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX;
 	netif_keep_dst(dev);
+	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
 	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_NO_QUEUE;
 
 	INIT_LIST_HEAD(&vxlan->next);
@@ -2756,6 +2786,7 @@
 	int err;
 	bool use_ipv6 = false;
 	__be16 default_port = vxlan->cfg.dst_port;
+	struct net_device *lowerdev = NULL;
 
 	vxlan->net = src_net;
 
@@ -2776,9 +2807,7 @@
 	}
 
 	if (conf->remote_ifindex) {
-		struct net_device *lowerdev
-			 = __dev_get_by_index(src_net, conf->remote_ifindex);
-
+		lowerdev = __dev_get_by_index(src_net, conf->remote_ifindex);
 		dst->remote_ifindex = conf->remote_ifindex;
 
 		if (!lowerdev) {
@@ -2802,6 +2831,12 @@
 		needed_headroom = lowerdev->hard_header_len;
 	}
 
+	if (conf->mtu) {
+		err = __vxlan_change_mtu(dev, lowerdev, dst, conf->mtu, false);
+		if (err)
+			return err;
+	}
+
 	if (use_ipv6 || conf->flags & VXLAN_F_COLLECT_METADATA)
 		needed_headroom += VXLAN6_HEADROOM;
 	else
diff --git a/drivers/net/wan/dscc4.c b/drivers/net/wan/dscc4.c
index 7a72407..6292259 100644
--- a/drivers/net/wan/dscc4.c
+++ b/drivers/net/wan/dscc4.c
@@ -1626,7 +1626,7 @@
 		if (state & Xpr) {
 			void __iomem *scc_addr;
 			unsigned long ring;
-			int i;
+			unsigned int i;
 
 			/*
 			 * - the busy condition happens (sometimes);
diff --git a/drivers/net/wireless/ath/ath9k/eeprom.c b/drivers/net/wireless/ath/ath9k/eeprom.c
index a7afdee..73fb423 100644
--- a/drivers/net/wireless/ath/ath9k/eeprom.c
+++ b/drivers/net/wireless/ath/ath9k/eeprom.c
@@ -150,18 +150,18 @@
 		return -EIO;
 	}
 
-	if (magic == AR5416_EEPROM_MAGIC) {
-		*swap_needed = false;
-	} else if (swab16(magic) == AR5416_EEPROM_MAGIC) {
+	*swap_needed = false;
+	if (swab16(magic) == AR5416_EEPROM_MAGIC) {
 		if (ah->ah_flags & AH_NO_EEP_SWAP) {
 			ath_info(common,
 				 "Ignoring endianness difference in EEPROM magic bytes.\n");
-
-			*swap_needed = false;
 		} else {
 			*swap_needed = true;
 		}
-	} else {
+	} else if (magic != AR5416_EEPROM_MAGIC) {
+		if (ath9k_hw_use_flash(ah))
+			return 0;
+
 		ath_err(common,
 			"Invalid EEPROM Magic (0x%04x).\n", magic);
 		return -EINVAL;
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c
index 5363739..b98db8a 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c
@@ -879,11 +879,24 @@
 	return 0;
 }
 
-static void brcmf_sdiod_sgtable_alloc(struct brcmf_sdio_dev *sdiodev)
+void brcmf_sdiod_sgtable_alloc(struct brcmf_sdio_dev *sdiodev)
 {
+	struct sdio_func *func;
+	struct mmc_host *host;
+	uint max_blocks;
 	uint nents;
 	int err;
 
+	func = sdiodev->func[2];
+	host = func->card->host;
+	sdiodev->sg_support = host->max_segs > 1;
+	max_blocks = min_t(uint, host->max_blk_count, 511u);
+	sdiodev->max_request_size = min_t(uint, host->max_req_size,
+					  max_blocks * func->cur_blksize);
+	sdiodev->max_segment_count = min_t(uint, host->max_segs,
+					   SG_MAX_SINGLE_ALLOC);
+	sdiodev->max_segment_size = host->max_seg_size;
+
 	if (!sdiodev->sg_support)
 		return;
 
@@ -1021,9 +1034,6 @@
 
 static int brcmf_sdiod_probe(struct brcmf_sdio_dev *sdiodev)
 {
-	struct sdio_func *func;
-	struct mmc_host *host;
-	uint max_blocks;
 	int ret = 0;
 
 	sdiodev->num_funcs = 2;
@@ -1054,26 +1064,6 @@
 		goto out;
 	}
 
-	/*
-	 * determine host related variables after brcmf_sdiod_probe()
-	 * as func->cur_blksize is properly set and F2 init has been
-	 * completed successfully.
-	 */
-	func = sdiodev->func[2];
-	host = func->card->host;
-	sdiodev->sg_support = host->max_segs > 1;
-	max_blocks = min_t(uint, host->max_blk_count, 511u);
-	sdiodev->max_request_size = min_t(uint, host->max_req_size,
-					  max_blocks * func->cur_blksize);
-	sdiodev->max_segment_count = min_t(uint, host->max_segs,
-					   SG_MAX_SINGLE_ALLOC);
-	sdiodev->max_segment_size = host->max_seg_size;
-
-	/* allocate scatter-gather table. sg support
-	 * will be disabled upon allocation failure.
-	 */
-	brcmf_sdiod_sgtable_alloc(sdiodev);
-
 	ret = brcmf_sdiod_freezer_attach(sdiodev);
 	if (ret)
 		goto out;
@@ -1084,7 +1074,7 @@
 		ret = -ENODEV;
 		goto out;
 	}
-	brcmf_sdiod_host_fixup(host);
+	brcmf_sdiod_host_fixup(sdiodev->func[2]->card->host);
 out:
 	if (ret)
 		brcmf_sdiod_remove(sdiodev);
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c
index 4265b50..cfee477 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c
@@ -17,6 +17,7 @@
 #include <linux/kernel.h>
 #include <linux/string.h>
 #include <linux/netdevice.h>
+#include <linux/module.h>
 #include <brcmu_wifi.h>
 #include <brcmu_utils.h>
 #include "core.h"
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
index dd66143..a14d9d9d 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
@@ -4114,6 +4114,11 @@
 		goto fail;
 	}
 
+	/* allocate scatter-gather table. sg support
+	 * will be disabled upon allocation failure.
+	 */
+	brcmf_sdiod_sgtable_alloc(bus->sdiodev);
+
 	/* Query the F2 block size, set roundup accordingly */
 	bus->blocksize = bus->sdiodev->func[2]->cur_blksize;
 	bus->roundup = min(max_roundup, bus->blocksize);
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.h
index 5ec7a6d..23f2231 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.h
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.h
@@ -342,6 +342,7 @@
 
 /* Issue an abort to the specified function */
 int brcmf_sdiod_abort(struct brcmf_sdio_dev *sdiodev, uint fn);
+void brcmf_sdiod_sgtable_alloc(struct brcmf_sdio_dev *sdiodev);
 void brcmf_sdiod_change_state(struct brcmf_sdio_dev *sdiodev,
 			      enum brcmf_sdiod_state state);
 #ifdef CONFIG_PM_SLEEP
diff --git a/drivers/net/wireless/intel/iwlwifi/Kconfig b/drivers/net/wireless/intel/iwlwifi/Kconfig
index 8660677..7438fbe 100644
--- a/drivers/net/wireless/intel/iwlwifi/Kconfig
+++ b/drivers/net/wireless/intel/iwlwifi/Kconfig
@@ -53,7 +53,6 @@
 
 config IWLDVM
 	tristate "Intel Wireless WiFi DVM Firmware support"
-	depends on m
 	help
 	  This is the driver that supports the DVM firmware. The list
 	  of the devices that use this firmware is available here:
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-7000.c b/drivers/net/wireless/intel/iwlwifi/iwl-7000.c
index e60cf14..fa41a5e 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-7000.c
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-7000.c
@@ -74,16 +74,19 @@
 #define IWL7260_UCODE_API_MAX	17
 #define IWL7265_UCODE_API_MAX	17
 #define IWL7265D_UCODE_API_MAX	20
+#define IWL3168_UCODE_API_MAX	20
 
 /* Oldest version we won't warn about */
 #define IWL7260_UCODE_API_OK	13
 #define IWL7265_UCODE_API_OK	13
 #define IWL7265D_UCODE_API_OK	13
+#define IWL3168_UCODE_API_OK	20
 
 /* Lowest firmware API version supported */
 #define IWL7260_UCODE_API_MIN	13
 #define IWL7265_UCODE_API_MIN	13
 #define IWL7265D_UCODE_API_MIN	13
+#define IWL3168_UCODE_API_MIN	20
 
 /* NVM versions */
 #define IWL7260_NVM_VERSION		0x0a1d
@@ -92,6 +95,8 @@
 #define IWL3160_TX_POWER_VERSION	0xffff /* meaningless */
 #define IWL3165_NVM_VERSION		0x709
 #define IWL3165_TX_POWER_VERSION	0xffff /* meaningless */
+#define IWL3168_NVM_VERSION		0xd01
+#define IWL3168_TX_POWER_VERSION	0xffff /* meaningless */
 #define IWL7265_NVM_VERSION		0x0a1d
 #define IWL7265_TX_POWER_VERSION	0xffff /* meaningless */
 #define IWL7265D_NVM_VERSION		0x0c11
@@ -109,6 +114,9 @@
 #define IWL3160_FW_PRE "iwlwifi-3160-"
 #define IWL3160_MODULE_FIRMWARE(api) IWL3160_FW_PRE __stringify(api) ".ucode"
 
+#define IWL3168_FW_PRE "iwlwifi-3168-"
+#define IWL3168_MODULE_FIRMWARE(api) IWL3168_FW_PRE __stringify(api) ".ucode"
+
 #define IWL7265_FW_PRE "iwlwifi-7265-"
 #define IWL7265_MODULE_FIRMWARE(api) IWL7265_FW_PRE __stringify(api) ".ucode"
 
@@ -180,6 +188,12 @@
 	.ucode_api_ok = IWL7265_UCODE_API_OK,			\
 	.ucode_api_min = IWL7265_UCODE_API_MIN
 
+#define IWL_DEVICE_3008						\
+	IWL_DEVICE_7000_COMMON,					\
+	.ucode_api_max = IWL3168_UCODE_API_MAX,			\
+	.ucode_api_ok = IWL3168_UCODE_API_OK,			\
+	.ucode_api_min = IWL3168_UCODE_API_MIN
+
 #define IWL_DEVICE_7005D					\
 	IWL_DEVICE_7000_COMMON,					\
 	.ucode_api_max = IWL7265D_UCODE_API_MAX,		\
@@ -299,11 +313,11 @@
 
 const struct iwl_cfg iwl3168_2ac_cfg = {
 	.name = "Intel(R) Dual Band Wireless AC 3168",
-	.fw_name_pre = IWL7265D_FW_PRE,
-	IWL_DEVICE_7000,
+	.fw_name_pre = IWL3168_FW_PRE,
+	IWL_DEVICE_3008,
 	.ht_params = &iwl7000_ht_params,
-	.nvm_ver = IWL3165_NVM_VERSION,
-	.nvm_calib_ver = IWL3165_TX_POWER_VERSION,
+	.nvm_ver = IWL3168_NVM_VERSION,
+	.nvm_calib_ver = IWL3168_TX_POWER_VERSION,
 	.pwr_tx_backoffs = iwl7265_pwr_tx_backoffs,
 	.dccm_len = IWL7265_DCCM_LEN,
 };
@@ -376,5 +390,6 @@
 
 MODULE_FIRMWARE(IWL7260_MODULE_FIRMWARE(IWL7260_UCODE_API_OK));
 MODULE_FIRMWARE(IWL3160_MODULE_FIRMWARE(IWL7260_UCODE_API_OK));
+MODULE_FIRMWARE(IWL3168_MODULE_FIRMWARE(IWL3168_UCODE_API_OK));
 MODULE_FIRMWARE(IWL7265_MODULE_FIRMWARE(IWL7265_UCODE_API_OK));
 MODULE_FIRMWARE(IWL7265D_MODULE_FIRMWARE(IWL7265D_UCODE_API_OK));
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-8000.c b/drivers/net/wireless/intel/iwlwifi/iwl-8000.c
index c84a029..bce9b3420 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-8000.c
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-8000.c
@@ -7,6 +7,7 @@
  *
  * Copyright(c) 2014 Intel Corporation. All rights reserved.
  * Copyright(c) 2014 - 2015 Intel Mobile Communications GmbH
+ * Copyright(c) 2016 Intel Deutschland GmbH
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
@@ -70,12 +71,15 @@
 
 /* Highest firmware API version supported */
 #define IWL8000_UCODE_API_MAX	20
+#define IWL8265_UCODE_API_MAX	20
 
 /* Oldest version we won't warn about */
 #define IWL8000_UCODE_API_OK	13
+#define IWL8265_UCODE_API_OK	20
 
 /* Lowest firmware API version supported */
 #define IWL8000_UCODE_API_MIN	13
+#define IWL8265_UCODE_API_MIN	20
 
 /* NVM versions */
 #define IWL8000_NVM_VERSION		0x0a1d
@@ -93,6 +97,10 @@
 #define IWL8000_MODULE_FIRMWARE(api) \
 	IWL8000_FW_PRE "-" __stringify(api) ".ucode"
 
+#define IWL8265_FW_PRE "iwlwifi-8265-"
+#define IWL8265_MODULE_FIRMWARE(api) \
+	IWL8265_FW_PRE __stringify(api) ".ucode"
+
 #define NVM_HW_SECTION_NUM_FAMILY_8000		10
 #define DEFAULT_NVM_FILE_FAMILY_8000B		"nvmData-8000B"
 #define DEFAULT_NVM_FILE_FAMILY_8000C		"nvmData-8000C"
@@ -144,10 +152,7 @@
 	.support_tx_backoff = true,
 };
 
-#define IWL_DEVICE_8000							\
-	.ucode_api_max = IWL8000_UCODE_API_MAX,				\
-	.ucode_api_ok = IWL8000_UCODE_API_OK,				\
-	.ucode_api_min = IWL8000_UCODE_API_MIN,				\
+#define IWL_DEVICE_8000_COMMON						\
 	.device_family = IWL_DEVICE_FAMILY_8000,			\
 	.max_inst_size = IWL60_RTC_INST_SIZE,				\
 	.max_data_size = IWL60_RTC_DATA_SIZE,				\
@@ -167,10 +172,28 @@
 	.thermal_params = &iwl8000_tt_params,				\
 	.apmg_not_supported = true
 
+#define IWL_DEVICE_8000							\
+	IWL_DEVICE_8000_COMMON,						\
+	.ucode_api_max = IWL8000_UCODE_API_MAX,				\
+	.ucode_api_ok = IWL8000_UCODE_API_OK,				\
+	.ucode_api_min = IWL8000_UCODE_API_MIN				\
+
+#define IWL_DEVICE_8260							\
+	IWL_DEVICE_8000_COMMON,						\
+	.ucode_api_max = IWL8000_UCODE_API_MAX,				\
+	.ucode_api_ok = IWL8000_UCODE_API_OK,				\
+	.ucode_api_min = IWL8000_UCODE_API_MIN				\
+
+#define IWL_DEVICE_8265							\
+	IWL_DEVICE_8000_COMMON,						\
+	.ucode_api_max = IWL8265_UCODE_API_MAX,				\
+	.ucode_api_ok = IWL8265_UCODE_API_OK,				\
+	.ucode_api_min = IWL8265_UCODE_API_MIN				\
+
 const struct iwl_cfg iwl8260_2n_cfg = {
 	.name = "Intel(R) Dual Band Wireless N 8260",
 	.fw_name_pre = IWL8000_FW_PRE,
-	IWL_DEVICE_8000,
+	IWL_DEVICE_8260,
 	.ht_params = &iwl8000_ht_params,
 	.nvm_ver = IWL8000_NVM_VERSION,
 	.nvm_calib_ver = IWL8000_TX_POWER_VERSION,
@@ -179,7 +202,7 @@
 const struct iwl_cfg iwl8260_2ac_cfg = {
 	.name = "Intel(R) Dual Band Wireless AC 8260",
 	.fw_name_pre = IWL8000_FW_PRE,
-	IWL_DEVICE_8000,
+	IWL_DEVICE_8260,
 	.ht_params = &iwl8000_ht_params,
 	.nvm_ver = IWL8000_NVM_VERSION,
 	.nvm_calib_ver = IWL8000_TX_POWER_VERSION,
@@ -188,8 +211,8 @@
 
 const struct iwl_cfg iwl8265_2ac_cfg = {
 	.name = "Intel(R) Dual Band Wireless AC 8265",
-	.fw_name_pre = IWL8000_FW_PRE,
-	IWL_DEVICE_8000,
+	.fw_name_pre = IWL8265_FW_PRE,
+	IWL_DEVICE_8265,
 	.ht_params = &iwl8000_ht_params,
 	.nvm_ver = IWL8000_NVM_VERSION,
 	.nvm_calib_ver = IWL8000_TX_POWER_VERSION,
@@ -209,7 +232,7 @@
 const struct iwl_cfg iwl8260_2ac_sdio_cfg = {
 	.name = "Intel(R) Dual Band Wireless-AC 8260",
 	.fw_name_pre = IWL8000_FW_PRE,
-	IWL_DEVICE_8000,
+	IWL_DEVICE_8260,
 	.ht_params = &iwl8000_ht_params,
 	.nvm_ver = IWL8000_NVM_VERSION,
 	.nvm_calib_ver = IWL8000_TX_POWER_VERSION,
@@ -236,3 +259,4 @@
 };
 
 MODULE_FIRMWARE(IWL8000_MODULE_FIRMWARE(IWL8000_UCODE_API_OK));
+MODULE_FIRMWARE(IWL8265_MODULE_FIRMWARE(IWL8265_UCODE_API_OK));
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c
index 7acb490..ab4c2a0 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c
@@ -243,8 +243,10 @@
 	if (drv->trans->cfg->device_family == IWL_DEVICE_FAMILY_8000) {
 		char rev_step = 'A' + CSR_HW_REV_STEP(drv->trans->hw_rev);
 
-		snprintf(drv->firmware_name, sizeof(drv->firmware_name),
-			 "%s%c-%s.ucode", name_pre, rev_step, tag);
+		if (rev_step != 'A')
+			snprintf(drv->firmware_name,
+				 sizeof(drv->firmware_name), "%s%c-%s.ucode",
+				 name_pre, rev_step, tag);
 	}
 
 	IWL_DEBUG_INFO(drv, "attempting to load firmware %s'%s'\n",
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-tx.h b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-tx.h
index 0036d18..ba3f0bb 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-tx.h
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-tx.h
@@ -510,6 +510,9 @@
  * @scd_ssn: the index of the last contiguously sent packet
  * @txed: number of Txed frames in this batch
  * @txed_2_done: number of Acked frames in this batch
+ * @reduced_txp: power reduced according to TPC. This is the actual value and
+ *	not a copy from the LQ command. Thus, if not the first rate was used
+ *	for Tx-ing then this value will be set to 0 by FW.
  */
 struct iwl_mvm_ba_notif {
 	__le32 sta_addr_lo32;
@@ -524,7 +527,8 @@
 	__le16 scd_ssn;
 	u8 txed;
 	u8 txed_2_done;
-	__le16 reserved1;
+	u8 reduced_txp;
+	u8 reserved1;
 } __packed;
 
 /*
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
index 4ed5180..0ccc697 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
@@ -107,7 +107,7 @@
 				    sizeof(tx_ant_cmd), &tx_ant_cmd);
 }
 
-static void iwl_free_fw_paging(struct iwl_mvm *mvm)
+void iwl_free_fw_paging(struct iwl_mvm *mvm)
 {
 	int i;
 
@@ -127,6 +127,8 @@
 			     get_order(mvm->fw_paging_db[i].fw_paging_size));
 	}
 	kfree(mvm->trans->paging_download_buf);
+	mvm->trans->paging_download_buf = NULL;
+
 	memset(mvm->fw_paging_db, 0, sizeof(mvm->fw_paging_db));
 }
 
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
index 5f3ac8c..ff7c6df 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
@@ -1225,6 +1225,9 @@
 void iwl_mvm_rx_umac_scan_iter_complete_notif(struct iwl_mvm *mvm,
 					      struct iwl_rx_cmd_buffer *rxb);
 
+/* Paging */
+void iwl_free_fw_paging(struct iwl_mvm *mvm);
+
 /* MVM debugfs */
 #ifdef CONFIG_IWLWIFI_DEBUGFS
 int iwl_mvm_dbgfs_register(struct iwl_mvm *mvm, struct dentry *dbgfs_dir);
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
index 89ea70d..e80be9a 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
@@ -684,6 +684,8 @@
 	for (i = 0; i < NVM_MAX_NUM_SECTIONS; i++)
 		kfree(mvm->nvm_sections[i].data);
 
+	iwl_free_fw_paging(mvm);
+
 	iwl_mvm_tof_clean(mvm);
 
 	ieee80211_free_hw(mvm->hw);
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rs.c b/drivers/net/wireless/intel/iwlwifi/mvm/rs.c
index 7bb6fd0..94caa88 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/rs.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/rs.c
@@ -2,6 +2,7 @@
  *
  * Copyright(c) 2005 - 2014 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
+ * Copyright(c) 2016 Intel Deutschland GmbH
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of version 2 of the GNU General Public License as
@@ -724,14 +725,28 @@
 	return 0;
 }
 
-static int rs_collect_tx_data(struct iwl_mvm *mvm,
-			      struct iwl_lq_sta *lq_sta,
-			      struct iwl_scale_tbl_info *tbl,
-			      int scale_index, int attempts, int successes,
-			      u8 reduced_txp)
+static int rs_collect_tpc_data(struct iwl_mvm *mvm,
+			       struct iwl_lq_sta *lq_sta,
+			       struct iwl_scale_tbl_info *tbl,
+			       int scale_index, int attempts, int successes,
+			       u8 reduced_txp)
 {
 	struct iwl_rate_scale_data *window = NULL;
-	int ret;
+
+	if (WARN_ON_ONCE(reduced_txp > TPC_MAX_REDUCTION))
+		return -EINVAL;
+
+	window = &tbl->tpc_win[reduced_txp];
+	return  _rs_collect_tx_data(mvm, tbl, scale_index, attempts, successes,
+				    window);
+}
+
+static int rs_collect_tlc_data(struct iwl_mvm *mvm,
+			       struct iwl_lq_sta *lq_sta,
+			       struct iwl_scale_tbl_info *tbl,
+			       int scale_index, int attempts, int successes)
+{
+	struct iwl_rate_scale_data *window = NULL;
 
 	if (scale_index < 0 || scale_index >= IWL_RATE_COUNT)
 		return -EINVAL;
@@ -745,16 +760,6 @@
 
 	/* Select window for current tx bit rate */
 	window = &(tbl->win[scale_index]);
-
-	ret = _rs_collect_tx_data(mvm, tbl, scale_index, attempts, successes,
-				  window);
-	if (ret)
-		return ret;
-
-	if (WARN_ON_ONCE(reduced_txp > TPC_MAX_REDUCTION))
-		return -EINVAL;
-
-	window = &tbl->tpc_win[reduced_txp];
 	return _rs_collect_tx_data(mvm, tbl, scale_index, attempts, successes,
 				   window);
 }
@@ -1301,17 +1306,30 @@
 	 * first index into rate scale table.
 	 */
 	if (info->flags & IEEE80211_TX_STAT_AMPDU) {
-		/* ampdu_ack_len = 0 marks no BA was received. In this case
-		 * treat it as a single frame loss as we don't want the success
-		 * ratio to dip too quickly because a BA wasn't received
+		rs_collect_tpc_data(mvm, lq_sta, curr_tbl, lq_rate.index,
+				    info->status.ampdu_len,
+				    info->status.ampdu_ack_len,
+				    reduced_txp);
+
+		/* ampdu_ack_len = 0 marks no BA was received. For TLC, treat
+		 * it as a single frame loss as we don't want the success ratio
+		 * to dip too quickly because a BA wasn't received.
+		 * For TPC, there's no need for this optimisation since we want
+		 * to recover very quickly from a bad power reduction and,
+		 * therefore we'd like the success ratio to get an immediate hit
+		 * when failing to get a BA, so we'd switch back to a lower or
+		 * zero power reduction. When FW transmits agg with a rate
+		 * different from the initial rate, it will not use reduced txp
+		 * and will send BA notification twice (one empty with reduced
+		 * txp equal to the value from LQ and one with reduced txp 0).
+		 * We need to update counters for each txp level accordingly.
 		 */
 		if (info->status.ampdu_ack_len == 0)
 			info->status.ampdu_len = 1;
 
-		rs_collect_tx_data(mvm, lq_sta, curr_tbl, lq_rate.index,
-				   info->status.ampdu_len,
-				   info->status.ampdu_ack_len,
-				   reduced_txp);
+		rs_collect_tlc_data(mvm, lq_sta, curr_tbl, lq_rate.index,
+				    info->status.ampdu_len,
+				    info->status.ampdu_ack_len);
 
 		/* Update success/fail counts if not searching for new mode */
 		if (lq_sta->rs_state == RS_STATE_STAY_IN_COLUMN) {
@@ -1344,9 +1362,13 @@
 			else
 				continue;
 
-			rs_collect_tx_data(mvm, lq_sta, tmp_tbl, lq_rate.index,
-					   1, i < retries ? 0 : legacy_success,
-					   reduced_txp);
+			rs_collect_tpc_data(mvm, lq_sta, tmp_tbl,
+					    lq_rate.index, 1,
+					    i < retries ? 0 : legacy_success,
+					    reduced_txp);
+			rs_collect_tlc_data(mvm, lq_sta, tmp_tbl,
+					    lq_rate.index, 1,
+					    i < retries ? 0 : legacy_success);
 		}
 
 		/* Update success/fail counts if not searching for new mode */
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c
index 9a15642..ea1e177 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c
@@ -1298,6 +1298,10 @@
 		return -EBUSY;
 	}
 
+	/* we don't support "match all" in the firmware */
+	if (!req->n_match_sets)
+		return -EOPNOTSUPP;
+
 	ret = iwl_mvm_check_running_scans(mvm, type);
 	if (ret)
 		return ret;
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
index 8bf48a7..a040edc 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
@@ -423,6 +423,15 @@
 		return -1;
 	}
 
+	/*
+	 * Increase the pending frames counter, so that later when a reply comes
+	 * in and the counter is decreased - we don't start getting negative
+	 * values.
+	 * Note that we don't need to make sure it isn't agg'd, since we're
+	 * TXing non-sta
+	 */
+	atomic_inc(&mvm->pending_frames[sta_id]);
+
 	return 0;
 }
 
@@ -1029,7 +1038,6 @@
 		struct iwl_mvm_sta *mvmsta = iwl_mvm_sta_from_mac80211(sta);
 		mvmsta->tid_data[tid].rate_n_flags =
 			le32_to_cpu(tx_resp->initial_rate);
-		mvmsta->tid_data[tid].reduced_tpc = tx_resp->reduced_tpc;
 		mvmsta->tid_data[tid].tx_time =
 			le16_to_cpu(tx_resp->wireless_media_time);
 	}
@@ -1060,7 +1068,7 @@
 	/* TODO: not accounted if the whole A-MPDU failed */
 	info->status.tx_time = tid_data->tx_time;
 	info->status.status_driver_data[0] =
-		(void *)(uintptr_t)tid_data->reduced_tpc;
+		(void *)(uintptr_t)ba_notif->reduced_txp;
 	info->status.status_driver_data[1] =
 		(void *)(uintptr_t)tid_data->rate_n_flags;
 }
@@ -1133,6 +1141,8 @@
 			   scd_flow, ba_resp_scd_ssn, ba_notif->txed,
 			   ba_notif->txed_2_done);
 
+	IWL_DEBUG_TX_REPLY(mvm, "reduced txp from ba notif %d\n",
+			   ba_notif->reduced_txp);
 	tid_data->next_reclaimed = ba_resp_scd_ssn;
 
 	iwl_mvm_check_ratid_empty(mvm, sta, tid);
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
index 6261a68..00335ea 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
@@ -378,7 +378,10 @@
 	{IWL_PCI_DEVICE(0x3165, 0x8110, iwl3165_2ac_cfg)},
 
 /* 3168 Series */
+	{IWL_PCI_DEVICE(0x24FB, 0x2010, iwl3168_2ac_cfg)},
 	{IWL_PCI_DEVICE(0x24FB, 0x2110, iwl3168_2ac_cfg)},
+	{IWL_PCI_DEVICE(0x24FB, 0x2050, iwl3168_2ac_cfg)},
+	{IWL_PCI_DEVICE(0x24FB, 0x2150, iwl3168_2ac_cfg)},
 	{IWL_PCI_DEVICE(0x24FB, 0x0000, iwl3168_2ac_cfg)},
 
 /* 7265 Series */
@@ -475,6 +478,7 @@
 	{IWL_PCI_DEVICE(0x24F3, 0x0000, iwl8265_2ac_cfg)},
 	{IWL_PCI_DEVICE(0x24FD, 0x0010, iwl8265_2ac_cfg)},
 	{IWL_PCI_DEVICE(0x24FD, 0x8010, iwl8265_2ac_cfg)},
+	{IWL_PCI_DEVICE(0x24FD, 0x0810, iwl8265_2ac_cfg)},
 
 /* 9000 Series */
 	{IWL_PCI_DEVICE(0x9DF0, 0x2A10, iwl5165_2ac_cfg)},
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
index cc3888e..73c9559 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
@@ -490,6 +490,15 @@
 	iwl_write32(trans, CSR_INT_MASK, trans_pcie->inta_mask);
 }
 
+static inline void iwl_enable_fw_load_int(struct iwl_trans *trans)
+{
+	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
+
+	IWL_DEBUG_ISR(trans, "Enabling FW load interrupt\n");
+	trans_pcie->inta_mask = CSR_INT_BIT_FH_TX;
+	iwl_write32(trans, CSR_INT_MASK, trans_pcie->inta_mask);
+}
+
 static inline void iwl_enable_rfkill_int(struct iwl_trans *trans)
 {
 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c
index ccafbd8..152cf9a 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c
@@ -1438,9 +1438,11 @@
 			 inta & ~trans_pcie->inta_mask);
 	}
 
-	/* Re-enable all interrupts */
-	/* only Re-enable if disabled by irq */
-	if (test_bit(STATUS_INT_ENABLED, &trans->status))
+	/* we are loading the firmware, enable FH_TX interrupt only */
+	if (handled & CSR_INT_BIT_FH_TX)
+		iwl_enable_fw_load_int(trans);
+	/* only Re-enable all interrupt if disabled by irq */
+	else if (test_bit(STATUS_INT_ENABLED, &trans->status))
 		iwl_enable_interrupts(trans);
 	/* Re-enable RF_KILL if it occurred */
 	else if (handled & CSR_INT_BIT_RF_KILL)
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
index d60a467..5a854c6 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
@@ -1021,82 +1021,6 @@
 					       &first_ucode_section);
 }
 
-static int iwl_trans_pcie_start_fw(struct iwl_trans *trans,
-				   const struct fw_img *fw, bool run_in_rfkill)
-{
-	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
-	bool hw_rfkill;
-	int ret;
-
-	mutex_lock(&trans_pcie->mutex);
-
-	/* Someone called stop_device, don't try to start_fw */
-	if (trans_pcie->is_down) {
-		IWL_WARN(trans,
-			 "Can't start_fw since the HW hasn't been started\n");
-		ret = EIO;
-		goto out;
-	}
-
-	/* This may fail if AMT took ownership of the device */
-	if (iwl_pcie_prepare_card_hw(trans)) {
-		IWL_WARN(trans, "Exit HW not ready\n");
-		ret = -EIO;
-		goto out;
-	}
-
-	iwl_enable_rfkill_int(trans);
-
-	/* If platform's RF_KILL switch is NOT set to KILL */
-	hw_rfkill = iwl_is_rfkill_set(trans);
-	if (hw_rfkill)
-		set_bit(STATUS_RFKILL, &trans->status);
-	else
-		clear_bit(STATUS_RFKILL, &trans->status);
-	iwl_trans_pcie_rf_kill(trans, hw_rfkill);
-	if (hw_rfkill && !run_in_rfkill) {
-		ret = -ERFKILL;
-		goto out;
-	}
-
-	iwl_write32(trans, CSR_INT, 0xFFFFFFFF);
-
-	ret = iwl_pcie_nic_init(trans);
-	if (ret) {
-		IWL_ERR(trans, "Unable to init nic\n");
-		goto out;
-	}
-
-	/* make sure rfkill handshake bits are cleared */
-	iwl_write32(trans, CSR_UCODE_DRV_GP1_CLR, CSR_UCODE_SW_BIT_RFKILL);
-	iwl_write32(trans, CSR_UCODE_DRV_GP1_CLR,
-		    CSR_UCODE_DRV_GP1_BIT_CMD_BLOCKED);
-
-	/* clear (again), then enable host interrupts */
-	iwl_write32(trans, CSR_INT, 0xFFFFFFFF);
-	iwl_enable_interrupts(trans);
-
-	/* really make sure rfkill handshake bits are cleared */
-	iwl_write32(trans, CSR_UCODE_DRV_GP1_CLR, CSR_UCODE_SW_BIT_RFKILL);
-	iwl_write32(trans, CSR_UCODE_DRV_GP1_CLR, CSR_UCODE_SW_BIT_RFKILL);
-
-	/* Load the given image to the HW */
-	if (trans->cfg->device_family == IWL_DEVICE_FAMILY_8000)
-		ret = iwl_pcie_load_given_ucode_8000(trans, fw);
-	else
-		ret = iwl_pcie_load_given_ucode(trans, fw);
-
-out:
-	mutex_unlock(&trans_pcie->mutex);
-	return ret;
-}
-
-static void iwl_trans_pcie_fw_alive(struct iwl_trans *trans, u32 scd_addr)
-{
-	iwl_pcie_reset_ict(trans);
-	iwl_pcie_tx_start(trans, scd_addr);
-}
-
 static void _iwl_trans_pcie_stop_device(struct iwl_trans *trans, bool low_power)
 {
 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
@@ -1127,7 +1051,8 @@
 	 * already dead.
 	 */
 	if (test_and_clear_bit(STATUS_DEVICE_ENABLED, &trans->status)) {
-		IWL_DEBUG_INFO(trans, "DEVICE_ENABLED bit was set and is now cleared\n");
+		IWL_DEBUG_INFO(trans,
+			       "DEVICE_ENABLED bit was set and is now cleared\n");
 		iwl_pcie_tx_stop(trans);
 		iwl_pcie_rx_stop(trans);
 
@@ -1161,7 +1086,6 @@
 	iwl_disable_interrupts(trans);
 	spin_unlock(&trans_pcie->irq_lock);
 
-
 	/* clear all status bits */
 	clear_bit(STATUS_SYNC_HCMD_ACTIVE, &trans->status);
 	clear_bit(STATUS_INT_ENABLED, &trans->status);
@@ -1194,10 +1118,116 @@
 	if (hw_rfkill != was_hw_rfkill)
 		iwl_trans_pcie_rf_kill(trans, hw_rfkill);
 
-	/* re-take ownership to prevent other users from stealing the deivce */
+	/* re-take ownership to prevent other users from stealing the device */
 	iwl_pcie_prepare_card_hw(trans);
 }
 
+static int iwl_trans_pcie_start_fw(struct iwl_trans *trans,
+				   const struct fw_img *fw, bool run_in_rfkill)
+{
+	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
+	bool hw_rfkill;
+	int ret;
+
+	/* This may fail if AMT took ownership of the device */
+	if (iwl_pcie_prepare_card_hw(trans)) {
+		IWL_WARN(trans, "Exit HW not ready\n");
+		ret = -EIO;
+		goto out;
+	}
+
+	iwl_enable_rfkill_int(trans);
+
+	iwl_write32(trans, CSR_INT, 0xFFFFFFFF);
+
+	/*
+	 * We enabled the RF-Kill interrupt and the handler may very
+	 * well be running. Disable the interrupts to make sure no other
+	 * interrupt can be fired.
+	 */
+	iwl_disable_interrupts(trans);
+
+	/* Make sure it finished running */
+	synchronize_irq(trans_pcie->pci_dev->irq);
+
+	mutex_lock(&trans_pcie->mutex);
+
+	/* If platform's RF_KILL switch is NOT set to KILL */
+	hw_rfkill = iwl_is_rfkill_set(trans);
+	if (hw_rfkill)
+		set_bit(STATUS_RFKILL, &trans->status);
+	else
+		clear_bit(STATUS_RFKILL, &trans->status);
+	iwl_trans_pcie_rf_kill(trans, hw_rfkill);
+	if (hw_rfkill && !run_in_rfkill) {
+		ret = -ERFKILL;
+		goto out;
+	}
+
+	/* Someone called stop_device, don't try to start_fw */
+	if (trans_pcie->is_down) {
+		IWL_WARN(trans,
+			 "Can't start_fw since the HW hasn't been started\n");
+		ret = -EIO;
+		goto out;
+	}
+
+	/* make sure rfkill handshake bits are cleared */
+	iwl_write32(trans, CSR_UCODE_DRV_GP1_CLR, CSR_UCODE_SW_BIT_RFKILL);
+	iwl_write32(trans, CSR_UCODE_DRV_GP1_CLR,
+		    CSR_UCODE_DRV_GP1_BIT_CMD_BLOCKED);
+
+	/* clear (again), then enable host interrupts */
+	iwl_write32(trans, CSR_INT, 0xFFFFFFFF);
+
+	ret = iwl_pcie_nic_init(trans);
+	if (ret) {
+		IWL_ERR(trans, "Unable to init nic\n");
+		goto out;
+	}
+
+	/*
+	 * Now, we load the firmware and don't want to be interrupted, even
+	 * by the RF-Kill interrupt (hence mask all the interrupt besides the
+	 * FH_TX interrupt which is needed to load the firmware). If the
+	 * RF-Kill switch is toggled, we will find out after having loaded
+	 * the firmware and return the proper value to the caller.
+	 */
+	iwl_enable_fw_load_int(trans);
+
+	/* really make sure rfkill handshake bits are cleared */
+	iwl_write32(trans, CSR_UCODE_DRV_GP1_CLR, CSR_UCODE_SW_BIT_RFKILL);
+	iwl_write32(trans, CSR_UCODE_DRV_GP1_CLR, CSR_UCODE_SW_BIT_RFKILL);
+
+	/* Load the given image to the HW */
+	if (trans->cfg->device_family == IWL_DEVICE_FAMILY_8000)
+		ret = iwl_pcie_load_given_ucode_8000(trans, fw);
+	else
+		ret = iwl_pcie_load_given_ucode(trans, fw);
+	iwl_enable_interrupts(trans);
+
+	/* re-check RF-Kill state since we may have missed the interrupt */
+	hw_rfkill = iwl_is_rfkill_set(trans);
+	if (hw_rfkill)
+		set_bit(STATUS_RFKILL, &trans->status);
+	else
+		clear_bit(STATUS_RFKILL, &trans->status);
+
+	iwl_trans_pcie_rf_kill(trans, hw_rfkill);
+	if (hw_rfkill && !run_in_rfkill)
+		ret = -ERFKILL;
+
+out:
+	mutex_unlock(&trans_pcie->mutex);
+	return ret;
+}
+
+static void iwl_trans_pcie_fw_alive(struct iwl_trans *trans, u32 scd_addr)
+{
+	iwl_pcie_reset_ict(trans);
+	iwl_pcie_tx_start(trans, scd_addr);
+}
+
 static void iwl_trans_pcie_stop_device(struct iwl_trans *trans, bool low_power)
 {
 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index c32889a..a28414c 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -991,7 +991,8 @@
 		goto nla_put_failure;
 	}
 
-	if (nla_put(skb, HWSIM_ATTR_ADDR_TRANSMITTER, ETH_ALEN, hdr->addr2))
+	if (nla_put(skb, HWSIM_ATTR_ADDR_TRANSMITTER,
+		    ETH_ALEN, data->addresses[1].addr))
 		goto nla_put_failure;
 
 	/* We get the skb->data */
@@ -2736,7 +2737,7 @@
 
 	spin_lock_bh(&hwsim_radio_lock);
 	list_for_each_entry(data, &hwsim_radios, list) {
-		if (mac80211_hwsim_addr_match(data, addr)) {
+		if (memcmp(data->addresses[1].addr, addr, ETH_ALEN) == 0) {
 			_found = true;
 			break;
 		}
diff --git a/drivers/net/wireless/ralink/rt2x00/rt2400pci.c b/drivers/net/wireless/ralink/rt2x00/rt2400pci.c
index 9a3966c..155f343 100644
--- a/drivers/net/wireless/ralink/rt2x00/rt2400pci.c
+++ b/drivers/net/wireless/ralink/rt2x00/rt2400pci.c
@@ -273,8 +273,10 @@
 			   !(filter_flags & FIF_PLCPFAIL));
 	rt2x00_set_field32(&reg, RXCSR0_DROP_CONTROL,
 			   !(filter_flags & FIF_CONTROL));
-	rt2x00_set_field32(&reg, RXCSR0_DROP_NOT_TO_ME, 1);
+	rt2x00_set_field32(&reg, RXCSR0_DROP_NOT_TO_ME,
+			   !test_bit(CONFIG_MONITORING, &rt2x00dev->flags));
 	rt2x00_set_field32(&reg, RXCSR0_DROP_TODS,
+			   !test_bit(CONFIG_MONITORING, &rt2x00dev->flags) &&
 			   !rt2x00dev->intf_ap_count);
 	rt2x00_set_field32(&reg, RXCSR0_DROP_VERSION_ERROR, 1);
 	rt2x00mmio_register_write(rt2x00dev, RXCSR0, reg);
diff --git a/drivers/net/wireless/ralink/rt2x00/rt2500pci.c b/drivers/net/wireless/ralink/rt2x00/rt2500pci.c
index 1a6740b..2553cdd 100644
--- a/drivers/net/wireless/ralink/rt2x00/rt2500pci.c
+++ b/drivers/net/wireless/ralink/rt2x00/rt2500pci.c
@@ -274,8 +274,10 @@
 			   !(filter_flags & FIF_PLCPFAIL));
 	rt2x00_set_field32(&reg, RXCSR0_DROP_CONTROL,
 			   !(filter_flags & FIF_CONTROL));
-	rt2x00_set_field32(&reg, RXCSR0_DROP_NOT_TO_ME, 1);
+	rt2x00_set_field32(&reg, RXCSR0_DROP_NOT_TO_ME,
+			   !test_bit(CONFIG_MONITORING, &rt2x00dev->flags));
 	rt2x00_set_field32(&reg, RXCSR0_DROP_TODS,
+			   !test_bit(CONFIG_MONITORING, &rt2x00dev->flags) &&
 			   !rt2x00dev->intf_ap_count);
 	rt2x00_set_field32(&reg, RXCSR0_DROP_VERSION_ERROR, 1);
 	rt2x00_set_field32(&reg, RXCSR0_DROP_MCAST,
diff --git a/drivers/net/wireless/ralink/rt2x00/rt2500usb.c b/drivers/net/wireless/ralink/rt2x00/rt2500usb.c
index d26018f..2d64611 100644
--- a/drivers/net/wireless/ralink/rt2x00/rt2500usb.c
+++ b/drivers/net/wireless/ralink/rt2x00/rt2500usb.c
@@ -437,8 +437,10 @@
 			   !(filter_flags & FIF_PLCPFAIL));
 	rt2x00_set_field16(&reg, TXRX_CSR2_DROP_CONTROL,
 			   !(filter_flags & FIF_CONTROL));
-	rt2x00_set_field16(&reg, TXRX_CSR2_DROP_NOT_TO_ME, 1);
+	rt2x00_set_field16(&reg, TXRX_CSR2_DROP_NOT_TO_ME,
+			   !test_bit(CONFIG_MONITORING, &rt2x00dev->flags));
 	rt2x00_set_field16(&reg, TXRX_CSR2_DROP_TODS,
+			   !test_bit(CONFIG_MONITORING, &rt2x00dev->flags) &&
 			   !rt2x00dev->intf_ap_count);
 	rt2x00_set_field16(&reg, TXRX_CSR2_DROP_VERSION_ERROR, 1);
 	rt2x00_set_field16(&reg, TXRX_CSR2_DROP_MULTICAST,
diff --git a/drivers/net/wireless/ralink/rt2x00/rt2800lib.c b/drivers/net/wireless/ralink/rt2x00/rt2800lib.c
index 9733b31..a26afca 100644
--- a/drivers/net/wireless/ralink/rt2x00/rt2800lib.c
+++ b/drivers/net/wireless/ralink/rt2x00/rt2800lib.c
@@ -1490,7 +1490,8 @@
 			   !(filter_flags & FIF_FCSFAIL));
 	rt2x00_set_field32(&reg, RX_FILTER_CFG_DROP_PHY_ERROR,
 			   !(filter_flags & FIF_PLCPFAIL));
-	rt2x00_set_field32(&reg, RX_FILTER_CFG_DROP_NOT_TO_ME, 1);
+	rt2x00_set_field32(&reg, RX_FILTER_CFG_DROP_NOT_TO_ME,
+			   !test_bit(CONFIG_MONITORING, &rt2x00dev->flags));
 	rt2x00_set_field32(&reg, RX_FILTER_CFG_DROP_NOT_MY_BSSD, 0);
 	rt2x00_set_field32(&reg, RX_FILTER_CFG_DROP_VER_ERROR, 1);
 	rt2x00_set_field32(&reg, RX_FILTER_CFG_DROP_MULTICAST,
diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00.h b/drivers/net/wireless/ralink/rt2x00/rt2x00.h
index 3282ddb..2642714 100644
--- a/drivers/net/wireless/ralink/rt2x00/rt2x00.h
+++ b/drivers/net/wireless/ralink/rt2x00/rt2x00.h
@@ -669,6 +669,7 @@
 	CONFIG_POWERSAVING,
 	CONFIG_HT_DISABLED,
 	CONFIG_QOS_DISABLED,
+	CONFIG_MONITORING,
 
 	/*
 	 * Mark we currently are sequentially reading TX_STA_FIFO register
diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00config.c b/drivers/net/wireless/ralink/rt2x00/rt2x00config.c
index 7e8bb11..6a1f508 100644
--- a/drivers/net/wireless/ralink/rt2x00/rt2x00config.c
+++ b/drivers/net/wireless/ralink/rt2x00/rt2x00config.c
@@ -277,6 +277,11 @@
 	else
 		clear_bit(CONFIG_POWERSAVING, &rt2x00dev->flags);
 
+	if (conf->flags & IEEE80211_CONF_MONITOR)
+		set_bit(CONFIG_MONITORING, &rt2x00dev->flags);
+	else
+		clear_bit(CONFIG_MONITORING, &rt2x00dev->flags);
+
 	rt2x00dev->curr_band = conf->chandef.chan->band;
 	rt2x00dev->curr_freq = conf->chandef.chan->center_freq;
 	rt2x00dev->tx_power = conf->power_level;
diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00mac.c b/drivers/net/wireless/ralink/rt2x00/rt2x00mac.c
index 3c26ee6..13da95a 100644
--- a/drivers/net/wireless/ralink/rt2x00/rt2x00mac.c
+++ b/drivers/net/wireless/ralink/rt2x00/rt2x00mac.c
@@ -385,11 +385,6 @@
 			*total_flags |= FIF_PSPOLL;
 	}
 
-	/*
-	 * Check if there is any work left for us.
-	 */
-	if (rt2x00dev->packet_filter == *total_flags)
-		return;
 	rt2x00dev->packet_filter = *total_flags;
 
 	rt2x00dev->ops->lib->config_filter(rt2x00dev, *total_flags);
diff --git a/drivers/net/wireless/ralink/rt2x00/rt61pci.c b/drivers/net/wireless/ralink/rt2x00/rt61pci.c
index c0e730e..24a3436 100644
--- a/drivers/net/wireless/ralink/rt2x00/rt61pci.c
+++ b/drivers/net/wireless/ralink/rt2x00/rt61pci.c
@@ -530,8 +530,10 @@
 			   !(filter_flags & FIF_PLCPFAIL));
 	rt2x00_set_field32(&reg, TXRX_CSR0_DROP_CONTROL,
 			   !(filter_flags & (FIF_CONTROL | FIF_PSPOLL)));
-	rt2x00_set_field32(&reg, TXRX_CSR0_DROP_NOT_TO_ME, 1);
+	rt2x00_set_field32(&reg, TXRX_CSR0_DROP_NOT_TO_ME,
+			   !test_bit(CONFIG_MONITORING, &rt2x00dev->flags));
 	rt2x00_set_field32(&reg, TXRX_CSR0_DROP_TO_DS,
+			   !test_bit(CONFIG_MONITORING, &rt2x00dev->flags) &&
 			   !rt2x00dev->intf_ap_count);
 	rt2x00_set_field32(&reg, TXRX_CSR0_DROP_VERSION_ERROR, 1);
 	rt2x00_set_field32(&reg, TXRX_CSR0_DROP_MULTICAST,
diff --git a/drivers/net/wireless/ralink/rt2x00/rt73usb.c b/drivers/net/wireless/ralink/rt2x00/rt73usb.c
index 7081e13..7bbc869 100644
--- a/drivers/net/wireless/ralink/rt2x00/rt73usb.c
+++ b/drivers/net/wireless/ralink/rt2x00/rt73usb.c
@@ -480,8 +480,10 @@
 			   !(filter_flags & FIF_PLCPFAIL));
 	rt2x00_set_field32(&reg, TXRX_CSR0_DROP_CONTROL,
 			   !(filter_flags & (FIF_CONTROL | FIF_PSPOLL)));
-	rt2x00_set_field32(&reg, TXRX_CSR0_DROP_NOT_TO_ME, 1);
+	rt2x00_set_field32(&reg, TXRX_CSR0_DROP_NOT_TO_ME,
+			   !test_bit(CONFIG_MONITORING, &rt2x00dev->flags));
 	rt2x00_set_field32(&reg, TXRX_CSR0_DROP_TO_DS,
+			   !test_bit(CONFIG_MONITORING, &rt2x00dev->flags) &&
 			   !rt2x00dev->intf_ap_count);
 	rt2x00_set_field32(&reg, TXRX_CSR0_DROP_VERSION_ERROR, 1);
 	rt2x00_set_field32(&reg, TXRX_CSR0_DROP_MULTICAST,
diff --git a/drivers/net/wireless/realtek/rtlwifi/rc.c b/drivers/net/wireless/realtek/rtlwifi/rc.c
index 74c14ce..28f7010 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rc.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rc.c
@@ -138,6 +138,11 @@
 		    ((wireless_mode == WIRELESS_MODE_N_5G) ||
 		     (wireless_mode == WIRELESS_MODE_N_24G)))
 			rate->flags |= IEEE80211_TX_RC_MCS;
+		if (sta && sta->vht_cap.vht_supported &&
+		    (wireless_mode == WIRELESS_MODE_AC_5G ||
+		     wireless_mode == WIRELESS_MODE_AC_24G ||
+		     wireless_mode == WIRELESS_MODE_AC_ONLY))
+			rate->flags |= IEEE80211_TX_RC_VHT_MCS;
 	}
 }
 
diff --git a/drivers/net/wireless/realtek/rtlwifi/regd.c b/drivers/net/wireless/realtek/rtlwifi/regd.c
index a62bf0a..5be3411 100644
--- a/drivers/net/wireless/realtek/rtlwifi/regd.c
+++ b/drivers/net/wireless/realtek/rtlwifi/regd.c
@@ -351,7 +351,6 @@
 	case COUNTRY_CODE_SPAIN:
 	case COUNTRY_CODE_FRANCE:
 	case COUNTRY_CODE_ISRAEL:
-	case COUNTRY_CODE_WORLD_WIDE_13:
 		return &rtl_regdom_12_13;
 	case COUNTRY_CODE_MKK:
 	case COUNTRY_CODE_MKK1:
@@ -360,6 +359,7 @@
 		return &rtl_regdom_14_60_64;
 	case COUNTRY_CODE_GLOBAL_DOMAIN:
 		return &rtl_regdom_14;
+	case COUNTRY_CODE_WORLD_WIDE_13:
 	case COUNTRY_CODE_WORLD_WIDE_13_5G_ALL:
 		return &rtl_regdom_12_13_5g_all;
 	default:
diff --git a/drivers/net/wireless/ti/wlcore/io.c b/drivers/net/wireless/ti/wlcore/io.c
index 9ac118e..564ca75 100644
--- a/drivers/net/wireless/ti/wlcore/io.c
+++ b/drivers/net/wireless/ti/wlcore/io.c
@@ -175,14 +175,14 @@
 	if (ret < 0)
 		goto out;
 
+	/* We don't need the size of the last partition, as it is
+	 * automatically calculated based on the total memory size and
+	 * the sizes of the previous partitions.
+	 */
 	ret = wlcore_raw_write32(wl, HW_PART3_START_ADDR, p->mem3.start);
 	if (ret < 0)
 		goto out;
 
-	ret = wlcore_raw_write32(wl, HW_PART3_SIZE_ADDR, p->mem3.size);
-	if (ret < 0)
-		goto out;
-
 out:
 	return ret;
 }
diff --git a/drivers/net/wireless/ti/wlcore/io.h b/drivers/net/wireless/ti/wlcore/io.h
index 6c257b5..10cf374 100644
--- a/drivers/net/wireless/ti/wlcore/io.h
+++ b/drivers/net/wireless/ti/wlcore/io.h
@@ -36,8 +36,8 @@
 #define HW_PART1_START_ADDR             (HW_PARTITION_REGISTERS_ADDR + 12)
 #define HW_PART2_SIZE_ADDR              (HW_PARTITION_REGISTERS_ADDR + 16)
 #define HW_PART2_START_ADDR             (HW_PARTITION_REGISTERS_ADDR + 20)
-#define HW_PART3_SIZE_ADDR              (HW_PARTITION_REGISTERS_ADDR + 24)
-#define HW_PART3_START_ADDR             (HW_PARTITION_REGISTERS_ADDR + 28)
+#define HW_PART3_START_ADDR             (HW_PARTITION_REGISTERS_ADDR + 24)
+
 #define HW_ACCESS_REGISTER_SIZE         4
 
 #define HW_ACCESS_PRAM_MAX_RANGE	0x3c000
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index d6abf19..96ccd4e 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -364,6 +364,7 @@
 	RING_IDX cons, prod;
 	unsigned short id;
 	struct sk_buff *skb;
+	bool more_to_do;
 
 	BUG_ON(!netif_carrier_ok(queue->info->netdev));
 
@@ -398,18 +399,8 @@
 
 		queue->tx.rsp_cons = prod;
 
-		/*
-		 * Set a new event, then check for race with update of tx_cons.
-		 * Note that it is essential to schedule a callback, no matter
-		 * how few buffers are pending. Even if there is space in the
-		 * transmit ring, higher layers may be blocked because too much
-		 * data is outstanding: in such cases notification from Xen is
-		 * likely to be the only kick that we'll get.
-		 */
-		queue->tx.sring->rsp_event =
-			prod + ((queue->tx.sring->req_prod - prod) >> 1) + 1;
-		mb();		/* update shared area */
-	} while ((cons == prod) && (prod != queue->tx.sring->rsp_prod));
+		RING_FINAL_CHECK_FOR_RESPONSES(&queue->tx, more_to_do);
+	} while (more_to_do);
 
 	xennet_maybe_wake_tx(queue);
 }
diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c
index 7e2c43f..5d28e94 100644
--- a/drivers/nvdimm/bus.c
+++ b/drivers/nvdimm/bus.c
@@ -382,18 +382,18 @@
 	[ND_CMD_ARS_CAP] = {
 		.in_num = 2,
 		.in_sizes = { 8, 8, },
+		.out_num = 4,
+		.out_sizes = { 4, 4, 4, 4, },
+	},
+	[ND_CMD_ARS_START] = {
+		.in_num = 5,
+		.in_sizes = { 8, 8, 2, 1, 5, },
 		.out_num = 2,
 		.out_sizes = { 4, 4, },
 	},
-	[ND_CMD_ARS_START] = {
-		.in_num = 4,
-		.in_sizes = { 8, 8, 2, 6, },
-		.out_num = 1,
-		.out_sizes = { 4, },
-	},
 	[ND_CMD_ARS_STATUS] = {
-		.out_num = 2,
-		.out_sizes = { 4, UINT_MAX, },
+		.out_num = 3,
+		.out_sizes = { 4, 4, UINT_MAX, },
 	},
 };
 
@@ -442,8 +442,8 @@
 		return in_field[1];
 	else if (nvdimm && cmd == ND_CMD_VENDOR && idx == 2)
 		return out_field[1];
-	else if (!nvdimm && cmd == ND_CMD_ARS_STATUS && idx == 1)
-		return ND_CMD_ARS_STATUS_MAX;
+	else if (!nvdimm && cmd == ND_CMD_ARS_STATUS && idx == 2)
+		return out_field[1] - 8;
 
 	return UINT_MAX;
 }
diff --git a/drivers/nvdimm/e820.c b/drivers/nvdimm/e820.c
index b0045a5..95825b3 100644
--- a/drivers/nvdimm/e820.c
+++ b/drivers/nvdimm/e820.c
@@ -55,7 +55,7 @@
 	for (p = iomem_resource.child; p ; p = p->sibling) {
 		struct nd_region_desc ndr_desc;
 
-		if (strncmp(p->name, "Persistent Memory (legacy)", 26) != 0)
+		if (p->desc != IORES_DESC_PERSISTENT_MEMORY_LEGACY)
 			continue;
 
 		memset(&ndr_desc, 0, sizeof(ndr_desc));
diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c
index 8ebfcaa..9edf7eb 100644
--- a/drivers/nvdimm/namespace_devs.c
+++ b/drivers/nvdimm/namespace_devs.c
@@ -1277,10 +1277,12 @@
 
 	device_lock(dev);
 	claim = ndns->claim;
-	if (pmem_should_map_pages(dev) || (claim && is_nd_pfn(claim)))
-		mode = "memory";
-	else if (claim && is_nd_btt(claim))
+	if (claim && is_nd_btt(claim))
 		mode = "safe";
+	else if (claim && is_nd_pfn(claim))
+		mode = "memory";
+	else if (!claim && pmem_should_map_pages(dev))
+		mode = "memory";
 	else
 		mode = "raw";
 	rc = sprintf(buf, "%s\n", mode);
diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c
index 0cc9048b..ae81a2f 100644
--- a/drivers/nvdimm/pfn_devs.c
+++ b/drivers/nvdimm/pfn_devs.c
@@ -301,10 +301,8 @@
 
 	switch (le32_to_cpu(pfn_sb->mode)) {
 	case PFN_MODE_RAM:
-		break;
 	case PFN_MODE_PMEM:
-		/* TODO: allocate from PMEM support */
-		return -ENOTTY;
+		break;
 	default:
 		return -ENXIO;
 	}
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 7edf316..8d0b546 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -41,7 +41,7 @@
 	phys_addr_t		phys_addr;
 	/* when non-zero this device is hosting a 'pfn' instance */
 	phys_addr_t		data_offset;
-	unsigned long		pfn_flags;
+	u64			pfn_flags;
 	void __pmem		*virt_addr;
 	size_t			size;
 	struct badblocks	bb;
diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig
index 5d62373..b586d84 100644
--- a/drivers/nvme/host/Kconfig
+++ b/drivers/nvme/host/Kconfig
@@ -17,5 +17,6 @@
 	  and block devices nodes, as well a a translation for a small
 	  number of selected SCSI commands to NVMe commands to the NVMe
 	  driver.  If you don't know what this means you probably want
-	  to say N here, and if you know what it means you probably
-	  want to say N as well.
+	  to say N here, unless you run a distro that abuses the SCSI
+	  emulation to provide stable device names for mount by id, like
+	  some OpenSuSE and SLES versions.
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index c5bf001..03c4641 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -55,8 +55,9 @@
 	ns->disk->private_data = NULL;
 	spin_unlock(&dev_list_lock);
 
-	nvme_put_ctrl(ns->ctrl);
 	put_disk(ns->disk);
+	ida_simple_remove(&ns->ctrl->ns_ida, ns->instance);
+	nvme_put_ctrl(ns->ctrl);
 	kfree(ns);
 }
 
@@ -183,7 +184,7 @@
 			goto out_unmap;
 		}
 
-		if (meta_buffer) {
+		if (meta_buffer && meta_len) {
 			struct bio_integrity_payload *bip;
 
 			meta = kmalloc(meta_len, GFP_KERNEL);
@@ -373,6 +374,8 @@
 
 	if (copy_from_user(&io, uio, sizeof(io)))
 		return -EFAULT;
+	if (io.flags)
+		return -EINVAL;
 
 	switch (io.opcode) {
 	case nvme_cmd_write:
@@ -424,6 +427,8 @@
 		return -EACCES;
 	if (copy_from_user(&cmd, ucmd, sizeof(cmd)))
 		return -EFAULT;
+	if (cmd.flags)
+		return -EINVAL;
 
 	memset(&c, 0, sizeof(c));
 	c.common.opcode = cmd.opcode;
@@ -556,6 +561,10 @@
 	u16 old_ms;
 	unsigned short bs;
 
+	if (test_bit(NVME_NS_DEAD, &ns->flags)) {
+		set_capacity(disk, 0);
+		return -ENODEV;
+	}
 	if (nvme_identify_ns(ns->ctrl, ns->ns_id, &id)) {
 		dev_warn(ns->ctrl->dev, "%s: Identify failure nvme%dn%d\n",
 				__func__, ns->ctrl->instance, ns->ns_id);
@@ -831,6 +840,23 @@
 	return ret;
 }
 
+static void nvme_set_queue_limits(struct nvme_ctrl *ctrl,
+		struct request_queue *q)
+{
+	if (ctrl->max_hw_sectors) {
+		u32 max_segments =
+			(ctrl->max_hw_sectors / (ctrl->page_size >> 9)) + 1;
+
+		blk_queue_max_hw_sectors(q, ctrl->max_hw_sectors);
+		blk_queue_max_segments(q, min_t(u32, max_segments, USHRT_MAX));
+	}
+	if (ctrl->stripe_size)
+		blk_queue_chunk_sectors(q, ctrl->stripe_size >> 9);
+	if (ctrl->vwc & NVME_CTRL_VWC_PRESENT)
+		blk_queue_flush(q, REQ_FLUSH | REQ_FUA);
+	blk_queue_virt_boundary(q, ctrl->page_size - 1);
+}
+
 /*
  * Initialize the cached copies of the Identify data and various controller
  * register in our nvme_ctrl structure.  This should be called as soon as
@@ -888,6 +914,8 @@
 		}
 	}
 
+	nvme_set_queue_limits(ctrl, ctrl->admin_q);
+
 	kfree(id);
 	return 0;
 }
@@ -1118,10 +1146,13 @@
 	if (!ns)
 		return;
 
+	ns->instance = ida_simple_get(&ctrl->ns_ida, 1, 0, GFP_KERNEL);
+	if (ns->instance < 0)
+		goto out_free_ns;
+
 	ns->queue = blk_mq_init_queue(ctrl->tagset);
 	if (IS_ERR(ns->queue))
-		goto out_free_ns;
-	queue_flag_set_unlocked(QUEUE_FLAG_NOMERGES, ns->queue);
+		goto out_release_instance;
 	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, ns->queue);
 	ns->queue->queuedata = ns;
 	ns->ctrl = ctrl;
@@ -1135,17 +1166,9 @@
 	ns->disk = disk;
 	ns->lba_shift = 9; /* set to a default value for 512 until disk is validated */
 
+
 	blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift);
-	if (ctrl->max_hw_sectors) {
-		blk_queue_max_hw_sectors(ns->queue, ctrl->max_hw_sectors);
-		blk_queue_max_segments(ns->queue,
-			(ctrl->max_hw_sectors / (ctrl->page_size >> 9)) + 1);
-	}
-	if (ctrl->stripe_size)
-		blk_queue_chunk_sectors(ns->queue, ctrl->stripe_size >> 9);
-	if (ctrl->vwc & NVME_CTRL_VWC_PRESENT)
-		blk_queue_flush(ns->queue, REQ_FLUSH | REQ_FUA);
-	blk_queue_virt_boundary(ns->queue, ctrl->page_size - 1);
+	nvme_set_queue_limits(ctrl, ns->queue);
 
 	disk->major = nvme_major;
 	disk->first_minor = 0;
@@ -1154,7 +1177,7 @@
 	disk->queue = ns->queue;
 	disk->driverfs_dev = ctrl->device;
 	disk->flags = GENHD_FL_EXT_DEVT;
-	sprintf(disk->disk_name, "nvme%dn%d", ctrl->instance, nsid);
+	sprintf(disk->disk_name, "nvme%dn%d", ctrl->instance, ns->instance);
 
 	if (nvme_revalidate_disk(ns->disk))
 		goto out_free_disk;
@@ -1174,40 +1197,29 @@
 	kfree(disk);
  out_free_queue:
 	blk_cleanup_queue(ns->queue);
+ out_release_instance:
+	ida_simple_remove(&ctrl->ns_ida, ns->instance);
  out_free_ns:
 	kfree(ns);
 }
 
 static void nvme_ns_remove(struct nvme_ns *ns)
 {
-	bool kill = nvme_io_incapable(ns->ctrl) &&
-			!blk_queue_dying(ns->queue);
+	if (test_and_set_bit(NVME_NS_REMOVING, &ns->flags))
+		return;
 
-	lockdep_assert_held(&ns->ctrl->namespaces_mutex);
-
-	if (kill) {
-		blk_set_queue_dying(ns->queue);
-
-		/*
-		 * The controller was shutdown first if we got here through
-		 * device removal. The shutdown may requeue outstanding
-		 * requests. These need to be aborted immediately so
-		 * del_gendisk doesn't block indefinitely for their completion.
-		 */
-		blk_mq_abort_requeue_list(ns->queue);
-	}
 	if (ns->disk->flags & GENHD_FL_UP) {
 		if (blk_get_integrity(ns->disk))
 			blk_integrity_unregister(ns->disk);
 		sysfs_remove_group(&disk_to_dev(ns->disk)->kobj,
 					&nvme_ns_attr_group);
 		del_gendisk(ns->disk);
-	}
-	if (kill || !blk_queue_dying(ns->queue)) {
 		blk_mq_abort_requeue_list(ns->queue);
 		blk_cleanup_queue(ns->queue);
 	}
+	mutex_lock(&ns->ctrl->namespaces_mutex);
 	list_del_init(&ns->list);
+	mutex_unlock(&ns->ctrl->namespaces_mutex);
 	nvme_put_ns(ns);
 }
 
@@ -1301,10 +1313,8 @@
 {
 	struct nvme_ns *ns, *next;
 
-	mutex_lock(&ctrl->namespaces_mutex);
 	list_for_each_entry_safe(ns, next, &ctrl->namespaces, list)
 		nvme_ns_remove(ns);
-	mutex_unlock(&ctrl->namespaces_mutex);
 }
 
 static DEFINE_IDA(nvme_instance_ida);
@@ -1351,6 +1361,7 @@
 
 	put_device(ctrl->device);
 	nvme_release_instance(ctrl);
+	ida_destroy(&ctrl->ns_ida);
 
 	ctrl->ops->free_ctrl(ctrl);
 }
@@ -1391,6 +1402,7 @@
 	}
 	get_device(ctrl->device);
 	dev_set_drvdata(ctrl->device, ctrl);
+	ida_init(&ctrl->ns_ida);
 
 	spin_lock(&dev_list_lock);
 	list_add_tail(&ctrl->node, &nvme_ctrl_list);
@@ -1403,6 +1415,38 @@
 	return ret;
 }
 
+/**
+ * nvme_kill_queues(): Ends all namespace queues
+ * @ctrl: the dead controller that needs to end
+ *
+ * Call this function when the driver determines it is unable to get the
+ * controller in a state capable of servicing IO.
+ */
+void nvme_kill_queues(struct nvme_ctrl *ctrl)
+{
+	struct nvme_ns *ns;
+
+	mutex_lock(&ctrl->namespaces_mutex);
+	list_for_each_entry(ns, &ctrl->namespaces, list) {
+		if (!kref_get_unless_zero(&ns->kref))
+			continue;
+
+		/*
+		 * Revalidating a dead namespace sets capacity to 0. This will
+		 * end buffered writers dirtying pages that can't be synced.
+		 */
+		if (!test_and_set_bit(NVME_NS_DEAD, &ns->flags))
+			revalidate_disk(ns->disk);
+
+		blk_set_queue_dying(ns->queue);
+		blk_mq_abort_requeue_list(ns->queue);
+		blk_mq_start_stopped_hw_queues(ns->queue, true);
+
+		nvme_put_ns(ns);
+	}
+	mutex_unlock(&ctrl->namespaces_mutex);
+}
+
 void nvme_stop_queues(struct nvme_ctrl *ctrl)
 {
 	struct nvme_ns *ns;
diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c
index 5cd3725..6bb15e4 100644
--- a/drivers/nvme/host/lightnvm.c
+++ b/drivers/nvme/host/lightnvm.c
@@ -146,9 +146,10 @@
 	};
 };
 
+#define NVME_NVM_LP_MLC_PAIRS 886
 struct nvme_nvm_lp_mlc {
 	__u16			num_pairs;
-	__u8			pairs[886];
+	__u8			pairs[NVME_NVM_LP_MLC_PAIRS];
 };
 
 struct nvme_nvm_lp_tbl {
@@ -282,9 +283,14 @@
 			memcpy(dst->lptbl.id, src->lptbl.id, 8);
 			dst->lptbl.mlc.num_pairs =
 					le16_to_cpu(src->lptbl.mlc.num_pairs);
-			/* 4 bits per pair */
+
+			if (dst->lptbl.mlc.num_pairs > NVME_NVM_LP_MLC_PAIRS) {
+				pr_err("nvm: number of MLC pairs not supported\n");
+				return -EINVAL;
+			}
+
 			memcpy(dst->lptbl.mlc.pairs, src->lptbl.mlc.pairs,
-						dst->lptbl.mlc.num_pairs >> 1);
+						dst->lptbl.mlc.num_pairs);
 		}
 	}
 
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 4fb5bb7..fb15ba5 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -72,6 +72,7 @@
 	struct mutex namespaces_mutex;
 	struct device *device;	/* char device */
 	struct list_head node;
+	struct ida ns_ida;
 
 	char name[12];
 	char serial[20];
@@ -102,6 +103,7 @@
 	struct request_queue *queue;
 	struct gendisk *disk;
 	struct kref kref;
+	int instance;
 
 	u8 eui[8];
 	u8 uuid[16];
@@ -112,6 +114,11 @@
 	bool ext;
 	u8 pi_type;
 	int type;
+	unsigned long flags;
+
+#define NVME_NS_REMOVING 0
+#define NVME_NS_DEAD     1
+
 	u64 mode_select_num_blocks;
 	u32 mode_select_block_len;
 };
@@ -139,9 +146,9 @@
 	u32 val = 0;
 
 	if (ctrl->ops->io_incapable(ctrl))
-		return false;
+		return true;
 	if (ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &val))
-		return false;
+		return true;
 	return val & NVME_CSTS_CFS;
 }
 
@@ -240,6 +247,7 @@
 
 void nvme_stop_queues(struct nvme_ctrl *ctrl);
 void nvme_start_queues(struct nvme_ctrl *ctrl);
+void nvme_kill_queues(struct nvme_ctrl *ctrl);
 
 struct request *nvme_alloc_request(struct request_queue *q,
 		struct nvme_command *cmd, unsigned int flags);
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 72ef832..680f578 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -86,7 +86,6 @@
 
 static int nvme_reset(struct nvme_dev *dev);
 static void nvme_process_cq(struct nvme_queue *nvmeq);
-static void nvme_remove_dead_ctrl(struct nvme_dev *dev);
 static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown);
 
 /*
@@ -120,6 +119,7 @@
 	unsigned long flags;
 
 #define NVME_CTRL_RESETTING    0
+#define NVME_CTRL_REMOVING     1
 
 	struct nvme_ctrl ctrl;
 	struct completion ioq_wait;
@@ -286,6 +286,17 @@
 	return 0;
 }
 
+static void nvme_queue_scan(struct nvme_dev *dev)
+{
+	/*
+	 * Do not queue new scan work when a controller is reset during
+	 * removal.
+	 */
+	if (test_bit(NVME_CTRL_REMOVING, &dev->flags))
+		return;
+	queue_work(nvme_workq, &dev->scan_work);
+}
+
 static void nvme_complete_async_event(struct nvme_dev *dev,
 		struct nvme_completion *cqe)
 {
@@ -300,7 +311,7 @@
 	switch (result & 0xff07) {
 	case NVME_AER_NOTICE_NS_CHANGED:
 		dev_info(dev->dev, "rescanning\n");
-		queue_work(nvme_workq, &dev->scan_work);
+		nvme_queue_scan(dev);
 	default:
 		dev_warn(dev->dev, "async event result %08x\n", result);
 	}
@@ -678,6 +689,14 @@
 	blk_mq_start_request(req);
 
 	spin_lock_irq(&nvmeq->q_lock);
+	if (unlikely(nvmeq->cq_vector < 0)) {
+		if (ns && !test_bit(NVME_NS_DEAD, &ns->flags))
+			ret = BLK_MQ_RQ_QUEUE_BUSY;
+		else
+			ret = BLK_MQ_RQ_QUEUE_ERROR;
+		spin_unlock_irq(&nvmeq->q_lock);
+		goto out;
+	}
 	__nvme_submit_cmd(nvmeq, &cmnd);
 	nvme_process_cq(nvmeq);
 	spin_unlock_irq(&nvmeq->q_lock);
@@ -999,7 +1018,7 @@
 	if (!blk_mq_request_started(req))
 		return;
 
-	dev_warn(nvmeq->q_dmadev,
+	dev_dbg_ratelimited(nvmeq->q_dmadev,
 		 "Cancelling I/O %d QID %d\n", req->tag, nvmeq->qid);
 
 	status = NVME_SC_ABORT_REQ;
@@ -1245,6 +1264,12 @@
 static void nvme_dev_remove_admin(struct nvme_dev *dev)
 {
 	if (dev->ctrl.admin_q && !blk_queue_dying(dev->ctrl.admin_q)) {
+		/*
+		 * If the controller was reset during removal, it's possible
+		 * user requests may be waiting on a stopped queue. Start the
+		 * queue to flush these to completion.
+		 */
+		blk_mq_start_stopped_hw_queues(dev->ctrl.admin_q, true);
 		blk_cleanup_queue(dev->ctrl.admin_q);
 		blk_mq_free_tag_set(&dev->admin_tagset);
 	}
@@ -1685,14 +1710,14 @@
 			return 0;
 		dev->ctrl.tagset = &dev->tagset;
 	}
-	queue_work(nvme_workq, &dev->scan_work);
+	nvme_queue_scan(dev);
 	return 0;
 }
 
-static int nvme_dev_map(struct nvme_dev *dev)
+static int nvme_pci_enable(struct nvme_dev *dev)
 {
 	u64 cap;
-	int bars, result = -ENOMEM;
+	int result = -ENOMEM;
 	struct pci_dev *pdev = to_pci_dev(dev->dev);
 
 	if (pci_enable_device_mem(pdev))
@@ -1700,24 +1725,14 @@
 
 	dev->entry[0].vector = pdev->irq;
 	pci_set_master(pdev);
-	bars = pci_select_bars(pdev, IORESOURCE_MEM);
-	if (!bars)
-		goto disable_pci;
-
-	if (pci_request_selected_regions(pdev, bars, "nvme"))
-		goto disable_pci;
 
 	if (dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(64)) &&
 	    dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(32)))
 		goto disable;
 
-	dev->bar = ioremap(pci_resource_start(pdev, 0), 8192);
-	if (!dev->bar)
-		goto disable;
-
 	if (readl(dev->bar + NVME_REG_CSTS) == -1) {
 		result = -ENODEV;
-		goto unmap;
+		goto disable;
 	}
 
 	/*
@@ -1727,7 +1742,7 @@
 	if (!pdev->irq) {
 		result = pci_enable_msix(pdev, dev->entry, 1);
 		if (result < 0)
-			goto unmap;
+			goto disable;
 	}
 
 	cap = lo_hi_readq(dev->bar + NVME_REG_CAP);
@@ -1754,18 +1769,20 @@
 	pci_save_state(pdev);
 	return 0;
 
- unmap:
-	iounmap(dev->bar);
-	dev->bar = NULL;
  disable:
-	pci_release_regions(pdev);
- disable_pci:
 	pci_disable_device(pdev);
 	return result;
 }
 
 static void nvme_dev_unmap(struct nvme_dev *dev)
 {
+	if (dev->bar)
+		iounmap(dev->bar);
+	pci_release_regions(to_pci_dev(dev->dev));
+}
+
+static void nvme_pci_disable(struct nvme_dev *dev)
+{
 	struct pci_dev *pdev = to_pci_dev(dev->dev);
 
 	if (pdev->msi_enabled)
@@ -1773,12 +1790,6 @@
 	else if (pdev->msix_enabled)
 		pci_disable_msix(pdev);
 
-	if (dev->bar) {
-		iounmap(dev->bar);
-		dev->bar = NULL;
-		pci_release_regions(pdev);
-	}
-
 	if (pci_is_enabled(pdev)) {
 		pci_disable_pcie_error_reporting(pdev);
 		pci_disable_device(pdev);
@@ -1837,7 +1848,7 @@
 	nvme_dev_list_remove(dev);
 
 	mutex_lock(&dev->shutdown_lock);
-	if (dev->bar) {
+	if (pci_is_enabled(to_pci_dev(dev->dev))) {
 		nvme_stop_queues(&dev->ctrl);
 		csts = readl(dev->bar + NVME_REG_CSTS);
 	}
@@ -1850,7 +1861,7 @@
 		nvme_disable_io_queues(dev);
 		nvme_disable_admin_queue(dev, shutdown);
 	}
-	nvme_dev_unmap(dev);
+	nvme_pci_disable(dev);
 
 	for (i = dev->queue_count - 1; i >= 0; i--)
 		nvme_clear_queue(dev->queues[i]);
@@ -1894,10 +1905,20 @@
 	kfree(dev);
 }
 
+static void nvme_remove_dead_ctrl(struct nvme_dev *dev, int status)
+{
+	dev_warn(dev->dev, "Removing after probe failure status: %d\n", status);
+
+	kref_get(&dev->ctrl.kref);
+	nvme_dev_disable(dev, false);
+	if (!schedule_work(&dev->remove_work))
+		nvme_put_ctrl(&dev->ctrl);
+}
+
 static void nvme_reset_work(struct work_struct *work)
 {
 	struct nvme_dev *dev = container_of(work, struct nvme_dev, reset_work);
-	int result;
+	int result = -ENODEV;
 
 	if (WARN_ON(test_bit(NVME_CTRL_RESETTING, &dev->flags)))
 		goto out;
@@ -1906,37 +1927,37 @@
 	 * If we're called to reset a live controller first shut it down before
 	 * moving on.
 	 */
-	if (dev->bar)
+	if (dev->ctrl.ctrl_config & NVME_CC_ENABLE)
 		nvme_dev_disable(dev, false);
 
 	set_bit(NVME_CTRL_RESETTING, &dev->flags);
 
-	result = nvme_dev_map(dev);
+	result = nvme_pci_enable(dev);
 	if (result)
 		goto out;
 
 	result = nvme_configure_admin_queue(dev);
 	if (result)
-		goto unmap;
+		goto out;
 
 	nvme_init_queue(dev->queues[0], 0);
 	result = nvme_alloc_admin_tags(dev);
 	if (result)
-		goto disable;
+		goto out;
 
 	result = nvme_init_identify(&dev->ctrl);
 	if (result)
-		goto free_tags;
+		goto out;
 
 	result = nvme_setup_io_queues(dev);
 	if (result)
-		goto free_tags;
+		goto out;
 
 	dev->ctrl.event_limit = NVME_NR_AEN_COMMANDS;
 
 	result = nvme_dev_list_add(dev);
 	if (result)
-		goto remove;
+		goto out;
 
 	/*
 	 * Keep the controller around but remove all namespaces if we don't have
@@ -1953,19 +1974,8 @@
 	clear_bit(NVME_CTRL_RESETTING, &dev->flags);
 	return;
 
- remove:
-	nvme_dev_list_remove(dev);
- free_tags:
-	nvme_dev_remove_admin(dev);
-	blk_put_queue(dev->ctrl.admin_q);
-	dev->ctrl.admin_q = NULL;
-	dev->queues[0]->tags = NULL;
- disable:
-	nvme_disable_admin_queue(dev, false);
- unmap:
-	nvme_dev_unmap(dev);
  out:
-	nvme_remove_dead_ctrl(dev);
+	nvme_remove_dead_ctrl(dev, result);
 }
 
 static void nvme_remove_dead_ctrl_work(struct work_struct *work)
@@ -1973,19 +1983,12 @@
 	struct nvme_dev *dev = container_of(work, struct nvme_dev, remove_work);
 	struct pci_dev *pdev = to_pci_dev(dev->dev);
 
+	nvme_kill_queues(&dev->ctrl);
 	if (pci_get_drvdata(pdev))
 		pci_stop_and_remove_bus_device_locked(pdev);
 	nvme_put_ctrl(&dev->ctrl);
 }
 
-static void nvme_remove_dead_ctrl(struct nvme_dev *dev)
-{
-	dev_warn(dev->dev, "Removing after probe failure\n");
-	kref_get(&dev->ctrl.kref);
-	if (!schedule_work(&dev->remove_work))
-		nvme_put_ctrl(&dev->ctrl);
-}
-
 static int nvme_reset(struct nvme_dev *dev)
 {
 	if (!dev->ctrl.admin_q || blk_queue_dying(dev->ctrl.admin_q))
@@ -2037,6 +2040,27 @@
 	.free_ctrl		= nvme_pci_free_ctrl,
 };
 
+static int nvme_dev_map(struct nvme_dev *dev)
+{
+	int bars;
+	struct pci_dev *pdev = to_pci_dev(dev->dev);
+
+	bars = pci_select_bars(pdev, IORESOURCE_MEM);
+	if (!bars)
+		return -ENODEV;
+	if (pci_request_selected_regions(pdev, bars, "nvme"))
+		return -ENODEV;
+
+	dev->bar = ioremap(pci_resource_start(pdev, 0), 8192);
+	if (!dev->bar)
+		goto release;
+
+       return 0;
+  release:
+       pci_release_regions(pdev);
+       return -ENODEV;
+}
+
 static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 {
 	int node, result = -ENOMEM;
@@ -2061,6 +2085,10 @@
 	dev->dev = get_device(&pdev->dev);
 	pci_set_drvdata(pdev, dev);
 
+	result = nvme_dev_map(dev);
+	if (result)
+		goto free;
+
 	INIT_LIST_HEAD(&dev->node);
 	INIT_WORK(&dev->scan_work, nvme_dev_scan);
 	INIT_WORK(&dev->reset_work, nvme_reset_work);
@@ -2084,6 +2112,7 @@
 	nvme_release_prp_pools(dev);
  put_pci:
 	put_device(dev->dev);
+	nvme_dev_unmap(dev);
  free:
 	kfree(dev->queues);
 	kfree(dev->entry);
@@ -2107,24 +2136,27 @@
 	nvme_dev_disable(dev, true);
 }
 
+/*
+ * The driver's remove may be called on a device in a partially initialized
+ * state. This function must not have any dependencies on the device state in
+ * order to proceed.
+ */
 static void nvme_remove(struct pci_dev *pdev)
 {
 	struct nvme_dev *dev = pci_get_drvdata(pdev);
 
-	spin_lock(&dev_list_lock);
-	list_del_init(&dev->node);
-	spin_unlock(&dev_list_lock);
-
+	set_bit(NVME_CTRL_REMOVING, &dev->flags);
 	pci_set_drvdata(pdev, NULL);
-	flush_work(&dev->reset_work);
 	flush_work(&dev->scan_work);
 	nvme_remove_namespaces(&dev->ctrl);
 	nvme_uninit_ctrl(&dev->ctrl);
 	nvme_dev_disable(dev, true);
+	flush_work(&dev->reset_work);
 	nvme_dev_remove_admin(dev);
 	nvme_free_queues(dev, 0);
 	nvme_release_cmb(dev);
 	nvme_release_prp_pools(dev);
+	nvme_dev_unmap(dev);
 	nvme_put_ctrl(&dev->ctrl);
 }
 
diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c
index 6fd4e5a..9d11d98 100644
--- a/drivers/nvmem/core.c
+++ b/drivers/nvmem/core.c
@@ -70,6 +70,9 @@
 	if (pos >= nvmem->size)
 		return 0;
 
+	if (count < nvmem->word_size)
+		return -EINVAL;
+
 	if (pos + count > nvmem->size)
 		count = nvmem->size - pos;
 
@@ -95,6 +98,9 @@
 	if (pos >= nvmem->size)
 		return 0;
 
+	if (count < nvmem->word_size)
+		return -EINVAL;
+
 	if (pos + count > nvmem->size)
 		count = nvmem->size - pos;
 
diff --git a/drivers/nvmem/qfprom.c b/drivers/nvmem/qfprom.c
index afb67e7..3829e5f 100644
--- a/drivers/nvmem/qfprom.c
+++ b/drivers/nvmem/qfprom.c
@@ -21,6 +21,7 @@
 	.reg_bits = 32,
 	.val_bits = 8,
 	.reg_stride = 1,
+	.val_format_endian = REGMAP_ENDIAN_LITTLE,
 };
 
 static struct nvmem_config econfig = {
diff --git a/drivers/of/irq.c b/drivers/of/irq.c
index 706e3ff..e7bfc17 100644
--- a/drivers/of/irq.c
+++ b/drivers/of/irq.c
@@ -635,6 +635,13 @@
 		msi_base = be32_to_cpup(msi_map + 2);
 		rid_len = be32_to_cpup(msi_map + 3);
 
+		if (rid_base & ~map_mask) {
+			dev_err(parent_dev,
+				"Invalid msi-map translation - msi-map-mask (0x%x) ignores rid-base (0x%x)\n",
+				map_mask, rid_base);
+			return rid_out;
+		}
+
 		msi_controller_node = of_find_node_by_phandle(phandle);
 
 		matched = (masked_rid >= rid_base &&
@@ -654,7 +661,7 @@
 	if (!matched)
 		return rid_out;
 
-	rid_out = masked_rid + msi_base;
+	rid_out = masked_rid - rid_base + msi_base;
 	dev_dbg(dev,
 		"msi-map at: %s, using mask %08x, rid-base: %08x, msi-base: %08x, length: %08x, rid: %08x -> %08x\n",
 		dev_name(parent_dev), map_mask, rid_base, msi_base,
@@ -679,18 +686,6 @@
 	return __of_msi_map_rid(dev, &msi_np, rid_in);
 }
 
-static struct irq_domain *__of_get_msi_domain(struct device_node *np,
-					      enum irq_domain_bus_token token)
-{
-	struct irq_domain *d;
-
-	d = irq_find_matching_host(np, token);
-	if (!d)
-		d = irq_find_host(np);
-
-	return d;
-}
-
 /**
  * of_msi_map_get_device_domain - Use msi-map to find the relevant MSI domain
  * @dev: device for which the mapping is to be done.
@@ -706,7 +701,7 @@
 	struct device_node *np = NULL;
 
 	__of_msi_map_rid(dev, &np, rid);
-	return __of_get_msi_domain(np, DOMAIN_BUS_PCI_MSI);
+	return irq_find_matching_host(np, DOMAIN_BUS_PCI_MSI);
 }
 
 /**
@@ -730,7 +725,7 @@
 	/* Check for a single msi-parent property */
 	msi_np = of_parse_phandle(np, "msi-parent", 0);
 	if (msi_np && !of_property_read_bool(msi_np, "#msi-cells")) {
-		d = __of_get_msi_domain(msi_np, token);
+		d = irq_find_matching_host(msi_np, token);
 		if (!d)
 			of_node_put(msi_np);
 		return d;
@@ -744,7 +739,7 @@
 		while (!of_parse_phandle_with_args(np, "msi-parent",
 						   "#msi-cells",
 						   index, &args)) {
-			d = __of_get_msi_domain(args.np, token);
+			d = irq_find_matching_host(args.np, token);
 			if (d)
 				return d;
 
diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c
index 86829f8..365dc7e 100644
--- a/drivers/of/of_mdio.c
+++ b/drivers/of/of_mdio.c
@@ -143,11 +143,32 @@
 }
 EXPORT_SYMBOL(of_mdio_parse_addr);
 
+/* The following is a list of PHY compatible strings which appear in
+ * some DTBs. The compatible string is never matched against a PHY
+ * driver, so is pointless. We only expect devices which are not PHYs
+ * to have a compatible string, so they can be matched to an MDIO
+ * driver.  Encourage users to upgrade their DT blobs to remove these.
+ */
+static const struct of_device_id whitelist_phys[] = {
+	{ .compatible = "brcm,40nm-ephy" },
+	{ .compatible = "marvell,88E1111", },
+	{ .compatible = "marvell,88e1116", },
+	{ .compatible = "marvell,88e1118", },
+	{ .compatible = "marvell,88e1145", },
+	{ .compatible = "marvell,88e1149r", },
+	{ .compatible = "marvell,88e1310", },
+	{ .compatible = "marvell,88E1510", },
+	{ .compatible = "marvell,88E1514", },
+	{ .compatible = "moxa,moxart-rtl8201cp", },
+	{}
+};
+
 /*
  * Return true if the child node is for a phy. It must either:
  * o Compatible string of "ethernet-phy-idX.X"
  * o Compatible string of "ethernet-phy-ieee802.3-c45"
  * o Compatible string of "ethernet-phy-ieee802.3-c22"
+ * o In the white list above (and issue a warning)
  * o No compatibility string
  *
  * A device which is not a phy is expected to have a compatible string
@@ -166,6 +187,13 @@
 	if (of_device_is_compatible(child, "ethernet-phy-ieee802.3-c22"))
 		return true;
 
+	if (of_match_node(whitelist_phys, child)) {
+		pr_warn(FW_WARN
+			"%s: Whitelisted compatible string. Please remove\n",
+			child->full_name);
+		return true;
+	}
+
 	if (!of_find_property(child, "compatible", NULL))
 		return true;
 
@@ -256,11 +284,19 @@
 struct phy_device *of_phy_find_device(struct device_node *phy_np)
 {
 	struct device *d;
+	struct mdio_device *mdiodev;
+
 	if (!phy_np)
 		return NULL;
 
 	d = bus_find_device(&mdio_bus_type, NULL, phy_np, of_phy_match);
-	return d ? to_phy_device(d) : NULL;
+	if (d) {
+		mdiodev = to_mdio_device(d);
+		if (mdiodev->flags & MDIO_DEVICE_FLAG_PHY)
+			return to_phy_device(d);
+	}
+
+	return NULL;
 }
 EXPORT_SYMBOL(of_phy_find_device);
 
@@ -269,6 +305,7 @@
  * @dev: pointer to net_device claiming the phy
  * @phy_np: Pointer to device tree node for the PHY
  * @hndlr: Link state callback for the network device
+ * @flags: flags to pass to the PHY
  * @iface: PHY data interface type
  *
  * If successful, returns a pointer to the phy_device with the embedded
diff --git a/drivers/parisc/eisa_enumerator.c b/drivers/parisc/eisa_enumerator.c
index a656d9e..21905fe 100644
--- a/drivers/parisc/eisa_enumerator.c
+++ b/drivers/parisc/eisa_enumerator.c
@@ -91,7 +91,7 @@
 	for (i=0;i<HPEE_MEMORY_MAX_ENT;i++) {
 		c = get_8(buf+len);
 		
-		if (NULL != (res = kmalloc(sizeof(struct resource), GFP_KERNEL))) {
+		if (NULL != (res = kzalloc(sizeof(struct resource), GFP_KERNEL))) {
 			int result;
 			
 			res->name = name;
@@ -183,7 +183,7 @@
 	for (i=0;i<HPEE_PORT_MAX_ENT;i++) {
 		c = get_8(buf+len);
 		
-		if (NULL != (res = kmalloc(sizeof(struct resource), GFP_KERNEL))) {
+		if (NULL != (res = kzalloc(sizeof(struct resource), GFP_KERNEL))) {
 			res->name = board;
 			res->start = get_16(buf+len+1);
 			res->end = get_16(buf+len+1)+(c&HPEE_PORT_SIZE_MASK)+1;
diff --git a/drivers/pci/host/Kconfig b/drivers/pci/host/Kconfig
index 75a6054..d1cdd9c 100644
--- a/drivers/pci/host/Kconfig
+++ b/drivers/pci/host/Kconfig
@@ -14,6 +14,7 @@
 config PCI_MVEBU
 	bool "Marvell EBU PCIe controller"
 	depends on ARCH_MVEBU || ARCH_DOVE
+	depends on ARM
 	depends on OF
 
 config PCIE_DW
diff --git a/drivers/pci/host/pci-keystone-dw.c b/drivers/pci/host/pci-keystone-dw.c
index ed34c95..6153853 100644
--- a/drivers/pci/host/pci-keystone-dw.c
+++ b/drivers/pci/host/pci-keystone-dw.c
@@ -58,11 +58,6 @@
 
 #define to_keystone_pcie(x)	container_of(x, struct keystone_pcie, pp)
 
-static inline struct pcie_port *sys_to_pcie(struct pci_sys_data *sys)
-{
-	return sys->private_data;
-}
-
 static inline void update_reg_offset_bit_pos(u32 offset, u32 *reg_offset,
 					     u32 *bit_pos)
 {
@@ -108,7 +103,7 @@
 	struct pcie_port *pp;
 
 	msi = irq_data_get_msi_desc(d);
-	pp = sys_to_pcie(msi_desc_to_pci_sysdata(msi));
+	pp = (struct pcie_port *) msi_desc_to_pci_sysdata(msi);
 	ks_pcie = to_keystone_pcie(pp);
 	offset = d->irq - irq_linear_revmap(pp->irq_domain, 0);
 	update_reg_offset_bit_pos(offset, &reg_offset, &bit_pos);
@@ -146,7 +141,7 @@
 	u32 offset;
 
 	msi = irq_data_get_msi_desc(d);
-	pp = sys_to_pcie(msi_desc_to_pci_sysdata(msi));
+	pp = (struct pcie_port *) msi_desc_to_pci_sysdata(msi);
 	ks_pcie = to_keystone_pcie(pp);
 	offset = d->irq - irq_linear_revmap(pp->irq_domain, 0);
 
@@ -167,7 +162,7 @@
 	u32 offset;
 
 	msi = irq_data_get_msi_desc(d);
-	pp = sys_to_pcie(msi_desc_to_pci_sysdata(msi));
+	pp = (struct pcie_port *) msi_desc_to_pci_sysdata(msi);
 	ks_pcie = to_keystone_pcie(pp);
 	offset = d->irq - irq_linear_revmap(pp->irq_domain, 0);
 
diff --git a/drivers/pci/host/pci-layerscape.c b/drivers/pci/host/pci-layerscape.c
index 3923bed..f39961b 100644
--- a/drivers/pci/host/pci-layerscape.c
+++ b/drivers/pci/host/pci-layerscape.c
@@ -77,6 +77,16 @@
 	iowrite16(PCI_CLASS_BRIDGE_PCI, pcie->dbi + PCI_CLASS_DEVICE);
 }
 
+/* Drop MSG TLP except for Vendor MSG */
+static void ls_pcie_drop_msg_tlp(struct ls_pcie *pcie)
+{
+	u32 val;
+
+	val = ioread32(pcie->dbi + PCIE_STRFMR1);
+	val &= 0xDFFFFFFF;
+	iowrite32(val, pcie->dbi + PCIE_STRFMR1);
+}
+
 static int ls1021_pcie_link_up(struct pcie_port *pp)
 {
 	u32 state;
@@ -97,7 +107,7 @@
 static void ls1021_pcie_host_init(struct pcie_port *pp)
 {
 	struct ls_pcie *pcie = to_ls_pcie(pp);
-	u32 val, index[2];
+	u32 index[2];
 
 	pcie->scfg = syscon_regmap_lookup_by_phandle(pp->dev->of_node,
 						     "fsl,pcie-scfg");
@@ -116,13 +126,7 @@
 
 	dw_pcie_setup_rc(pp);
 
-	/*
-	 * LS1021A Workaround for internal TKT228622
-	 * to fix the INTx hang issue
-	 */
-	val = ioread32(pcie->dbi + PCIE_STRFMR1);
-	val &= 0xffff;
-	iowrite32(val, pcie->dbi + PCIE_STRFMR1);
+	ls_pcie_drop_msg_tlp(pcie);
 }
 
 static int ls_pcie_link_up(struct pcie_port *pp)
@@ -147,6 +151,7 @@
 	iowrite32(1, pcie->dbi + PCIE_DBI_RO_WR_EN);
 	ls_pcie_fix_class(pcie);
 	ls_pcie_clear_multifunction(pcie);
+	ls_pcie_drop_msg_tlp(pcie);
 	iowrite32(0, pcie->dbi + PCIE_DBI_RO_WR_EN);
 }
 
diff --git a/drivers/pci/host/pcie-iproc.c b/drivers/pci/host/pcie-iproc.c
index 5816bce..a576aee 100644
--- a/drivers/pci/host/pcie-iproc.c
+++ b/drivers/pci/host/pcie-iproc.c
@@ -64,7 +64,6 @@
 #define OARR_SIZE_CFG                BIT(OARR_SIZE_CFG_SHIFT)
 
 #define MAX_NUM_OB_WINDOWS           2
-#define MAX_NUM_PAXC_PF              4
 
 #define IPROC_PCIE_REG_INVALID 0xffff
 
@@ -170,20 +169,6 @@
 	writel(val, pcie->base + offset + (window * 8));
 }
 
-static inline bool iproc_pcie_device_is_valid(struct iproc_pcie *pcie,
-					      unsigned int slot,
-					      unsigned int fn)
-{
-	if (slot > 0)
-		return false;
-
-	/* PAXC can only support limited number of functions */
-	if (pcie->type == IPROC_PCIE_PAXC && fn >= MAX_NUM_PAXC_PF)
-		return false;
-
-	return true;
-}
-
 /**
  * Note access to the configuration registers are protected at the higher layer
  * by 'pci_lock' in drivers/pci/access.c
@@ -199,11 +184,11 @@
 	u32 val;
 	u16 offset;
 
-	if (!iproc_pcie_device_is_valid(pcie, slot, fn))
-		return NULL;
-
 	/* root complex access */
 	if (busno == 0) {
+		if (slot > 0 || fn > 0)
+			return NULL;
+
 		iproc_pcie_write_reg(pcie, IPROC_PCIE_CFG_IND_ADDR,
 				     where & CFG_IND_ADDR_MASK);
 		offset = iproc_pcie_reg_offset(pcie, IPROC_PCIE_CFG_IND_DATA);
@@ -213,6 +198,14 @@
 			return (pcie->base + offset);
 	}
 
+	/*
+	 * PAXC is connected to an internally emulated EP within the SoC.  It
+	 * allows only one device.
+	 */
+	if (pcie->type == IPROC_PCIE_PAXC)
+		if (slot > 0)
+			return NULL;
+
 	/* EP device access */
 	val = (busno << CFG_ADDR_BUS_NUM_SHIFT) |
 		(slot << CFG_ADDR_DEV_NUM_SHIFT) |
diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c
index 5f2fda1..fa49f91 100644
--- a/drivers/pci/hotplug/acpiphp_glue.c
+++ b/drivers/pci/hotplug/acpiphp_glue.c
@@ -953,8 +953,10 @@
 {
 	pci_lock_rescan_remove();
 
-	if (slot->flags & SLOT_IS_GOING_AWAY)
+	if (slot->flags & SLOT_IS_GOING_AWAY) {
+		pci_unlock_rescan_remove();
 		return -ENODEV;
+	}
 
 	/* configure all functions */
 	if (!(slot->flags & SLOT_ENABLED))
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 602eb42..f89db3a 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -4772,8 +4772,10 @@
 void pci_bus_assign_domain_nr(struct pci_bus *bus, struct device *parent)
 {
 	static int use_dt_domains = -1;
-	int domain = of_get_pci_domain_nr(parent->of_node);
+	int domain = -1;
 
+	if (parent)
+		domain = of_get_pci_domain_nr(parent->of_node);
 	/*
 	 * Check DT domain and use_dt_domains values.
 	 *
diff --git a/drivers/pci/pcie/aer/aerdrv.c b/drivers/pci/pcie/aer/aerdrv.c
index 0bf82a2..48d21e0 100644
--- a/drivers/pci/pcie/aer/aerdrv.c
+++ b/drivers/pci/pcie/aer/aerdrv.c
@@ -262,7 +262,6 @@
 	rpc->rpd = dev;
 	INIT_WORK(&rpc->dpc_handler, aer_isr);
 	mutex_init(&rpc->rpc_mutex);
-	init_waitqueue_head(&rpc->wait_release);
 
 	/* Use PCIe bus function to store rpc into PCIe device */
 	set_service_data(dev, rpc);
@@ -285,8 +284,7 @@
 		if (rpc->isr)
 			free_irq(dev->irq, dev);
 
-		wait_event(rpc->wait_release, rpc->prod_idx == rpc->cons_idx);
-
+		flush_work(&rpc->dpc_handler);
 		aer_disable_rootport(rpc);
 		kfree(rpc);
 		set_service_data(dev, NULL);
diff --git a/drivers/pci/pcie/aer/aerdrv.h b/drivers/pci/pcie/aer/aerdrv.h
index 84420b7..945c939 100644
--- a/drivers/pci/pcie/aer/aerdrv.h
+++ b/drivers/pci/pcie/aer/aerdrv.h
@@ -72,7 +72,6 @@
 					 * recovery on the same
 					 * root port hierarchy
 					 */
-	wait_queue_head_t wait_release;
 };
 
 struct aer_broadcast_data {
diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c
index 7123925..521e39c 100644
--- a/drivers/pci/pcie/aer/aerdrv_core.c
+++ b/drivers/pci/pcie/aer/aerdrv_core.c
@@ -811,8 +811,6 @@
 	while (get_e_source(rpc, &e_src))
 		aer_isr_one_error(p_device, &e_src);
 	mutex_unlock(&rpc->rpc_mutex);
-
-	wake_up(&rpc->wait_release);
 }
 
 /**
diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c
index c777b97..5f70fee 100644
--- a/drivers/pci/xen-pcifront.c
+++ b/drivers/pci/xen-pcifront.c
@@ -53,7 +53,7 @@
 };
 
 struct pcifront_sd {
-	int domain;
+	struct pci_sysdata sd;
 	struct pcifront_device *pdev;
 };
 
@@ -67,7 +67,9 @@
 				    unsigned int domain, unsigned int bus,
 				    struct pcifront_device *pdev)
 {
-	sd->domain = domain;
+	/* Because we do not expose that information via XenBus. */
+	sd->sd.node = first_online_node;
+	sd->sd.domain = domain;
 	sd->pdev = pdev;
 }
 
@@ -468,8 +470,8 @@
 	dev_info(&pdev->xdev->dev, "Creating PCI Frontend Bus %04x:%02x\n",
 		 domain, bus);
 
-	bus_entry = kmalloc(sizeof(*bus_entry), GFP_KERNEL);
-	sd = kmalloc(sizeof(*sd), GFP_KERNEL);
+	bus_entry = kzalloc(sizeof(*bus_entry), GFP_KERNEL);
+	sd = kzalloc(sizeof(*sd), GFP_KERNEL);
 	if (!bus_entry || !sd) {
 		err = -ENOMEM;
 		goto err_out;
diff --git a/drivers/phy/Kconfig b/drivers/phy/Kconfig
index e7e117d..0124d17 100644
--- a/drivers/phy/Kconfig
+++ b/drivers/phy/Kconfig
@@ -224,6 +224,7 @@
 
 config PHY_HI6220_USB
 	tristate "hi6220 USB PHY support"
+	depends on (ARCH_HISI && ARM64) || COMPILE_TEST
 	select GENERIC_PHY
 	select MFD_SYSCON
 	help
diff --git a/drivers/phy/phy-core.c b/drivers/phy/phy-core.c
index 8c7f27d..e7e574d 100644
--- a/drivers/phy/phy-core.c
+++ b/drivers/phy/phy-core.c
@@ -275,20 +275,21 @@
 
 int phy_power_on(struct phy *phy)
 {
-	int ret;
+	int ret = 0;
 
 	if (!phy)
-		return 0;
+		goto out;
 
 	if (phy->pwr) {
 		ret = regulator_enable(phy->pwr);
 		if (ret)
-			return ret;
+			goto out;
 	}
 
 	ret = phy_pm_runtime_get_sync(phy);
 	if (ret < 0 && ret != -ENOTSUPP)
-		return ret;
+		goto err_pm_sync;
+
 	ret = 0; /* Override possible ret == -ENOTSUPP */
 
 	mutex_lock(&phy->mutex);
@@ -296,19 +297,20 @@
 		ret = phy->ops->power_on(phy);
 		if (ret < 0) {
 			dev_err(&phy->dev, "phy poweron failed --> %d\n", ret);
-			goto out;
+			goto err_pwr_on;
 		}
 	}
 	++phy->power_count;
 	mutex_unlock(&phy->mutex);
 	return 0;
 
-out:
+err_pwr_on:
 	mutex_unlock(&phy->mutex);
 	phy_pm_runtime_put_sync(phy);
+err_pm_sync:
 	if (phy->pwr)
 		regulator_disable(phy->pwr);
-
+out:
 	return ret;
 }
 EXPORT_SYMBOL_GPL(phy_power_on);
diff --git a/drivers/phy/phy-twl4030-usb.c b/drivers/phy/phy-twl4030-usb.c
index 4a3fc6e..840f3ea 100644
--- a/drivers/phy/phy-twl4030-usb.c
+++ b/drivers/phy/phy-twl4030-usb.c
@@ -715,6 +715,7 @@
 	pm_runtime_use_autosuspend(&pdev->dev);
 	pm_runtime_set_autosuspend_delay(&pdev->dev, 2000);
 	pm_runtime_enable(&pdev->dev);
+	pm_runtime_get_sync(&pdev->dev);
 
 	/* Our job is to use irqs and status from the power module
 	 * to keep the transceiver disabled when nothing's connected.
@@ -750,6 +751,7 @@
 	struct twl4030_usb *twl = platform_get_drvdata(pdev);
 	int val;
 
+	usb_remove_phy(&twl->phy);
 	pm_runtime_get_sync(twl->dev);
 	cancel_delayed_work(&twl->id_workaround_work);
 	device_remove_file(twl->dev, &dev_attr_vbus);
@@ -757,6 +759,13 @@
 	/* set transceiver mode to power on defaults */
 	twl4030_usb_set_mode(twl, -1);
 
+	/* idle ulpi before powering off */
+	if (cable_present(twl->linkstat))
+		pm_runtime_put_noidle(twl->dev);
+	pm_runtime_mark_last_busy(twl->dev);
+	pm_runtime_put_sync_suspend(twl->dev);
+	pm_runtime_disable(twl->dev);
+
 	/* autogate 60MHz ULPI clock,
 	 * clear dpll clock request for i2c access,
 	 * disable 32KHz
@@ -771,11 +780,6 @@
 	/* disable complete OTG block */
 	twl4030_usb_clear_bits(twl, POWER_CTRL, POWER_CTRL_OTG_ENAB);
 
-	if (cable_present(twl->linkstat))
-		pm_runtime_put_noidle(twl->dev);
-	pm_runtime_mark_last_busy(twl->dev);
-	pm_runtime_put(twl->dev);
-
 	return 0;
 }
 
diff --git a/drivers/pinctrl/mediatek/pinctrl-mtk-common.c b/drivers/pinctrl/mediatek/pinctrl-mtk-common.c
index 16d48a4..e96e86d 100644
--- a/drivers/pinctrl/mediatek/pinctrl-mtk-common.c
+++ b/drivers/pinctrl/mediatek/pinctrl-mtk-common.c
@@ -347,6 +347,7 @@
 		ret = mtk_pconf_set_pull_select(pctl, pin, true, false, arg);
 		break;
 	case PIN_CONFIG_INPUT_ENABLE:
+		mtk_pmx_gpio_set_direction(pctldev, NULL, pin, true);
 		ret = mtk_pconf_set_ies_smt(pctl, pin, arg, param);
 		break;
 	case PIN_CONFIG_OUTPUT:
@@ -354,6 +355,7 @@
 		ret = mtk_pmx_gpio_set_direction(pctldev, NULL, pin, false);
 		break;
 	case PIN_CONFIG_INPUT_SCHMITT_ENABLE:
+		mtk_pmx_gpio_set_direction(pctldev, NULL, pin, true);
 		ret = mtk_pconf_set_ies_smt(pctl, pin, arg, param);
 		break;
 	case PIN_CONFIG_DRIVE_STRENGTH:
diff --git a/drivers/pinctrl/mvebu/pinctrl-mvebu.c b/drivers/pinctrl/mvebu/pinctrl-mvebu.c
index e4d4738..3ef798f 100644
--- a/drivers/pinctrl/mvebu/pinctrl-mvebu.c
+++ b/drivers/pinctrl/mvebu/pinctrl-mvebu.c
@@ -666,16 +666,19 @@
 		struct mvebu_mpp_ctrl_setting *set = &mode->settings[0];
 		struct mvebu_pinctrl_group *grp;
 		unsigned num_settings;
+		unsigned supp_settings;
 
-		for (num_settings = 0; ; set++) {
+		for (num_settings = 0, supp_settings = 0; ; set++) {
 			if (!set->name)
 				break;
 
+			num_settings++;
+
 			/* skip unsupported settings for this variant */
 			if (pctl->variant && !(pctl->variant & set->variant))
 				continue;
 
-			num_settings++;
+			supp_settings++;
 
 			/* find gpio/gpo/gpi settings */
 			if (strcmp(set->name, "gpio") == 0)
@@ -688,7 +691,7 @@
 		}
 
 		/* skip modes with no settings for this variant */
-		if (!num_settings)
+		if (!supp_settings)
 			continue;
 
 		grp = mvebu_pinctrl_find_group_by_pid(pctl, mode->pid);
diff --git a/drivers/pinctrl/nomadik/pinctrl-abx500.c b/drivers/pinctrl/nomadik/pinctrl-abx500.c
index 085e601..1f7469c 100644
--- a/drivers/pinctrl/nomadik/pinctrl-abx500.c
+++ b/drivers/pinctrl/nomadik/pinctrl-abx500.c
@@ -191,6 +191,7 @@
 		dev_err(pct->dev, "%s write failed (%d)\n", __func__, ret);
 }
 
+#ifdef CONFIG_DEBUG_FS
 static int abx500_get_pull_updown(struct abx500_pinctrl *pct, int offset,
 				  enum abx500_gpio_pull_updown *pull_updown)
 {
@@ -226,6 +227,7 @@
 
 	return ret;
 }
+#endif
 
 static int abx500_set_pull_updown(struct abx500_pinctrl *pct,
 				  int offset, enum abx500_gpio_pull_updown val)
@@ -468,6 +470,7 @@
 	return ret;
 }
 
+#ifdef CONFIG_DEBUG_FS
 static int abx500_get_mode(struct pinctrl_dev *pctldev, struct gpio_chip *chip,
 			  unsigned gpio)
 {
@@ -553,8 +556,6 @@
 	return ret;
 }
 
-#ifdef CONFIG_DEBUG_FS
-
 #include <linux/seq_file.h>
 
 static void abx500_gpio_dbg_show_one(struct seq_file *s,
diff --git a/drivers/pinctrl/pxa/pinctrl-pxa2xx.c b/drivers/pinctrl/pxa/pinctrl-pxa2xx.c
index d90e205..216f227 100644
--- a/drivers/pinctrl/pxa/pinctrl-pxa2xx.c
+++ b/drivers/pinctrl/pxa/pinctrl-pxa2xx.c
@@ -426,6 +426,7 @@
 
 	return 0;
 }
+EXPORT_SYMBOL(pxa2xx_pinctrl_init);
 
 int pxa2xx_pinctrl_exit(struct platform_device *pdev)
 {
diff --git a/drivers/pinctrl/samsung/pinctrl-samsung.c b/drivers/pinctrl/samsung/pinctrl-samsung.c
index f67b1e9..5cc97f8 100644
--- a/drivers/pinctrl/samsung/pinctrl-samsung.c
+++ b/drivers/pinctrl/samsung/pinctrl-samsung.c
@@ -514,25 +514,35 @@
 	.pin_config_group_set	= samsung_pinconf_group_set,
 };
 
-/* gpiolib gpio_set callback function */
-static void samsung_gpio_set(struct gpio_chip *gc, unsigned offset, int value)
+/*
+ * The samsung_gpio_set_vlaue() should be called with "bank->slock" held
+ * to avoid race condition.
+ */
+static void samsung_gpio_set_value(struct gpio_chip *gc,
+					  unsigned offset, int value)
 {
 	struct samsung_pin_bank *bank = gpiochip_get_data(gc);
 	const struct samsung_pin_bank_type *type = bank->type;
-	unsigned long flags;
 	void __iomem *reg;
 	u32 data;
 
 	reg = bank->drvdata->virt_base + bank->pctl_offset;
 
-	spin_lock_irqsave(&bank->slock, flags);
-
 	data = readl(reg + type->reg_offset[PINCFG_TYPE_DAT]);
 	data &= ~(1 << offset);
 	if (value)
 		data |= 1 << offset;
 	writel(data, reg + type->reg_offset[PINCFG_TYPE_DAT]);
+}
 
+/* gpiolib gpio_set callback function */
+static void samsung_gpio_set(struct gpio_chip *gc, unsigned offset, int value)
+{
+	struct samsung_pin_bank *bank = gpiochip_get_data(gc);
+	unsigned long flags;
+
+	spin_lock_irqsave(&bank->slock, flags);
+	samsung_gpio_set_value(gc, offset, value);
 	spin_unlock_irqrestore(&bank->slock, flags);
 }
 
@@ -553,6 +563,8 @@
 }
 
 /*
+ * The samsung_gpio_set_direction() should be called with "bank->slock" held
+ * to avoid race condition.
  * The calls to gpio_direction_output() and gpio_direction_input()
  * leads to this function call.
  */
@@ -564,7 +576,6 @@
 	struct samsung_pinctrl_drv_data *drvdata;
 	void __iomem *reg;
 	u32 data, mask, shift;
-	unsigned long flags;
 
 	bank = gpiochip_get_data(gc);
 	type = bank->type;
@@ -581,31 +592,42 @@
 		reg += 4;
 	}
 
-	spin_lock_irqsave(&bank->slock, flags);
-
 	data = readl(reg);
 	data &= ~(mask << shift);
 	if (!input)
 		data |= FUNC_OUTPUT << shift;
 	writel(data, reg);
 
-	spin_unlock_irqrestore(&bank->slock, flags);
-
 	return 0;
 }
 
 /* gpiolib gpio_direction_input callback function. */
 static int samsung_gpio_direction_input(struct gpio_chip *gc, unsigned offset)
 {
-	return samsung_gpio_set_direction(gc, offset, true);
+	struct samsung_pin_bank *bank = gpiochip_get_data(gc);
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&bank->slock, flags);
+	ret = samsung_gpio_set_direction(gc, offset, true);
+	spin_unlock_irqrestore(&bank->slock, flags);
+	return ret;
 }
 
 /* gpiolib gpio_direction_output callback function. */
 static int samsung_gpio_direction_output(struct gpio_chip *gc, unsigned offset,
 							int value)
 {
-	samsung_gpio_set(gc, offset, value);
-	return samsung_gpio_set_direction(gc, offset, false);
+	struct samsung_pin_bank *bank = gpiochip_get_data(gc);
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&bank->slock, flags);
+	samsung_gpio_set_value(gc, offset, value);
+	ret = samsung_gpio_set_direction(gc, offset, false);
+	spin_unlock_irqrestore(&bank->slock, flags);
+
+	return ret;
 }
 
 /*
diff --git a/drivers/pinctrl/sunxi/pinctrl-sun8i-h3.c b/drivers/pinctrl/sunxi/pinctrl-sun8i-h3.c
index 77d4cf0..11760bb 100644
--- a/drivers/pinctrl/sunxi/pinctrl-sun8i-h3.c
+++ b/drivers/pinctrl/sunxi/pinctrl-sun8i-h3.c
@@ -492,6 +492,7 @@
 	.pins = sun8i_h3_pins,
 	.npins = ARRAY_SIZE(sun8i_h3_pins),
 	.irq_banks = 2,
+	.irq_read_needs_mux = true
 };
 
 static int sun8i_h3_pinctrl_probe(struct platform_device *pdev)
diff --git a/drivers/platform/x86/intel-hid.c b/drivers/platform/x86/intel-hid.c
index 20f0ad9..e20f23e 100644
--- a/drivers/platform/x86/intel-hid.c
+++ b/drivers/platform/x86/intel-hid.c
@@ -41,8 +41,7 @@
 	{ KE_KEY, 4, { KEY_HOME } },
 	{ KE_KEY, 5, { KEY_END } },
 	{ KE_KEY, 6, { KEY_PAGEUP } },
-	{ KE_KEY, 4, { KEY_PAGEDOWN } },
-	{ KE_KEY, 4, { KEY_HOME } },
+	{ KE_KEY, 7, { KEY_PAGEDOWN } },
 	{ KE_KEY, 8, { KEY_RFKILL } },
 	{ KE_KEY, 9, { KEY_POWER } },
 	{ KE_KEY, 11, { KEY_SLEEP } },
diff --git a/drivers/platform/x86/intel_scu_ipcutil.c b/drivers/platform/x86/intel_scu_ipcutil.c
index 02bc5a6..aa45424 100644
--- a/drivers/platform/x86/intel_scu_ipcutil.c
+++ b/drivers/platform/x86/intel_scu_ipcutil.c
@@ -49,7 +49,7 @@
 
 static int scu_reg_access(u32 cmd, struct scu_ipc_data  *data)
 {
-	int count = data->count;
+	unsigned int count = data->count;
 
 	if (count == 0 || count == 3 || count > 4)
 		return -EINVAL;
diff --git a/drivers/pnp/quirks.c b/drivers/pnp/quirks.c
index f700723..d28e3ab 100644
--- a/drivers/pnp/quirks.c
+++ b/drivers/pnp/quirks.c
@@ -342,6 +342,7 @@
 /* Device IDs of parts that have 32KB MCH space */
 static const unsigned int mch_quirk_devices[] = {
 	0x0154,	/* Ivy Bridge */
+	0x0a04, /* Haswell-ULT */
 	0x0c00,	/* Haswell */
 	0x1604, /* Broadwell */
 };
diff --git a/drivers/power/bq27xxx_battery_i2c.c b/drivers/power/bq27xxx_battery_i2c.c
index 9429e66..8eafc6f 100644
--- a/drivers/power/bq27xxx_battery_i2c.c
+++ b/drivers/power/bq27xxx_battery_i2c.c
@@ -21,6 +21,9 @@
 
 #include <linux/power/bq27xxx_battery.h>
 
+static DEFINE_IDR(battery_id);
+static DEFINE_MUTEX(battery_mutex);
+
 static irqreturn_t bq27xxx_battery_irq_handler_thread(int irq, void *data)
 {
 	struct bq27xxx_device_info *di = data;
@@ -70,19 +73,33 @@
 {
 	struct bq27xxx_device_info *di;
 	int ret;
+	char *name;
+	int num;
+
+	/* Get new ID for the new battery device */
+	mutex_lock(&battery_mutex);
+	num = idr_alloc(&battery_id, client, 0, 0, GFP_KERNEL);
+	mutex_unlock(&battery_mutex);
+	if (num < 0)
+		return num;
+
+	name = devm_kasprintf(&client->dev, GFP_KERNEL, "%s-%d", id->name, num);
+	if (!name)
+		goto err_mem;
 
 	di = devm_kzalloc(&client->dev, sizeof(*di), GFP_KERNEL);
 	if (!di)
-		return -ENOMEM;
+		goto err_mem;
 
+	di->id = num;
 	di->dev = &client->dev;
 	di->chip = id->driver_data;
-	di->name = id->name;
+	di->name = name;
 	di->bus.read = bq27xxx_battery_i2c_read;
 
 	ret = bq27xxx_battery_setup(di);
 	if (ret)
-		return ret;
+		goto err_failed;
 
 	/* Schedule a polling after about 1 min */
 	schedule_delayed_work(&di->work, 60 * HZ);
@@ -103,6 +120,16 @@
 	}
 
 	return 0;
+
+err_mem:
+	ret = -ENOMEM;
+
+err_failed:
+	mutex_lock(&battery_mutex);
+	idr_remove(&battery_id, num);
+	mutex_unlock(&battery_mutex);
+
+	return ret;
 }
 
 static int bq27xxx_battery_i2c_remove(struct i2c_client *client)
@@ -111,6 +138,10 @@
 
 	bq27xxx_battery_teardown(di);
 
+	mutex_lock(&battery_mutex);
+	idr_remove(&battery_id, di->id);
+	mutex_unlock(&battery_mutex);
+
 	return 0;
 }
 
diff --git a/drivers/ptp/ptp_ixp46x.c b/drivers/ptp/ptp_ixp46x.c
index 934c139..ee4f183 100644
--- a/drivers/ptp/ptp_ixp46x.c
+++ b/drivers/ptp/ptp_ixp46x.c
@@ -178,7 +178,6 @@
 static int ptp_ixp_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts)
 {
 	u64 ns;
-	u32 remainder;
 	unsigned long flags;
 	struct ixp_clock *ixp_clock = container_of(ptp, struct ixp_clock, caps);
 	struct ixp46x_ts_regs *regs = ixp_clock->regs;
@@ -189,8 +188,7 @@
 
 	spin_unlock_irqrestore(&register_lock, flags);
 
-	ts->tv_sec = div_u64_rem(ns, 1000000000, &remainder);
-	ts->tv_nsec = remainder;
+	*ts = ns_to_timespec64(ns);
 	return 0;
 }
 
@@ -202,8 +200,7 @@
 	struct ixp_clock *ixp_clock = container_of(ptp, struct ixp_clock, caps);
 	struct ixp46x_ts_regs *regs = ixp_clock->regs;
 
-	ns = ts->tv_sec * 1000000000ULL;
-	ns += ts->tv_nsec;
+	ns = timespec64_to_ns(ts);
 
 	spin_lock_irqsave(&register_lock, flags);
 
diff --git a/drivers/rapidio/rio.c b/drivers/rapidio/rio.c
index d7b87c6..e220edc 100644
--- a/drivers/rapidio/rio.c
+++ b/drivers/rapidio/rio.c
@@ -117,7 +117,7 @@
 	if (mport->ops->open_inb_mbox == NULL)
 		goto out;
 
-	res = kmalloc(sizeof(struct resource), GFP_KERNEL);
+	res = kzalloc(sizeof(struct resource), GFP_KERNEL);
 
 	if (res) {
 		rio_init_mbox_res(res, mbox, mbox);
@@ -185,7 +185,7 @@
 	if (mport->ops->open_outb_mbox == NULL)
 		goto out;
 
-	res = kmalloc(sizeof(struct resource), GFP_KERNEL);
+	res = kzalloc(sizeof(struct resource), GFP_KERNEL);
 
 	if (res) {
 		rio_init_mbox_res(res, mbox, mbox);
@@ -285,7 +285,7 @@
 {
 	int rc = 0;
 
-	struct resource *res = kmalloc(sizeof(struct resource), GFP_KERNEL);
+	struct resource *res = kzalloc(sizeof(struct resource), GFP_KERNEL);
 
 	if (res) {
 		rio_init_dbell_res(res, start, end);
@@ -360,7 +360,7 @@
 struct resource *rio_request_outb_dbell(struct rio_dev *rdev, u16 start,
 					u16 end)
 {
-	struct resource *res = kmalloc(sizeof(struct resource), GFP_KERNEL);
+	struct resource *res = kzalloc(sizeof(struct resource), GFP_KERNEL);
 
 	if (res) {
 		rio_init_dbell_res(res, start, end);
diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
index 41605da..c78db05 100644
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -3035,6 +3035,7 @@
 		max = block->base->discipline->max_blocks << block->s2b_shift;
 	}
 	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, block->request_queue);
+	block->request_queue->limits.max_dev_sectors = max;
 	blk_queue_logical_block_size(block->request_queue,
 				     block->bp_block);
 	blk_queue_max_hw_sectors(block->request_queue, max);
diff --git a/drivers/s390/block/dasd_alias.c b/drivers/s390/block/dasd_alias.c
index 184b1db..286782c 100644
--- a/drivers/s390/block/dasd_alias.c
+++ b/drivers/s390/block/dasd_alias.c
@@ -264,8 +264,10 @@
 		spin_unlock_irqrestore(&lcu->lock, flags);
 		cancel_work_sync(&lcu->suc_data.worker);
 		spin_lock_irqsave(&lcu->lock, flags);
-		if (device == lcu->suc_data.device)
+		if (device == lcu->suc_data.device) {
+			dasd_put_device(device);
 			lcu->suc_data.device = NULL;
+		}
 	}
 	was_pending = 0;
 	if (device == lcu->ruac_data.device) {
@@ -273,8 +275,10 @@
 		was_pending = 1;
 		cancel_delayed_work_sync(&lcu->ruac_data.dwork);
 		spin_lock_irqsave(&lcu->lock, flags);
-		if (device == lcu->ruac_data.device)
+		if (device == lcu->ruac_data.device) {
+			dasd_put_device(device);
 			lcu->ruac_data.device = NULL;
+		}
 	}
 	private->lcu = NULL;
 	spin_unlock_irqrestore(&lcu->lock, flags);
@@ -549,8 +553,10 @@
 	if ((rc && (rc != -EOPNOTSUPP)) || (lcu->flags & NEED_UAC_UPDATE)) {
 		DBF_DEV_EVENT(DBF_WARNING, device, "could not update"
 			    " alias data in lcu (rc = %d), retry later", rc);
-		schedule_delayed_work(&lcu->ruac_data.dwork, 30*HZ);
+		if (!schedule_delayed_work(&lcu->ruac_data.dwork, 30*HZ))
+			dasd_put_device(device);
 	} else {
+		dasd_put_device(device);
 		lcu->ruac_data.device = NULL;
 		lcu->flags &= ~UPDATE_PENDING;
 	}
@@ -593,8 +599,10 @@
 	 */
 	if (!usedev)
 		return -EINVAL;
+	dasd_get_device(usedev);
 	lcu->ruac_data.device = usedev;
-	schedule_delayed_work(&lcu->ruac_data.dwork, 0);
+	if (!schedule_delayed_work(&lcu->ruac_data.dwork, 0))
+		dasd_put_device(usedev);
 	return 0;
 }
 
@@ -723,7 +731,7 @@
 	ASCEBC((char *) &cqr->magic, 4);
 	ccw = cqr->cpaddr;
 	ccw->cmd_code = DASD_ECKD_CCW_RSCK;
-	ccw->flags = 0 ;
+	ccw->flags = CCW_FLAG_SLI;
 	ccw->count = 16;
 	ccw->cda = (__u32)(addr_t) cqr->data;
 	((char *)cqr->data)[0] = reason;
@@ -930,6 +938,7 @@
 	/* 3. read new alias configuration */
 	_schedule_lcu_update(lcu, device);
 	lcu->suc_data.device = NULL;
+	dasd_put_device(device);
 	spin_unlock_irqrestore(&lcu->lock, flags);
 }
 
@@ -989,6 +998,8 @@
 	}
 	lcu->suc_data.reason = reason;
 	lcu->suc_data.device = device;
+	dasd_get_device(device);
 	spin_unlock(&lcu->lock);
-	schedule_work(&lcu->suc_data.worker);
+	if (!schedule_work(&lcu->suc_data.worker))
+		dasd_put_device(device);
 };
diff --git a/drivers/s390/block/dasd_diag.c b/drivers/s390/block/dasd_diag.c
index cb61f30..277b5c8 100644
--- a/drivers/s390/block/dasd_diag.c
+++ b/drivers/s390/block/dasd_diag.c
@@ -67,7 +67,7 @@
  * and function code cmd.
  * In case of an exception return 3. Otherwise return result of bitwise OR of
  * resulting condition code and DIAG return code. */
-static inline int dia250(void *iob, int cmd)
+static inline int __dia250(void *iob, int cmd)
 {
 	register unsigned long reg2 asm ("2") = (unsigned long) iob;
 	typedef union {
@@ -77,7 +77,6 @@
 	int rc;
 
 	rc = 3;
-	diag_stat_inc(DIAG_STAT_X250);
 	asm volatile(
 		"	diag	2,%2,0x250\n"
 		"0:	ipm	%0\n"
@@ -91,6 +90,12 @@
 	return rc;
 }
 
+static inline int dia250(void *iob, int cmd)
+{
+	diag_stat_inc(DIAG_STAT_X250);
+	return __dia250(iob, cmd);
+}
+
 /* Initialize block I/O to DIAG device using the specified blocksize and
  * block offset. On success, return zero and set end_block to contain the
  * number of blocks on the device minus the specified offset. Return non-zero
diff --git a/drivers/s390/cio/chp.c b/drivers/s390/cio/chp.c
index c692dfe..50597f9 100644
--- a/drivers/s390/cio/chp.c
+++ b/drivers/s390/cio/chp.c
@@ -139,11 +139,11 @@
 
 	device = container_of(kobj, struct device, kobj);
 	chp = to_channelpath(device);
-	if (!chp->cmg_chars)
+	if (chp->cmg == -1)
 		return 0;
 
-	return memory_read_from_buffer(buf, count, &off,
-				chp->cmg_chars, sizeof(struct cmg_chars));
+	return memory_read_from_buffer(buf, count, &off, &chp->cmg_chars,
+				       sizeof(chp->cmg_chars));
 }
 
 static struct bin_attribute chp_measurement_chars_attr = {
@@ -416,7 +416,8 @@
  * chp_update_desc - update channel-path description
  * @chp - channel-path
  *
- * Update the channel-path description of the specified channel-path.
+ * Update the channel-path description of the specified channel-path
+ * including channel measurement related information.
  * Return zero on success, non-zero otherwise.
  */
 int chp_update_desc(struct channel_path *chp)
@@ -428,8 +429,10 @@
 		return rc;
 
 	rc = chsc_determine_fmt1_channel_path_desc(chp->chpid, &chp->desc_fmt1);
+	if (rc)
+		return rc;
 
-	return rc;
+	return chsc_get_channel_measurement_chars(chp);
 }
 
 /**
@@ -466,14 +469,6 @@
 		ret = -ENODEV;
 		goto out_free;
 	}
-	/* Get channel-measurement characteristics. */
-	if (css_chsc_characteristics.scmc && css_chsc_characteristics.secm) {
-		ret = chsc_get_channel_measurement_chars(chp);
-		if (ret)
-			goto out_free;
-	} else {
-		chp->cmg = -1;
-	}
 	dev_set_name(&chp->dev, "chp%x.%02x", chpid.cssid, chpid.id);
 
 	/* make it known to the system */
diff --git a/drivers/s390/cio/chp.h b/drivers/s390/cio/chp.h
index 4efd5b8..af02322 100644
--- a/drivers/s390/cio/chp.h
+++ b/drivers/s390/cio/chp.h
@@ -48,7 +48,7 @@
 	/* Channel-measurement related stuff: */
 	int cmg;
 	int shared;
-	void *cmg_chars;
+	struct cmg_chars cmg_chars;
 };
 
 /* Return channel_path struct for given chpid. */
diff --git a/drivers/s390/cio/chsc.c b/drivers/s390/cio/chsc.c
index a831d18..c424c0c 100644
--- a/drivers/s390/cio/chsc.c
+++ b/drivers/s390/cio/chsc.c
@@ -14,6 +14,7 @@
 #include <linux/slab.h>
 #include <linux/init.h>
 #include <linux/device.h>
+#include <linux/mutex.h>
 #include <linux/pci.h>
 
 #include <asm/cio.h>
@@ -224,8 +225,9 @@
 
 void chsc_chp_offline(struct chp_id chpid)
 {
-	char dbf_txt[15];
+	struct channel_path *chp = chpid_to_chp(chpid);
 	struct chp_link link;
+	char dbf_txt[15];
 
 	sprintf(dbf_txt, "chpr%x.%02x", chpid.cssid, chpid.id);
 	CIO_TRACE_EVENT(2, dbf_txt);
@@ -236,6 +238,11 @@
 	link.chpid = chpid;
 	/* Wait until previous actions have settled. */
 	css_wait_for_slow_path();
+
+	mutex_lock(&chp->lock);
+	chp_update_desc(chp);
+	mutex_unlock(&chp->lock);
+
 	for_each_subchannel_staged(s390_subchannel_remove_chpid, NULL, &link);
 }
 
@@ -690,8 +697,9 @@
 
 void chsc_chp_online(struct chp_id chpid)
 {
-	char dbf_txt[15];
+	struct channel_path *chp = chpid_to_chp(chpid);
 	struct chp_link link;
+	char dbf_txt[15];
 
 	sprintf(dbf_txt, "cadd%x.%02x", chpid.cssid, chpid.id);
 	CIO_TRACE_EVENT(2, dbf_txt);
@@ -701,6 +709,11 @@
 		link.chpid = chpid;
 		/* Wait until previous actions have settled. */
 		css_wait_for_slow_path();
+
+		mutex_lock(&chp->lock);
+		chp_update_desc(chp);
+		mutex_unlock(&chp->lock);
+
 		for_each_subchannel_staged(__s390_process_res_acc, NULL,
 					   &link);
 		css_schedule_reprobe();
@@ -967,22 +980,19 @@
 chsc_initialize_cmg_chars(struct channel_path *chp, u8 cmcv,
 			  struct cmg_chars *chars)
 {
-	struct cmg_chars *cmg_chars;
 	int i, mask;
 
-	cmg_chars = chp->cmg_chars;
 	for (i = 0; i < NR_MEASUREMENT_CHARS; i++) {
 		mask = 0x80 >> (i + 3);
 		if (cmcv & mask)
-			cmg_chars->values[i] = chars->values[i];
+			chp->cmg_chars.values[i] = chars->values[i];
 		else
-			cmg_chars->values[i] = 0;
+			chp->cmg_chars.values[i] = 0;
 	}
 }
 
 int chsc_get_channel_measurement_chars(struct channel_path *chp)
 {
-	struct cmg_chars *cmg_chars;
 	int ccode, ret;
 
 	struct {
@@ -1006,10 +1016,11 @@
 		u32 data[NR_MEASUREMENT_CHARS];
 	} __attribute__ ((packed)) *scmc_area;
 
-	chp->cmg_chars = NULL;
-	cmg_chars = kmalloc(sizeof(*cmg_chars), GFP_KERNEL);
-	if (!cmg_chars)
-		return -ENOMEM;
+	chp->shared = -1;
+	chp->cmg = -1;
+
+	if (!css_chsc_characteristics.scmc || !css_chsc_characteristics.secm)
+		return 0;
 
 	spin_lock_irq(&chsc_page_lock);
 	memset(chsc_page, 0, PAGE_SIZE);
@@ -1031,25 +1042,19 @@
 			      scmc_area->response.code);
 		goto out;
 	}
-	if (scmc_area->not_valid) {
-		chp->cmg = -1;
-		chp->shared = -1;
+	if (scmc_area->not_valid)
 		goto out;
-	}
+
 	chp->cmg = scmc_area->cmg;
 	chp->shared = scmc_area->shared;
 	if (chp->cmg != 2 && chp->cmg != 3) {
 		/* No cmg-dependent data. */
 		goto out;
 	}
-	chp->cmg_chars = cmg_chars;
 	chsc_initialize_cmg_chars(chp, scmc_area->cmcv,
 				  (struct cmg_chars *) &scmc_area->data);
 out:
 	spin_unlock_irq(&chsc_page_lock);
-	if (!chp->cmg_chars)
-		kfree(cmg_chars);
-
 	return ret;
 }
 
diff --git a/drivers/s390/crypto/zcrypt_error.h b/drivers/s390/crypto/zcrypt_error.h
index 7b23f43..de1b6c1 100644
--- a/drivers/s390/crypto/zcrypt_error.h
+++ b/drivers/s390/crypto/zcrypt_error.h
@@ -112,9 +112,10 @@
 		atomic_set(&zcrypt_rescan_req, 1);
 		zdev->online = 0;
 		pr_err("Cryptographic device %x failed and was set offline\n",
-		       zdev->ap_dev->qid);
+		       AP_QID_DEVICE(zdev->ap_dev->qid));
 		ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%drc%d",
-			zdev->ap_dev->qid, zdev->online, ehdr->reply_code);
+			AP_QID_DEVICE(zdev->ap_dev->qid), zdev->online,
+			ehdr->reply_code);
 		return -EAGAIN;
 	case REP82_ERROR_TRANSPORT_FAIL:
 	case REP82_ERROR_MACHINE_FAILURE:
@@ -123,16 +124,18 @@
 		atomic_set(&zcrypt_rescan_req, 1);
 		zdev->online = 0;
 		pr_err("Cryptographic device %x failed and was set offline\n",
-		       zdev->ap_dev->qid);
+		       AP_QID_DEVICE(zdev->ap_dev->qid));
 		ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%drc%d",
-			zdev->ap_dev->qid, zdev->online, ehdr->reply_code);
+			AP_QID_DEVICE(zdev->ap_dev->qid), zdev->online,
+			ehdr->reply_code);
 		return -EAGAIN;
 	default:
 		zdev->online = 0;
 		pr_err("Cryptographic device %x failed and was set offline\n",
-		       zdev->ap_dev->qid);
+		       AP_QID_DEVICE(zdev->ap_dev->qid));
 		ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%drc%d",
-			zdev->ap_dev->qid, zdev->online, ehdr->reply_code);
+			AP_QID_DEVICE(zdev->ap_dev->qid), zdev->online,
+			ehdr->reply_code);
 		return -EAGAIN;	/* repeat the request on a different device. */
 	}
 }
diff --git a/drivers/s390/crypto/zcrypt_msgtype50.c b/drivers/s390/crypto/zcrypt_msgtype50.c
index 74edf29..eedfaa2 100644
--- a/drivers/s390/crypto/zcrypt_msgtype50.c
+++ b/drivers/s390/crypto/zcrypt_msgtype50.c
@@ -336,9 +336,10 @@
 		/* The result is too short, the CEX2A card may not do that.. */
 		zdev->online = 0;
 		pr_err("Cryptographic device %x failed and was set offline\n",
-		       zdev->ap_dev->qid);
+		       AP_QID_DEVICE(zdev->ap_dev->qid));
 		ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%drc%d",
-			       zdev->ap_dev->qid, zdev->online, t80h->code);
+			       AP_QID_DEVICE(zdev->ap_dev->qid),
+			       zdev->online, t80h->code);
 
 		return -EAGAIN;	/* repeat the request on a different device. */
 	}
@@ -368,9 +369,9 @@
 	default: /* Unknown response type, this should NEVER EVER happen */
 		zdev->online = 0;
 		pr_err("Cryptographic device %x failed and was set offline\n",
-		       zdev->ap_dev->qid);
+		       AP_QID_DEVICE(zdev->ap_dev->qid));
 		ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%dfail",
-			       zdev->ap_dev->qid, zdev->online);
+			       AP_QID_DEVICE(zdev->ap_dev->qid), zdev->online);
 		return -EAGAIN;	/* repeat the request on a different device. */
 	}
 }
diff --git a/drivers/s390/crypto/zcrypt_msgtype6.c b/drivers/s390/crypto/zcrypt_msgtype6.c
index 9a2dd47..2195971 100644
--- a/drivers/s390/crypto/zcrypt_msgtype6.c
+++ b/drivers/s390/crypto/zcrypt_msgtype6.c
@@ -572,9 +572,9 @@
 			return -EINVAL;
 		zdev->online = 0;
 		pr_err("Cryptographic device %x failed and was set offline\n",
-		       zdev->ap_dev->qid);
+		       AP_QID_DEVICE(zdev->ap_dev->qid));
 		ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%drc%d",
-			       zdev->ap_dev->qid, zdev->online,
+			       AP_QID_DEVICE(zdev->ap_dev->qid), zdev->online,
 			       msg->hdr.reply_code);
 		return -EAGAIN;	/* repeat the request on a different device. */
 	}
@@ -715,9 +715,9 @@
 	default: /* Unknown response type, this should NEVER EVER happen */
 		zdev->online = 0;
 		pr_err("Cryptographic device %x failed and was set offline\n",
-		       zdev->ap_dev->qid);
+		       AP_QID_DEVICE(zdev->ap_dev->qid));
 		ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%dfail",
-			       zdev->ap_dev->qid, zdev->online);
+			       AP_QID_DEVICE(zdev->ap_dev->qid), zdev->online);
 		return -EAGAIN;	/* repeat the request on a different device. */
 	}
 }
@@ -747,9 +747,9 @@
 		xcRB->status = 0x0008044DL; /* HDD_InvalidParm */
 		zdev->online = 0;
 		pr_err("Cryptographic device %x failed and was set offline\n",
-		       zdev->ap_dev->qid);
+		       AP_QID_DEVICE(zdev->ap_dev->qid));
 		ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%dfail",
-			       zdev->ap_dev->qid, zdev->online);
+			       AP_QID_DEVICE(zdev->ap_dev->qid), zdev->online);
 		return -EAGAIN;	/* repeat the request on a different device. */
 	}
 }
@@ -773,9 +773,9 @@
 	default: /* Unknown response type, this should NEVER EVER happen */
 		zdev->online = 0;
 		pr_err("Cryptographic device %x failed and was set offline\n",
-		       zdev->ap_dev->qid);
+		       AP_QID_DEVICE(zdev->ap_dev->qid));
 		ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%dfail",
-			       zdev->ap_dev->qid, zdev->online);
+			       AP_QID_DEVICE(zdev->ap_dev->qid), zdev->online);
 		return -EAGAIN; /* repeat the request on a different device. */
 	}
 }
@@ -800,9 +800,9 @@
 	default: /* Unknown response type, this should NEVER EVER happen */
 		zdev->online = 0;
 		pr_err("Cryptographic device %x failed and was set offline\n",
-		       zdev->ap_dev->qid);
+		       AP_QID_DEVICE(zdev->ap_dev->qid));
 		ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%dfail",
-			       zdev->ap_dev->qid, zdev->online);
+			       AP_QID_DEVICE(zdev->ap_dev->qid), zdev->online);
 		return -EAGAIN;	/* repeat the request on a different device. */
 	}
 }
diff --git a/drivers/scsi/device_handler/scsi_dh_rdac.c b/drivers/scsi/device_handler/scsi_dh_rdac.c
index 3613581..93880ed 100644
--- a/drivers/scsi/device_handler/scsi_dh_rdac.c
+++ b/drivers/scsi/device_handler/scsi_dh_rdac.c
@@ -562,7 +562,7 @@
 			/*
 			 * Command Lock contention
 			 */
-			err = SCSI_DH_RETRY;
+			err = SCSI_DH_IMM_RETRY;
 		break;
 	default:
 		break;
@@ -612,6 +612,8 @@
 		err = mode_select_handle_sense(sdev, h->sense);
 		if (err == SCSI_DH_RETRY && retry_cnt--)
 			goto retry;
+		if (err == SCSI_DH_IMM_RETRY)
+			goto retry;
 	}
 	if (err == SCSI_DH_OK) {
 		h->state = RDAC_STATE_ACTIVE;
diff --git a/drivers/scsi/hisi_sas/Kconfig b/drivers/scsi/hisi_sas/Kconfig
index 37a0c71..d1dd161 100644
--- a/drivers/scsi/hisi_sas/Kconfig
+++ b/drivers/scsi/hisi_sas/Kconfig
@@ -1,5 +1,7 @@
 config SCSI_HISI_SAS
 	tristate "HiSilicon SAS"
+	depends on HAS_DMA && HAS_IOMEM
+	depends on ARM64 || COMPILE_TEST
 	select SCSI_SAS_LIBSAS
 	select BLK_DEV_INTEGRITY
 	help
diff --git a/drivers/scsi/hisi_sas/hisi_sas_v1_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v1_hw.c
index 057fdeb..eea24d7 100644
--- a/drivers/scsi/hisi_sas/hisi_sas_v1_hw.c
+++ b/drivers/scsi/hisi_sas/hisi_sas_v1_hw.c
@@ -1289,13 +1289,10 @@
 		goto out;
 	}
 
-	if (cmplt_hdr_data & CMPLT_HDR_ERR_RCRD_XFRD_MSK) {
-		if (!(cmplt_hdr_data & CMPLT_HDR_CMD_CMPLT_MSK) ||
-		    !(cmplt_hdr_data & CMPLT_HDR_RSPNS_XFRD_MSK))
-			ts->stat = SAS_DATA_OVERRUN;
-		else
-			slot_err_v1_hw(hisi_hba, task, slot);
+	if (cmplt_hdr_data & CMPLT_HDR_ERR_RCRD_XFRD_MSK &&
+		!(cmplt_hdr_data & CMPLT_HDR_RSPNS_XFRD_MSK)) {
 
+		slot_err_v1_hw(hisi_hba, task, slot);
 		goto out;
 	}
 
diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c
index 3b3e099..d6a691e 100644
--- a/drivers/scsi/ipr.c
+++ b/drivers/scsi/ipr.c
@@ -4002,6 +4002,7 @@
 	struct ipr_sglist *sglist;
 	char fname[100];
 	char *src;
+	char *endline;
 	int result, dnld_size;
 
 	if (!capable(CAP_SYS_ADMIN))
@@ -4009,6 +4010,10 @@
 
 	snprintf(fname, sizeof(fname), "%s", buf);
 
+	endline = strchr(fname, '\n');
+	if (endline)
+		*endline = '\0';
+
 	if (request_firmware(&fw_entry, fname, &ioa_cfg->pdev->dev)) {
 		dev_err(&ioa_cfg->pdev->dev, "Firmware file %s not found\n", fname);
 		return -EIO;
diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
index 52a8765..692a757 100644
--- a/drivers/scsi/qla2xxx/qla_init.c
+++ b/drivers/scsi/qla2xxx/qla_init.c
@@ -2204,7 +2204,7 @@
 	/* Clear outstanding commands array. */
 	for (que = 0; que < ha->max_req_queues; que++) {
 		req = ha->req_q_map[que];
-		if (!req)
+		if (!req || !test_bit(que, ha->req_qid_map))
 			continue;
 		req->out_ptr = (void *)(req->ring + req->length);
 		*req->out_ptr = 0;
@@ -2221,7 +2221,7 @@
 
 	for (que = 0; que < ha->max_rsp_queues; que++) {
 		rsp = ha->rsp_q_map[que];
-		if (!rsp)
+		if (!rsp || !test_bit(que, ha->rsp_qid_map))
 			continue;
 		rsp->in_ptr = (void *)(rsp->ring + rsp->length);
 		*rsp->in_ptr = 0;
@@ -4981,7 +4981,7 @@
 
 	for (i = 1; i < ha->max_rsp_queues; i++) {
 		rsp = ha->rsp_q_map[i];
-		if (rsp) {
+		if (rsp && test_bit(i, ha->rsp_qid_map)) {
 			rsp->options &= ~BIT_0;
 			ret = qla25xx_init_rsp_que(base_vha, rsp);
 			if (ret != QLA_SUCCESS)
@@ -4996,8 +4996,8 @@
 	}
 	for (i = 1; i < ha->max_req_queues; i++) {
 		req = ha->req_q_map[i];
-		if (req) {
-		/* Clear outstanding commands array. */
+		if (req && test_bit(i, ha->req_qid_map)) {
+			/* Clear outstanding commands array. */
 			req->options &= ~BIT_0;
 			ret = qla25xx_init_req_que(base_vha, req);
 			if (ret != QLA_SUCCESS)
diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c
index d4d65eb..4af9547 100644
--- a/drivers/scsi/qla2xxx/qla_isr.c
+++ b/drivers/scsi/qla2xxx/qla_isr.c
@@ -3063,9 +3063,9 @@
 		    "MSI-X: Failed to enable support "
 		    "-- %d/%d\n Retry with %d vectors.\n",
 		    ha->msix_count, ret, ret);
+		ha->msix_count = ret;
+		ha->max_rsp_queues = ha->msix_count - 1;
 	}
-	ha->msix_count = ret;
-	ha->max_rsp_queues = ha->msix_count - 1;
 	ha->msix_entries = kzalloc(sizeof(struct qla_msix_entry) *
 				ha->msix_count, GFP_KERNEL);
 	if (!ha->msix_entries) {
diff --git a/drivers/scsi/qla2xxx/qla_mid.c b/drivers/scsi/qla2xxx/qla_mid.c
index c5dd594..cf7ba52 100644
--- a/drivers/scsi/qla2xxx/qla_mid.c
+++ b/drivers/scsi/qla2xxx/qla_mid.c
@@ -600,7 +600,7 @@
 	/* Delete request queues */
 	for (cnt = 1; cnt < ha->max_req_queues; cnt++) {
 		req = ha->req_q_map[cnt];
-		if (req) {
+		if (req && test_bit(cnt, ha->req_qid_map)) {
 			ret = qla25xx_delete_req_que(vha, req);
 			if (ret != QLA_SUCCESS) {
 				ql_log(ql_log_warn, vha, 0x00ea,
@@ -614,7 +614,7 @@
 	/* Delete response queues */
 	for (cnt = 1; cnt < ha->max_rsp_queues; cnt++) {
 		rsp = ha->rsp_q_map[cnt];
-		if (rsp) {
+		if (rsp && test_bit(cnt, ha->rsp_qid_map)) {
 			ret = qla25xx_delete_rsp_que(vha, rsp);
 			if (ret != QLA_SUCCESS) {
 				ql_log(ql_log_warn, vha, 0x00eb,
diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index f1788db..f6c7ce3 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -409,6 +409,9 @@
 	int cnt;
 
 	for (cnt = 0; cnt < ha->max_req_queues; cnt++) {
+		if (!test_bit(cnt, ha->req_qid_map))
+			continue;
+
 		req = ha->req_q_map[cnt];
 		qla2x00_free_req_que(ha, req);
 	}
@@ -416,6 +419,9 @@
 	ha->req_q_map = NULL;
 
 	for (cnt = 0; cnt < ha->max_rsp_queues; cnt++) {
+		if (!test_bit(cnt, ha->rsp_qid_map))
+			continue;
+
 		rsp = ha->rsp_q_map[cnt];
 		qla2x00_free_rsp_que(ha, rsp);
 	}
diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c
index 8075a4c..ee967be 100644
--- a/drivers/scsi/qla2xxx/qla_target.c
+++ b/drivers/scsi/qla2xxx/qla_target.c
@@ -105,7 +105,7 @@
 static int qlt_issue_task_mgmt(struct qla_tgt_sess *sess, uint32_t lun,
 	int fn, void *iocb, int flags);
 static void qlt_send_term_exchange(struct scsi_qla_host *ha, struct qla_tgt_cmd
-	*cmd, struct atio_from_isp *atio, int ha_locked);
+	*cmd, struct atio_from_isp *atio, int ha_locked, int ul_abort);
 static void qlt_reject_free_srr_imm(struct scsi_qla_host *ha,
 	struct qla_tgt_srr_imm *imm, int ha_lock);
 static void qlt_abort_cmd_on_host_reset(struct scsi_qla_host *vha,
@@ -1756,7 +1756,7 @@
 		qlt_send_notify_ack(vha, &mcmd->orig_iocb.imm_ntfy,
 		    0, 0, 0, 0, 0, 0);
 	else {
-		if (mcmd->se_cmd.se_tmr_req->function == TMR_ABORT_TASK)
+		if (mcmd->orig_iocb.atio.u.raw.entry_type == ABTS_RECV_24XX)
 			qlt_24xx_send_abts_resp(vha, &mcmd->orig_iocb.abts,
 			    mcmd->fc_tm_rsp, false);
 		else
@@ -2665,7 +2665,7 @@
 			/* no need to terminate. FW already freed exchange. */
 			qlt_abort_cmd_on_host_reset(cmd->vha, cmd);
 		else
-			qlt_send_term_exchange(vha, cmd, &cmd->atio, 1);
+			qlt_send_term_exchange(vha, cmd, &cmd->atio, 1, 0);
 		spin_unlock_irqrestore(&ha->hardware_lock, flags);
 		return 0;
 	}
@@ -3173,7 +3173,8 @@
 }
 
 static void qlt_send_term_exchange(struct scsi_qla_host *vha,
-	struct qla_tgt_cmd *cmd, struct atio_from_isp *atio, int ha_locked)
+	struct qla_tgt_cmd *cmd, struct atio_from_isp *atio, int ha_locked,
+	int ul_abort)
 {
 	unsigned long flags = 0;
 	int rc;
@@ -3193,8 +3194,7 @@
 		qlt_alloc_qfull_cmd(vha, atio, 0, 0);
 
 done:
-	if (cmd && (!cmd->aborted ||
-	    !cmd->cmd_sent_to_fw)) {
+	if (cmd && !ul_abort && !cmd->aborted) {
 		if (cmd->sg_mapped)
 			qlt_unmap_sg(vha, cmd);
 		vha->hw->tgt.tgt_ops->free_cmd(cmd);
@@ -3253,21 +3253,38 @@
 
 }
 
-void qlt_abort_cmd(struct qla_tgt_cmd *cmd)
+int qlt_abort_cmd(struct qla_tgt_cmd *cmd)
 {
 	struct qla_tgt *tgt = cmd->tgt;
 	struct scsi_qla_host *vha = tgt->vha;
 	struct se_cmd *se_cmd = &cmd->se_cmd;
+	unsigned long flags;
 
 	ql_dbg(ql_dbg_tgt_mgt, vha, 0xf014,
 	    "qla_target(%d): terminating exchange for aborted cmd=%p "
 	    "(se_cmd=%p, tag=%llu)", vha->vp_idx, cmd, &cmd->se_cmd,
 	    se_cmd->tag);
 
+	spin_lock_irqsave(&cmd->cmd_lock, flags);
+	if (cmd->aborted) {
+		spin_unlock_irqrestore(&cmd->cmd_lock, flags);
+		/*
+		 * It's normal to see 2 calls in this path:
+		 *  1) XFER Rdy completion + CMD_T_ABORT
+		 *  2) TCM TMR - drain_state_list
+		 */
+	        ql_dbg(ql_dbg_tgt_mgt, vha, 0xffff,
+			"multiple abort. %p transport_state %x, t_state %x,"
+			" se_cmd_flags %x \n", cmd, cmd->se_cmd.transport_state,
+			cmd->se_cmd.t_state,cmd->se_cmd.se_cmd_flags);
+		return EIO;
+	}
 	cmd->aborted = 1;
 	cmd->cmd_flags |= BIT_6;
+	spin_unlock_irqrestore(&cmd->cmd_lock, flags);
 
-	qlt_send_term_exchange(vha, cmd, &cmd->atio, 0);
+	qlt_send_term_exchange(vha, cmd, &cmd->atio, 0, 1);
+	return 0;
 }
 EXPORT_SYMBOL(qlt_abort_cmd);
 
@@ -3282,6 +3299,9 @@
 
 	BUG_ON(cmd->cmd_in_wq);
 
+	if (cmd->sg_mapped)
+		qlt_unmap_sg(cmd->vha, cmd);
+
 	if (!cmd->q_full)
 		qlt_decr_num_pend_cmds(cmd->vha);
 
@@ -3399,7 +3419,7 @@
 		term = 1;
 
 	if (term)
-		qlt_send_term_exchange(vha, cmd, &cmd->atio, 1);
+		qlt_send_term_exchange(vha, cmd, &cmd->atio, 1, 0);
 
 	return term;
 }
@@ -3580,12 +3600,13 @@
 		case CTIO_PORT_LOGGED_OUT:
 		case CTIO_PORT_UNAVAILABLE:
 		{
-			int logged_out = (status & 0xFFFF);
+			int logged_out =
+				(status & 0xFFFF) == CTIO_PORT_LOGGED_OUT;
+
 			ql_dbg(ql_dbg_tgt_mgt, vha, 0xf059,
 			    "qla_target(%d): CTIO with %s status %x "
 			    "received (state %x, se_cmd %p)\n", vha->vp_idx,
-			    (logged_out == CTIO_PORT_LOGGED_OUT) ?
-			    "PORT LOGGED OUT" : "PORT UNAVAILABLE",
+			    logged_out ? "PORT LOGGED OUT" : "PORT UNAVAILABLE",
 			    status, cmd->state, se_cmd);
 
 			if (logged_out && cmd->sess) {
@@ -3754,6 +3775,7 @@
 		goto out_term;
 	}
 
+	spin_lock_init(&cmd->cmd_lock);
 	cdb = &atio->u.isp24.fcp_cmnd.cdb[0];
 	cmd->se_cmd.tag = atio->u.isp24.exchange_addr;
 	cmd->unpacked_lun = scsilun_to_int(
@@ -3796,7 +3818,7 @@
 	 */
 	cmd->cmd_flags |= BIT_2;
 	spin_lock_irqsave(&ha->hardware_lock, flags);
-	qlt_send_term_exchange(vha, NULL, &cmd->atio, 1);
+	qlt_send_term_exchange(vha, NULL, &cmd->atio, 1, 0);
 
 	qlt_decr_num_pend_cmds(vha);
 	percpu_ida_free(&sess->se_sess->sess_tag_pool, cmd->se_cmd.map_tag);
@@ -3918,7 +3940,7 @@
 
 out_term:
 	spin_lock_irqsave(&ha->hardware_lock, flags);
-	qlt_send_term_exchange(vha, NULL, &op->atio, 1);
+	qlt_send_term_exchange(vha, NULL, &op->atio, 1, 0);
 	spin_unlock_irqrestore(&ha->hardware_lock, flags);
 	kfree(op);
 
@@ -3982,7 +4004,8 @@
 
 	cmd->cmd_in_wq = 1;
 	cmd->cmd_flags |= BIT_0;
-	cmd->se_cmd.cpuid = -1;
+	cmd->se_cmd.cpuid = ha->msix_count ?
+		ha->tgt.rspq_vector_cpuid : WORK_CPU_UNBOUND;
 
 	spin_lock(&vha->cmd_list_lock);
 	list_add_tail(&cmd->cmd_list, &vha->qla_cmd_list);
@@ -3990,7 +4013,6 @@
 
 	INIT_WORK(&cmd->work, qlt_do_work);
 	if (ha->msix_count) {
-		cmd->se_cmd.cpuid = ha->tgt.rspq_vector_cpuid;
 		if (cmd->atio.u.isp24.fcp_cmnd.rddata)
 			queue_work_on(smp_processor_id(), qla_tgt_wq,
 			    &cmd->work);
@@ -4771,7 +4793,7 @@
 		dump_stack();
 	} else {
 		cmd->cmd_flags |= BIT_9;
-		qlt_send_term_exchange(vha, cmd, &cmd->atio, 1);
+		qlt_send_term_exchange(vha, cmd, &cmd->atio, 1, 0);
 	}
 	spin_unlock_irqrestore(&ha->hardware_lock, flags);
 }
@@ -4950,7 +4972,7 @@
 				    sctio, sctio->srr_id);
 				list_del(&sctio->srr_list_entry);
 				qlt_send_term_exchange(vha, sctio->cmd,
-				    &sctio->cmd->atio, 1);
+				    &sctio->cmd->atio, 1, 0);
 				kfree(sctio);
 			}
 		}
@@ -5123,7 +5145,7 @@
 	    atio->u.isp24.fcp_hdr.s_id);
 	spin_unlock_irqrestore(&ha->tgt.sess_lock, flags);
 	if (!sess) {
-		qlt_send_term_exchange(vha, NULL, atio, 1);
+		qlt_send_term_exchange(vha, NULL, atio, 1, 0);
 		return 0;
 	}
 	/* Sending marker isn't necessary, since we called from ISR */
@@ -5406,7 +5428,7 @@
 #if 1 /* With TERM EXCHANGE some FC cards refuse to boot */
 				qlt_send_busy(vha, atio, SAM_STAT_BUSY);
 #else
-				qlt_send_term_exchange(vha, NULL, atio, 1);
+				qlt_send_term_exchange(vha, NULL, atio, 1, 0);
 #endif
 
 				if (!ha_locked)
@@ -5523,7 +5545,7 @@
 #if 1 /* With TERM EXCHANGE some FC cards refuse to boot */
 				qlt_send_busy(vha, atio, 0);
 #else
-				qlt_send_term_exchange(vha, NULL, atio, 1);
+				qlt_send_term_exchange(vha, NULL, atio, 1, 0);
 #endif
 			} else {
 				if (tgt->tgt_stop) {
@@ -5532,7 +5554,7 @@
 					    "command to target, sending TERM "
 					    "EXCHANGE for rsp\n");
 					qlt_send_term_exchange(vha, NULL,
-					    atio, 1);
+					    atio, 1, 0);
 				} else {
 					ql_dbg(ql_dbg_tgt, vha, 0xe060,
 					    "qla_target(%d): Unable to send "
@@ -5960,7 +5982,7 @@
 	return;
 
 out_term:
-	qlt_send_term_exchange(vha, NULL, &prm->tm_iocb2, 0);
+	qlt_send_term_exchange(vha, NULL, &prm->tm_iocb2, 1, 0);
 	if (sess)
 		ha->tgt.tgt_ops->put_sess(sess);
 	spin_unlock_irqrestore(&ha->tgt.sess_lock, flags);
diff --git a/drivers/scsi/qla2xxx/qla_target.h b/drivers/scsi/qla2xxx/qla_target.h
index 71b2865..22a6a76 100644
--- a/drivers/scsi/qla2xxx/qla_target.h
+++ b/drivers/scsi/qla2xxx/qla_target.h
@@ -943,6 +943,36 @@
 	qlt_plogi_ack_t *plogi_link[QLT_PLOGI_LINK_MAX];
 };
 
+typedef enum {
+	/*
+	 * BIT_0 - Atio Arrival / schedule to work
+	 * BIT_1 - qlt_do_work
+	 * BIT_2 - qlt_do work failed
+	 * BIT_3 - xfer rdy/tcm_qla2xxx_write_pending
+	 * BIT_4 - read respond/tcm_qla2xx_queue_data_in
+	 * BIT_5 - status respond / tcm_qla2xx_queue_status
+	 * BIT_6 - tcm request to abort/Term exchange.
+	 *	pre_xmit_response->qlt_send_term_exchange
+	 * BIT_7 - SRR received (qlt_handle_srr->qlt_xmit_response)
+	 * BIT_8 - SRR received (qlt_handle_srr->qlt_rdy_to_xfer)
+	 * BIT_9 - SRR received (qla_handle_srr->qlt_send_term_exchange)
+	 * BIT_10 - Data in - hanlde_data->tcm_qla2xxx_handle_data
+
+	 * BIT_12 - good completion - qlt_ctio_do_completion -->free_cmd
+	 * BIT_13 - Bad completion -
+	 *	qlt_ctio_do_completion --> qlt_term_ctio_exchange
+	 * BIT_14 - Back end data received/sent.
+	 * BIT_15 - SRR prepare ctio
+	 * BIT_16 - complete free
+	 * BIT_17 - flush - qlt_abort_cmd_on_host_reset
+	 * BIT_18 - completion w/abort status
+	 * BIT_19 - completion w/unknown status
+	 * BIT_20 - tcm_qla2xxx_free_cmd
+	 */
+	CMD_FLAG_DATA_WORK = BIT_11,
+	CMD_FLAG_DATA_WORK_FREE = BIT_21,
+} cmd_flags_t;
+
 struct qla_tgt_cmd {
 	struct se_cmd se_cmd;
 	struct qla_tgt_sess *sess;
@@ -952,6 +982,7 @@
 	/* Sense buffer that will be mapped into outgoing status */
 	unsigned char sense_buffer[TRANSPORT_SENSE_BUFFER];
 
+	spinlock_t cmd_lock;
 	/* to save extra sess dereferences */
 	unsigned int conf_compl_supported:1;
 	unsigned int sg_mapped:1;
@@ -986,30 +1017,8 @@
 
 	uint64_t jiffies_at_alloc;
 	uint64_t jiffies_at_free;
-	/* BIT_0 - Atio Arrival / schedule to work
-	 * BIT_1 - qlt_do_work
-	 * BIT_2 - qlt_do work failed
-	 * BIT_3 - xfer rdy/tcm_qla2xxx_write_pending
-	 * BIT_4 - read respond/tcm_qla2xx_queue_data_in
-	 * BIT_5 - status respond / tcm_qla2xx_queue_status
-	 * BIT_6 - tcm request to abort/Term exchange.
-	 *	pre_xmit_response->qlt_send_term_exchange
-	 * BIT_7 - SRR received (qlt_handle_srr->qlt_xmit_response)
-	 * BIT_8 - SRR received (qlt_handle_srr->qlt_rdy_to_xfer)
-	 * BIT_9 - SRR received (qla_handle_srr->qlt_send_term_exchange)
-	 * BIT_10 - Data in - hanlde_data->tcm_qla2xxx_handle_data
-	 * BIT_11 - Data actually going to TCM : tcm_qla2xx_handle_data_work
-	 * BIT_12 - good completion - qlt_ctio_do_completion -->free_cmd
-	 * BIT_13 - Bad completion -
-	 *	qlt_ctio_do_completion --> qlt_term_ctio_exchange
-	 * BIT_14 - Back end data received/sent.
-	 * BIT_15 - SRR prepare ctio
-	 * BIT_16 - complete free
-	 * BIT_17 - flush - qlt_abort_cmd_on_host_reset
-	 * BIT_18 - completion w/abort status
-	 * BIT_19 - completion w/unknown status
-	 */
-	uint32_t cmd_flags;
+
+	cmd_flags_t cmd_flags;
 };
 
 struct qla_tgt_sess_work_param {
@@ -1148,7 +1157,7 @@
 extern void qlt_response_pkt_all_vps(struct scsi_qla_host *, response_t *);
 extern int qlt_rdy_to_xfer(struct qla_tgt_cmd *);
 extern int qlt_xmit_response(struct qla_tgt_cmd *, int, uint8_t);
-extern void qlt_abort_cmd(struct qla_tgt_cmd *);
+extern int qlt_abort_cmd(struct qla_tgt_cmd *);
 extern void qlt_xmit_tm_rsp(struct qla_tgt_mgmt_cmd *);
 extern void qlt_free_mcmd(struct qla_tgt_mgmt_cmd *);
 extern void qlt_free_cmd(struct qla_tgt_cmd *cmd);
diff --git a/drivers/scsi/qla2xxx/qla_tmpl.c b/drivers/scsi/qla2xxx/qla_tmpl.c
index ddbe2e7..c3e6225 100644
--- a/drivers/scsi/qla2xxx/qla_tmpl.c
+++ b/drivers/scsi/qla2xxx/qla_tmpl.c
@@ -395,6 +395,10 @@
 	if (ent->t263.queue_type == T263_QUEUE_TYPE_REQ) {
 		for (i = 0; i < vha->hw->max_req_queues; i++) {
 			struct req_que *req = vha->hw->req_q_map[i];
+
+			if (!test_bit(i, vha->hw->req_qid_map))
+				continue;
+
 			if (req || !buf) {
 				length = req ?
 				    req->length : REQUEST_ENTRY_CNT_24XX;
@@ -408,6 +412,10 @@
 	} else if (ent->t263.queue_type == T263_QUEUE_TYPE_RSP) {
 		for (i = 0; i < vha->hw->max_rsp_queues; i++) {
 			struct rsp_que *rsp = vha->hw->rsp_q_map[i];
+
+			if (!test_bit(i, vha->hw->rsp_qid_map))
+				continue;
+
 			if (rsp || !buf) {
 				length = rsp ?
 				    rsp->length : RESPONSE_ENTRY_CNT_MQ;
@@ -634,6 +642,10 @@
 	if (ent->t274.queue_type == T274_QUEUE_TYPE_REQ_SHAD) {
 		for (i = 0; i < vha->hw->max_req_queues; i++) {
 			struct req_que *req = vha->hw->req_q_map[i];
+
+			if (!test_bit(i, vha->hw->req_qid_map))
+				continue;
+
 			if (req || !buf) {
 				qla27xx_insert16(i, buf, len);
 				qla27xx_insert16(1, buf, len);
@@ -645,6 +657,10 @@
 	} else if (ent->t274.queue_type == T274_QUEUE_TYPE_RSP_SHAD) {
 		for (i = 0; i < vha->hw->max_rsp_queues; i++) {
 			struct rsp_que *rsp = vha->hw->rsp_q_map[i];
+
+			if (!test_bit(i, vha->hw->rsp_qid_map))
+				continue;
+
 			if (rsp || !buf) {
 				qla27xx_insert16(i, buf, len);
 				qla27xx_insert16(1, buf, len);
diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c
index faf0a12..1808a01 100644
--- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c
+++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c
@@ -298,6 +298,10 @@
 {
 	cmd->vha->tgt_counters.core_qla_free_cmd++;
 	cmd->cmd_in_wq = 1;
+
+	BUG_ON(cmd->cmd_flags & BIT_20);
+	cmd->cmd_flags |= BIT_20;
+
 	INIT_WORK(&cmd->work, tcm_qla2xxx_complete_free);
 	queue_work_on(smp_processor_id(), tcm_qla2xxx_free_wq, &cmd->work);
 }
@@ -374,6 +378,20 @@
 {
 	struct qla_tgt_cmd *cmd = container_of(se_cmd,
 				struct qla_tgt_cmd, se_cmd);
+
+	if (cmd->aborted) {
+		/* Cmd can loop during Q-full.  tcm_qla2xxx_aborted_task
+		 * can get ahead of this cmd. tcm_qla2xxx_aborted_task
+		 * already kick start the free.
+		 */
+		pr_debug("write_pending aborted cmd[%p] refcount %d "
+			"transport_state %x, t_state %x, se_cmd_flags %x\n",
+			cmd,cmd->se_cmd.cmd_kref.refcount.counter,
+			cmd->se_cmd.transport_state,
+			cmd->se_cmd.t_state,
+			cmd->se_cmd.se_cmd_flags);
+		return 0;
+	}
 	cmd->cmd_flags |= BIT_3;
 	cmd->bufflen = se_cmd->data_length;
 	cmd->dma_data_direction = target_reverse_dma_direction(se_cmd);
@@ -405,7 +423,7 @@
 	    se_cmd->t_state == TRANSPORT_COMPLETE_QF_WP) {
 		spin_unlock_irqrestore(&se_cmd->t_state_lock, flags);
 		wait_for_completion_timeout(&se_cmd->t_transport_stop_comp,
-					    3 * HZ);
+						50);
 		return 0;
 	}
 	spin_unlock_irqrestore(&se_cmd->t_state_lock, flags);
@@ -444,6 +462,9 @@
 	if (bidi)
 		flags |= TARGET_SCF_BIDI_OP;
 
+	if (se_cmd->cpuid != WORK_CPU_UNBOUND)
+		flags |= TARGET_SCF_USE_CPUID;
+
 	sess = cmd->sess;
 	if (!sess) {
 		pr_err("Unable to locate struct qla_tgt_sess from qla_tgt_cmd\n");
@@ -465,13 +486,25 @@
 static void tcm_qla2xxx_handle_data_work(struct work_struct *work)
 {
 	struct qla_tgt_cmd *cmd = container_of(work, struct qla_tgt_cmd, work);
+	unsigned long flags;
 
 	/*
 	 * Ensure that the complete FCP WRITE payload has been received.
 	 * Otherwise return an exception via CHECK_CONDITION status.
 	 */
 	cmd->cmd_in_wq = 0;
-	cmd->cmd_flags |= BIT_11;
+
+	spin_lock_irqsave(&cmd->cmd_lock, flags);
+	cmd->cmd_flags |= CMD_FLAG_DATA_WORK;
+	if (cmd->aborted) {
+		cmd->cmd_flags |= CMD_FLAG_DATA_WORK_FREE;
+		spin_unlock_irqrestore(&cmd->cmd_lock, flags);
+
+		tcm_qla2xxx_free_cmd(cmd);
+		return;
+	}
+	spin_unlock_irqrestore(&cmd->cmd_lock, flags);
+
 	cmd->vha->tgt_counters.qla_core_ret_ctio++;
 	if (!cmd->write_data_transferred) {
 		/*
@@ -546,6 +579,20 @@
 	struct qla_tgt_cmd *cmd = container_of(se_cmd,
 				struct qla_tgt_cmd, se_cmd);
 
+	if (cmd->aborted) {
+		/* Cmd can loop during Q-full.  tcm_qla2xxx_aborted_task
+		 * can get ahead of this cmd. tcm_qla2xxx_aborted_task
+		 * already kick start the free.
+		 */
+		pr_debug("queue_data_in aborted cmd[%p] refcount %d "
+			"transport_state %x, t_state %x, se_cmd_flags %x\n",
+			cmd,cmd->se_cmd.cmd_kref.refcount.counter,
+			cmd->se_cmd.transport_state,
+			cmd->se_cmd.t_state,
+			cmd->se_cmd.se_cmd_flags);
+		return 0;
+	}
+
 	cmd->cmd_flags |= BIT_4;
 	cmd->bufflen = se_cmd->data_length;
 	cmd->dma_data_direction = target_reverse_dma_direction(se_cmd);
@@ -637,11 +684,34 @@
 	qlt_xmit_tm_rsp(mcmd);
 }
 
+
+#define DATA_WORK_NOT_FREE(_flags) \
+	(( _flags & (CMD_FLAG_DATA_WORK|CMD_FLAG_DATA_WORK_FREE)) == \
+	 CMD_FLAG_DATA_WORK)
 static void tcm_qla2xxx_aborted_task(struct se_cmd *se_cmd)
 {
 	struct qla_tgt_cmd *cmd = container_of(se_cmd,
 				struct qla_tgt_cmd, se_cmd);
-	qlt_abort_cmd(cmd);
+	unsigned long flags;
+
+	if (qlt_abort_cmd(cmd))
+		return;
+
+	spin_lock_irqsave(&cmd->cmd_lock, flags);
+	if ((cmd->state == QLA_TGT_STATE_NEW)||
+		((cmd->state == QLA_TGT_STATE_DATA_IN) &&
+		 DATA_WORK_NOT_FREE(cmd->cmd_flags)) ) {
+
+		cmd->cmd_flags |= CMD_FLAG_DATA_WORK_FREE;
+		spin_unlock_irqrestore(&cmd->cmd_lock, flags);
+		/* Cmd have not reached firmware.
+		 * Use this trigger to free it. */
+		tcm_qla2xxx_free_cmd(cmd);
+		return;
+	}
+	spin_unlock_irqrestore(&cmd->cmd_lock, flags);
+	return;
+
 }
 
 static void tcm_qla2xxx_clear_sess_lookup(struct tcm_qla2xxx_lport *,
diff --git a/drivers/scsi/scsi_devinfo.c b/drivers/scsi/scsi_devinfo.c
index 47b9d13..bbfbfd9 100644
--- a/drivers/scsi/scsi_devinfo.c
+++ b/drivers/scsi/scsi_devinfo.c
@@ -205,6 +205,8 @@
 	{"Intel", "Multi-Flex", NULL, BLIST_NO_RSOC},
 	{"iRiver", "iFP Mass Driver", NULL, BLIST_NOT_LOCKABLE | BLIST_INQUIRY_36},
 	{"LASOUND", "CDX7405", "3.10", BLIST_MAX5LUN | BLIST_SINGLELUN},
+	{"Marvell", "Console", NULL, BLIST_SKIP_VPD_PAGES},
+	{"Marvell", "91xx Config", "1.01", BLIST_SKIP_VPD_PAGES},
 	{"MATSHITA", "PD-1", NULL, BLIST_FORCELUN | BLIST_SINGLELUN},
 	{"MATSHITA", "DMC-LC5", NULL, BLIST_NOT_LOCKABLE | BLIST_INQUIRY_36},
 	{"MATSHITA", "DMC-LC40", NULL, BLIST_NOT_LOCKABLE | BLIST_INQUIRY_36},
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index fa6b2c4..8c6e318 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1344,6 +1344,7 @@
 
 	switch (ret) {
 	case BLKPREP_KILL:
+	case BLKPREP_INVALID:
 		req->errors = DID_NO_CONNECT << 16;
 		/* release the command and kill it */
 		if (req->special) {
diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index 4f18a85..00bc721 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -1272,16 +1272,18 @@
 void scsi_remove_target(struct device *dev)
 {
 	struct Scsi_Host *shost = dev_to_shost(dev->parent);
-	struct scsi_target *starget;
+	struct scsi_target *starget, *last_target = NULL;
 	unsigned long flags;
 
 restart:
 	spin_lock_irqsave(shost->host_lock, flags);
 	list_for_each_entry(starget, &shost->__targets, siblings) {
-		if (starget->state == STARGET_DEL)
+		if (starget->state == STARGET_DEL ||
+		    starget == last_target)
 			continue;
 		if (starget->dev.parent == dev || &starget->dev == dev) {
 			kref_get(&starget->reap_ref);
+			last_target = starget;
 			spin_unlock_irqrestore(shost->host_lock, flags);
 			__scsi_remove_target(starget);
 			scsi_target_reap(starget);
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 4e08d1cd..d749da7 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -761,7 +761,7 @@
 		break;
 
 	default:
-		ret = BLKPREP_KILL;
+		ret = BLKPREP_INVALID;
 		goto out;
 	}
 
@@ -839,7 +839,7 @@
 	int ret;
 
 	if (sdkp->device->no_write_same)
-		return BLKPREP_KILL;
+		return BLKPREP_INVALID;
 
 	BUG_ON(bio_offset(bio) || bio_iovec(bio).bv_len != sdp->sector_size);
 
@@ -2893,7 +2893,7 @@
 	    sdkp->opt_xfer_blocks <= SD_DEF_XFER_BLOCKS &&
 	    sdkp->opt_xfer_blocks * sdp->sector_size >= PAGE_CACHE_SIZE)
 		rw_max = q->limits.io_opt =
-			logical_to_sectors(sdp, sdkp->opt_xfer_blocks);
+			sdkp->opt_xfer_blocks * sdp->sector_size;
 	else
 		rw_max = BLK_DEF_MAX_SECTORS;
 
@@ -3268,8 +3268,8 @@
 	struct scsi_disk *sdkp = dev_get_drvdata(dev);
 	int ret = 0;
 
-	if (!sdkp)
-		return 0;	/* this can happen */
+	if (!sdkp)	/* E.g.: runtime suspend following sd_remove() */
+		return 0;
 
 	if (sdkp->WCE && sdkp->media_present) {
 		sd_printk(KERN_NOTICE, sdkp, "Synchronizing SCSI cache\n");
@@ -3308,6 +3308,9 @@
 {
 	struct scsi_disk *sdkp = dev_get_drvdata(dev);
 
+	if (!sdkp)	/* E.g.: runtime resume at the start of sd_probe() */
+		return 0;
+
 	if (!sdkp->device->manage_start_stop)
 		return 0;
 
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index 503ab8b..5e82067 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -1261,7 +1261,7 @@
 	}
 
 	sfp->mmap_called = 1;
-	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
+	vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP;
 	vma->vm_private_data = sfp;
 	vma->vm_ops = &sg_mmap_vm_ops;
 	return 0;
diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c
index 8bd54a6..64c8674 100644
--- a/drivers/scsi/sr.c
+++ b/drivers/scsi/sr.c
@@ -144,6 +144,9 @@
 {
 	struct scsi_cd *cd = dev_get_drvdata(dev);
 
+	if (!cd)	/* E.g.: runtime suspend following sr_remove() */
+		return 0;
+
 	if (cd->media_present)
 		return -EBUSY;
 	else
@@ -985,6 +988,7 @@
 	scsi_autopm_get_device(cd->device);
 
 	del_gendisk(cd->disk);
+	dev_set_drvdata(dev, NULL);
 
 	mutex_lock(&sr_ref_mutex);
 	kref_put(&cd->kref, sr_kref_release);
diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
index 55627d0..292c04e 100644
--- a/drivers/scsi/storvsc_drv.c
+++ b/drivers/scsi/storvsc_drv.c
@@ -42,6 +42,7 @@
 #include <scsi/scsi_devinfo.h>
 #include <scsi/scsi_dbg.h>
 #include <scsi/scsi_transport_fc.h>
+#include <scsi/scsi_transport.h>
 
 /*
  * All wire protocol details (storage protocol between the guest and the host)
@@ -477,19 +478,18 @@
 struct storvsc_scan_work {
 	struct work_struct work;
 	struct Scsi_Host *host;
-	uint lun;
+	u8 lun;
+	u8 tgt_id;
 };
 
 static void storvsc_device_scan(struct work_struct *work)
 {
 	struct storvsc_scan_work *wrk;
-	uint lun;
 	struct scsi_device *sdev;
 
 	wrk = container_of(work, struct storvsc_scan_work, work);
-	lun = wrk->lun;
 
-	sdev = scsi_device_lookup(wrk->host, 0, 0, lun);
+	sdev = scsi_device_lookup(wrk->host, 0, wrk->tgt_id, wrk->lun);
 	if (!sdev)
 		goto done;
 	scsi_rescan_device(&sdev->sdev_gendev);
@@ -540,7 +540,7 @@
 	if (!scsi_host_get(wrk->host))
 		goto done;
 
-	sdev = scsi_device_lookup(wrk->host, 0, 0, wrk->lun);
+	sdev = scsi_device_lookup(wrk->host, 0, wrk->tgt_id, wrk->lun);
 
 	if (sdev) {
 		scsi_remove_device(sdev);
@@ -940,6 +940,7 @@
 
 	wrk->host = host;
 	wrk->lun = vm_srb->lun;
+	wrk->tgt_id = vm_srb->target_id;
 	INIT_WORK(&wrk->work, process_err_fn);
 	schedule_work(&wrk->work);
 }
@@ -1770,6 +1771,11 @@
 	fc_transport_template = fc_attach_transport(&fc_transport_functions);
 	if (!fc_transport_template)
 		return -ENODEV;
+
+	/*
+	 * Install Hyper-V specific timeout handler.
+	 */
+	fc_transport_template->eh_timed_out = storvsc_eh_timed_out;
 #endif
 
 	ret = vmbus_driver_register(&storvsc_drv);
diff --git a/drivers/sh/pm_runtime.c b/drivers/sh/pm_runtime.c
index 91a00301..a9bac3b 100644
--- a/drivers/sh/pm_runtime.c
+++ b/drivers/sh/pm_runtime.c
@@ -34,7 +34,7 @@
 
 static int __init sh_pm_runtime_init(void)
 {
-	if (IS_ENABLED(CONFIG_ARCH_SHMOBILE)) {
+	if (IS_ENABLED(CONFIG_OF) && IS_ENABLED(CONFIG_ARCH_SHMOBILE)) {
 		if (!of_find_compatible_node(NULL, NULL,
 					     "renesas,cpg-mstp-clocks"))
 			return 0;
diff --git a/drivers/sh/superhyway/superhyway.c b/drivers/sh/superhyway/superhyway.c
index 2d9e7f3..bb1fb771 100644
--- a/drivers/sh/superhyway/superhyway.c
+++ b/drivers/sh/superhyway/superhyway.c
@@ -66,7 +66,7 @@
 	superhyway_read_vcr(dev, base, &dev->vcr);
 
 	if (!dev->resource) {
-		dev->resource = kmalloc(sizeof(struct resource), GFP_KERNEL);
+		dev->resource = kzalloc(sizeof(struct resource), GFP_KERNEL);
 		if (!dev->resource) {
 			kfree(dev);
 			return -ENOMEM;
diff --git a/drivers/spi/spi-atmel.c b/drivers/spi/spi-atmel.c
index aebad36..8feac59 100644
--- a/drivers/spi/spi-atmel.c
+++ b/drivers/spi/spi-atmel.c
@@ -1571,6 +1571,7 @@
 
 	as->use_cs_gpios = true;
 	if (atmel_spi_is_v2(as) &&
+	    pdev->dev.of_node &&
 	    !of_get_property(pdev->dev.of_node, "cs-gpios", NULL)) {
 		as->use_cs_gpios = false;
 		master->num_chipselect = 4;
diff --git a/drivers/spi/spi-bcm2835aux.c b/drivers/spi/spi-bcm2835aux.c
index 7de6f84..ecc73c0 100644
--- a/drivers/spi/spi-bcm2835aux.c
+++ b/drivers/spi/spi-bcm2835aux.c
@@ -73,8 +73,8 @@
 
 /* Bitfields in CNTL1 */
 #define BCM2835_AUX_SPI_CNTL1_CSHIGH	0x00000700
-#define BCM2835_AUX_SPI_CNTL1_IDLE	0x00000080
-#define BCM2835_AUX_SPI_CNTL1_TXEMPTY	0x00000040
+#define BCM2835_AUX_SPI_CNTL1_TXEMPTY	0x00000080
+#define BCM2835_AUX_SPI_CNTL1_IDLE	0x00000040
 #define BCM2835_AUX_SPI_CNTL1_MSBF_IN	0x00000002
 #define BCM2835_AUX_SPI_CNTL1_KEEP_IN	0x00000001
 
diff --git a/drivers/spi/spi-fsl-espi.c b/drivers/spi/spi-fsl-espi.c
index 7fd6a4c..7cb0c19 100644
--- a/drivers/spi/spi-fsl-espi.c
+++ b/drivers/spi/spi-fsl-espi.c
@@ -84,7 +84,7 @@
 /* SPCOM register values */
 #define SPCOM_CS(x)		((x) << 30)
 #define SPCOM_TRANLEN(x)	((x) << 0)
-#define	SPCOM_TRANLEN_MAX	0xFFFF	/* Max transaction length */
+#define	SPCOM_TRANLEN_MAX	0x10000	/* Max transaction length */
 
 #define AUTOSUSPEND_TIMEOUT 2000
 
@@ -233,7 +233,7 @@
 	reinit_completion(&mpc8xxx_spi->done);
 
 	/* Set SPCOM[CS] and SPCOM[TRANLEN] field */
-	if ((t->len - 1) > SPCOM_TRANLEN_MAX) {
+	if (t->len > SPCOM_TRANLEN_MAX) {
 		dev_err(mpc8xxx_spi->dev, "Transaction length (%d)"
 				" beyond the SPCOM[TRANLEN] field\n", t->len);
 		return -EINVAL;
diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c
index d98c33c..c688efa 100644
--- a/drivers/spi/spi-imx.c
+++ b/drivers/spi/spi-imx.c
@@ -204,8 +204,8 @@
 {
 	struct spi_imx_data *spi_imx = spi_master_get_devdata(master);
 
-	if (spi_imx->dma_is_inited &&
-	    transfer->len > spi_imx->wml * sizeof(u32))
+	if (spi_imx->dma_is_inited && transfer->len >= spi_imx->wml &&
+	    (transfer->len % spi_imx->wml) == 0)
 		return true;
 	return false;
 }
@@ -919,8 +919,6 @@
 	struct dma_async_tx_descriptor *desc_tx = NULL, *desc_rx = NULL;
 	int ret;
 	unsigned long timeout;
-	u32 dma;
-	int left;
 	struct spi_master *master = spi_imx->bitbang.master;
 	struct sg_table *tx = &transfer->tx_sg, *rx = &transfer->rx_sg;
 
@@ -929,7 +927,7 @@
 					tx->sgl, tx->nents, DMA_MEM_TO_DEV,
 					DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
 		if (!desc_tx)
-			goto no_dma;
+			goto tx_nodma;
 
 		desc_tx->callback = spi_imx_dma_tx_callback;
 		desc_tx->callback_param = (void *)spi_imx;
@@ -941,7 +939,7 @@
 					rx->sgl, rx->nents, DMA_DEV_TO_MEM,
 					DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
 		if (!desc_rx)
-			goto no_dma;
+			goto rx_nodma;
 
 		desc_rx->callback = spi_imx_dma_rx_callback;
 		desc_rx->callback_param = (void *)spi_imx;
@@ -954,13 +952,6 @@
 	/* Trigger the cspi module. */
 	spi_imx->dma_finished = 0;
 
-	dma = readl(spi_imx->base + MX51_ECSPI_DMA);
-	dma = dma & (~MX51_ECSPI_DMA_RXT_WML_MASK);
-	/* Change RX_DMA_LENGTH trigger dma fetch tail data */
-	left = transfer->len % spi_imx->wml;
-	if (left)
-		writel(dma | (left << MX51_ECSPI_DMA_RXT_WML_OFFSET),
-				spi_imx->base + MX51_ECSPI_DMA);
 	/*
 	 * Set these order to avoid potential RX overflow. The overflow may
 	 * happen if we enable SPI HW before starting RX DMA due to rescheduling
@@ -992,10 +983,6 @@
 			spi_imx->devtype_data->reset(spi_imx);
 			dmaengine_terminate_all(master->dma_rx);
 		}
-		dma &= ~MX51_ECSPI_DMA_RXT_WML_MASK;
-		writel(dma |
-		       spi_imx->wml << MX51_ECSPI_DMA_RXT_WML_OFFSET,
-		       spi_imx->base + MX51_ECSPI_DMA);
 	}
 
 	spi_imx->dma_finished = 1;
@@ -1008,7 +995,9 @@
 
 	return ret;
 
-no_dma:
+rx_nodma:
+	dmaengine_terminate_all(master->dma_tx);
+tx_nodma:
 	pr_warn_once("%s %s: DMA not available, falling back to PIO\n",
 		     dev_driver_string(&master->dev),
 		     dev_name(&master->dev));
diff --git a/drivers/spi/spi-loopback-test.c b/drivers/spi/spi-loopback-test.c
index 894616f..cf4bb36 100644
--- a/drivers/spi/spi-loopback-test.c
+++ b/drivers/spi/spi-loopback-test.c
@@ -761,6 +761,7 @@
 		test.iterate_transfer_mask = 1;
 
 	/* count number of transfers with tx/rx_buf != NULL */
+	rx_count = tx_count = 0;
 	for (i = 0; i < test.transfer_count; i++) {
 		if (test.transfers[i].tx_buf)
 			tx_count++;
diff --git a/drivers/spi/spi-omap2-mcspi.c b/drivers/spi/spi-omap2-mcspi.c
index 7273820..0caa3c8 100644
--- a/drivers/spi/spi-omap2-mcspi.c
+++ b/drivers/spi/spi-omap2-mcspi.c
@@ -1490,6 +1490,8 @@
 	return status;
 
 disable_pm:
+	pm_runtime_dont_use_autosuspend(&pdev->dev);
+	pm_runtime_put_sync(&pdev->dev);
 	pm_runtime_disable(&pdev->dev);
 free_master:
 	spi_master_put(master);
@@ -1501,6 +1503,7 @@
 	struct spi_master *master = platform_get_drvdata(pdev);
 	struct omap2_mcspi *mcspi = spi_master_get_devdata(master);
 
+	pm_runtime_dont_use_autosuspend(mcspi->dev);
 	pm_runtime_put_sync(mcspi->dev);
 	pm_runtime_disable(&pdev->dev);
 
diff --git a/drivers/spi/spi-rockchip.c b/drivers/spi/spi-rockchip.c
index 79a8bc4..7cb1b2d 100644
--- a/drivers/spi/spi-rockchip.c
+++ b/drivers/spi/spi-rockchip.c
@@ -749,6 +749,7 @@
 	return 0;
 
 err_register_master:
+	pm_runtime_disable(&pdev->dev);
 	if (rs->dma_tx.ch)
 		dma_release_channel(rs->dma_tx.ch);
 	if (rs->dma_rx.ch)
@@ -778,6 +779,8 @@
 	if (rs->dma_rx.ch)
 		dma_release_channel(rs->dma_rx.ch);
 
+	spi_master_put(master);
+
 	return 0;
 }
 
diff --git a/drivers/ssb/Kconfig b/drivers/ssb/Kconfig
index 0c67586..d8e4219 100644
--- a/drivers/ssb/Kconfig
+++ b/drivers/ssb/Kconfig
@@ -83,6 +83,7 @@
 config SSB_HOST_SOC
 	bool "Support for SSB bus on SoC"
 	depends on SSB && BCM47XX_NVRAM
+	select SSB_SPROM
 	help
 	  Host interface for a SSB directly mapped into memory. This is
 	  for some Broadcom SoCs from the BCM47xx and BCM53xx lines.
diff --git a/drivers/ssb/main.c b/drivers/ssb/main.c
index cde5ff7..d1a7507 100644
--- a/drivers/ssb/main.c
+++ b/drivers/ssb/main.c
@@ -613,9 +613,10 @@
 	return err;
 }
 
-static int ssb_bus_register(struct ssb_bus *bus,
-			    ssb_invariants_func_t get_invariants,
-			    unsigned long baseaddr)
+static int __maybe_unused
+ssb_bus_register(struct ssb_bus *bus,
+		 ssb_invariants_func_t get_invariants,
+		 unsigned long baseaddr)
 {
 	int err;
 
diff --git a/drivers/staging/iio/adc/Kconfig b/drivers/staging/iio/adc/Kconfig
index 58d4517..b9519be 100644
--- a/drivers/staging/iio/adc/Kconfig
+++ b/drivers/staging/iio/adc/Kconfig
@@ -6,6 +6,7 @@
 config AD7606
 	tristate "Analog Devices AD7606 ADC driver"
 	depends on GPIOLIB || COMPILE_TEST
+	depends on HAS_IOMEM
 	select IIO_BUFFER
 	select IIO_TRIGGERED_BUFFER
 	help
diff --git a/drivers/staging/iio/meter/ade7753.c b/drivers/staging/iio/meter/ade7753.c
index f129039..6928710 100644
--- a/drivers/staging/iio/meter/ade7753.c
+++ b/drivers/staging/iio/meter/ade7753.c
@@ -217,8 +217,12 @@
 static int ade7753_reset(struct device *dev)
 {
 	u16 val;
+	int ret;
 
-	ade7753_spi_read_reg_16(dev, ADE7753_MODE, &val);
+	ret = ade7753_spi_read_reg_16(dev, ADE7753_MODE, &val);
+	if (ret)
+		return ret;
+
 	val |= BIT(6); /* Software Chip Reset */
 
 	return ade7753_spi_write_reg_16(dev, ADE7753_MODE, val);
@@ -343,8 +347,12 @@
 static int ade7753_stop_device(struct device *dev)
 {
 	u16 val;
+	int ret;
 
-	ade7753_spi_read_reg_16(dev, ADE7753_MODE, &val);
+	ret = ade7753_spi_read_reg_16(dev, ADE7753_MODE, &val);
+	if (ret)
+		return ret;
+
 	val |= BIT(4);  /* AD converters can be turned off */
 
 	return ade7753_spi_write_reg_16(dev, ADE7753_MODE, val);
diff --git a/drivers/staging/media/davinci_vpfe/vpfe_video.c b/drivers/staging/media/davinci_vpfe/vpfe_video.c
index 3ec7e65..db49af9 100644
--- a/drivers/staging/media/davinci_vpfe/vpfe_video.c
+++ b/drivers/staging/media/davinci_vpfe/vpfe_video.c
@@ -147,7 +147,7 @@
 	mutex_lock(&mdev->graph_mutex);
 	ret = media_entity_graph_walk_init(&graph, entity->graph_obj.mdev);
 	if (ret) {
-		mutex_unlock(&video->lock);
+		mutex_unlock(&mdev->graph_mutex);
 		return -ENOMEM;
 	}
 	media_entity_graph_walk_start(&graph, entity);
diff --git a/drivers/staging/panel/panel.c b/drivers/staging/panel/panel.c
index 79ac192..70b8f4f 100644
--- a/drivers/staging/panel/panel.c
+++ b/drivers/staging/panel/panel.c
@@ -825,8 +825,7 @@
 	lcd_send_serial(0x1F);	/* R/W=W, RS=0 */
 	lcd_send_serial(cmd & 0x0F);
 	lcd_send_serial((cmd >> 4) & 0x0F);
-	/* the shortest command takes at least 40 us */
-	usleep_range(40, 100);
+	udelay(40);		/* the shortest command takes at least 40 us */
 	spin_unlock_irq(&pprt_lock);
 }
 
@@ -837,8 +836,7 @@
 	lcd_send_serial(0x5F);	/* R/W=W, RS=1 */
 	lcd_send_serial(data & 0x0F);
 	lcd_send_serial((data >> 4) & 0x0F);
-	/* the shortest data takes at least 40 us */
-	usleep_range(40, 100);
+	udelay(40);		/* the shortest data takes at least 40 us */
 	spin_unlock_irq(&pprt_lock);
 }
 
@@ -848,20 +846,19 @@
 	spin_lock_irq(&pprt_lock);
 	/* present the data to the data port */
 	w_dtr(pprt, cmd);
-	/* maintain the data during 20 us before the strobe */
-	usleep_range(20, 100);
+	udelay(20);	/* maintain the data during 20 us before the strobe */
 
 	bits.e = BIT_SET;
 	bits.rs = BIT_CLR;
 	bits.rw = BIT_CLR;
 	set_ctrl_bits();
 
-	usleep_range(40, 100);	/* maintain the strobe during 40 us */
+	udelay(40);	/* maintain the strobe during 40 us */
 
 	bits.e = BIT_CLR;
 	set_ctrl_bits();
 
-	usleep_range(120, 500);	/* the shortest command takes at least 120 us */
+	udelay(120);	/* the shortest command takes at least 120 us */
 	spin_unlock_irq(&pprt_lock);
 }
 
@@ -871,20 +868,19 @@
 	spin_lock_irq(&pprt_lock);
 	/* present the data to the data port */
 	w_dtr(pprt, data);
-	/* maintain the data during 20 us before the strobe */
-	usleep_range(20, 100);
+	udelay(20);	/* maintain the data during 20 us before the strobe */
 
 	bits.e = BIT_SET;
 	bits.rs = BIT_SET;
 	bits.rw = BIT_CLR;
 	set_ctrl_bits();
 
-	usleep_range(40, 100);	/* maintain the strobe during 40 us */
+	udelay(40);	/* maintain the strobe during 40 us */
 
 	bits.e = BIT_CLR;
 	set_ctrl_bits();
 
-	usleep_range(45, 100);	/* the shortest data takes at least 45 us */
+	udelay(45);	/* the shortest data takes at least 45 us */
 	spin_unlock_irq(&pprt_lock);
 }
 
@@ -894,7 +890,7 @@
 	spin_lock_irq(&pprt_lock);
 	/* present the data to the control port */
 	w_ctr(pprt, cmd);
-	usleep_range(60, 120);
+	udelay(60);
 	spin_unlock_irq(&pprt_lock);
 }
 
@@ -904,7 +900,7 @@
 	spin_lock_irq(&pprt_lock);
 	/* present the data to the data port */
 	w_dtr(pprt, data);
-	usleep_range(60, 120);
+	udelay(60);
 	spin_unlock_irq(&pprt_lock);
 }
 
@@ -947,7 +943,7 @@
 		lcd_send_serial(0x5F);	/* R/W=W, RS=1 */
 		lcd_send_serial(' ' & 0x0F);
 		lcd_send_serial((' ' >> 4) & 0x0F);
-		usleep_range(40, 100);	/* the shortest data takes at least 40 us */
+		udelay(40);	/* the shortest data takes at least 40 us */
 	}
 	spin_unlock_irq(&pprt_lock);
 
@@ -971,7 +967,7 @@
 		w_dtr(pprt, ' ');
 
 		/* maintain the data during 20 us before the strobe */
-		usleep_range(20, 100);
+		udelay(20);
 
 		bits.e = BIT_SET;
 		bits.rs = BIT_SET;
@@ -979,13 +975,13 @@
 		set_ctrl_bits();
 
 		/* maintain the strobe during 40 us */
-		usleep_range(40, 100);
+		udelay(40);
 
 		bits.e = BIT_CLR;
 		set_ctrl_bits();
 
 		/* the shortest data takes at least 45 us */
-		usleep_range(45, 100);
+		udelay(45);
 	}
 	spin_unlock_irq(&pprt_lock);
 
@@ -1007,7 +1003,7 @@
 	for (pos = 0; pos < lcd.height * lcd.hwidth; pos++) {
 		/* present the data to the data port */
 		w_dtr(pprt, ' ');
-		usleep_range(60, 120);
+		udelay(60);
 	}
 
 	spin_unlock_irq(&pprt_lock);
diff --git a/drivers/staging/rdma/Kconfig b/drivers/staging/rdma/Kconfig
index ba876506..f1f3eca 100644
--- a/drivers/staging/rdma/Kconfig
+++ b/drivers/staging/rdma/Kconfig
@@ -22,12 +22,6 @@
 # Please keep entries in alphabetic order
 if STAGING_RDMA
 
-source "drivers/staging/rdma/amso1100/Kconfig"
-
-source "drivers/staging/rdma/ehca/Kconfig"
-
 source "drivers/staging/rdma/hfi1/Kconfig"
 
-source "drivers/staging/rdma/ipath/Kconfig"
-
 endif
diff --git a/drivers/staging/rdma/Makefile b/drivers/staging/rdma/Makefile
index 139d78e..8c7fc1d 100644
--- a/drivers/staging/rdma/Makefile
+++ b/drivers/staging/rdma/Makefile
@@ -1,5 +1,2 @@
 # Entries for RDMA_STAGING tree
-obj-$(CONFIG_INFINIBAND_AMSO1100)	+= amso1100/
-obj-$(CONFIG_INFINIBAND_EHCA)	+= ehca/
 obj-$(CONFIG_INFINIBAND_HFI1)	+= hfi1/
-obj-$(CONFIG_INFINIBAND_IPATH)	+= ipath/
diff --git a/drivers/staging/rdma/amso1100/Kbuild b/drivers/staging/rdma/amso1100/Kbuild
deleted file mode 100644
index 950dfab..0000000
--- a/drivers/staging/rdma/amso1100/Kbuild
+++ /dev/null
@@ -1,6 +0,0 @@
-ccflags-$(CONFIG_INFINIBAND_AMSO1100_DEBUG) := -DDEBUG
-
-obj-$(CONFIG_INFINIBAND_AMSO1100) += iw_c2.o
-
-iw_c2-y := c2.o c2_provider.o c2_rnic.o c2_alloc.o c2_mq.o c2_ae.o c2_vq.o \
-	c2_intr.o c2_cq.o c2_qp.o c2_cm.o c2_mm.o c2_pd.o
diff --git a/drivers/staging/rdma/amso1100/Kconfig b/drivers/staging/rdma/amso1100/Kconfig
deleted file mode 100644
index e6ce5f2..0000000
--- a/drivers/staging/rdma/amso1100/Kconfig
+++ /dev/null
@@ -1,15 +0,0 @@
-config INFINIBAND_AMSO1100
-	tristate "Ammasso 1100 HCA support"
-	depends on PCI && INET
-	---help---
-	  This is a low-level driver for the Ammasso 1100 host
-	  channel adapter (HCA).
-
-config INFINIBAND_AMSO1100_DEBUG
-	bool "Verbose debugging output"
-	depends on INFINIBAND_AMSO1100
-	default n
-	---help---
-	  This option causes the amso1100 driver to produce a bunch of
-	  debug messages.  Select this if you are developing the driver
-	  or trying to diagnose a problem.
diff --git a/drivers/staging/rdma/amso1100/TODO b/drivers/staging/rdma/amso1100/TODO
deleted file mode 100644
index 18b00a5..0000000
--- a/drivers/staging/rdma/amso1100/TODO
+++ /dev/null
@@ -1,4 +0,0 @@
-7/2015
-
-The amso1100 driver has been deprecated and moved to drivers/staging.
-It will be removed in the 4.6 merge window.
diff --git a/drivers/staging/rdma/amso1100/c2.c b/drivers/staging/rdma/amso1100/c2.c
deleted file mode 100644
index b46ebd1..0000000
--- a/drivers/staging/rdma/amso1100/c2.c
+++ /dev/null
@@ -1,1240 +0,0 @@
-/*
- * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/pci.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/inetdevice.h>
-#include <linux/interrupt.h>
-#include <linux/delay.h>
-#include <linux/ethtool.h>
-#include <linux/mii.h>
-#include <linux/if_vlan.h>
-#include <linux/crc32.h>
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/tcp.h>
-#include <linux/init.h>
-#include <linux/dma-mapping.h>
-#include <linux/slab.h>
-#include <linux/prefetch.h>
-
-#include <asm/io.h>
-#include <asm/irq.h>
-#include <asm/byteorder.h>
-
-#include <rdma/ib_smi.h>
-#include "c2.h"
-#include "c2_provider.h"
-
-MODULE_AUTHOR("Tom Tucker <tom@opengridcomputing.com>");
-MODULE_DESCRIPTION("Ammasso AMSO1100 Low-level iWARP Driver");
-MODULE_LICENSE("Dual BSD/GPL");
-MODULE_VERSION(DRV_VERSION);
-
-static const u32 default_msg = NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK
-    | NETIF_MSG_IFUP | NETIF_MSG_IFDOWN;
-
-static int debug = -1;		/* defaults above */
-module_param(debug, int, 0);
-MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
-
-static int c2_up(struct net_device *netdev);
-static int c2_down(struct net_device *netdev);
-static int c2_xmit_frame(struct sk_buff *skb, struct net_device *netdev);
-static void c2_tx_interrupt(struct net_device *netdev);
-static void c2_rx_interrupt(struct net_device *netdev);
-static irqreturn_t c2_interrupt(int irq, void *dev_id);
-static void c2_tx_timeout(struct net_device *netdev);
-static int c2_change_mtu(struct net_device *netdev, int new_mtu);
-static void c2_reset(struct c2_port *c2_port);
-
-static struct pci_device_id c2_pci_table[] = {
-	{ PCI_DEVICE(0x18b8, 0xb001) },
-	{ 0 }
-};
-
-MODULE_DEVICE_TABLE(pci, c2_pci_table);
-
-static void c2_set_rxbufsize(struct c2_port *c2_port)
-{
-	struct net_device *netdev = c2_port->netdev;
-
-	if (netdev->mtu > RX_BUF_SIZE)
-		c2_port->rx_buf_size =
-		    netdev->mtu + ETH_HLEN + sizeof(struct c2_rxp_hdr) +
-		    NET_IP_ALIGN;
-	else
-		c2_port->rx_buf_size = sizeof(struct c2_rxp_hdr) + RX_BUF_SIZE;
-}
-
-/*
- * Allocate TX ring elements and chain them together.
- * One-to-one association of adapter descriptors with ring elements.
- */
-static int c2_tx_ring_alloc(struct c2_ring *tx_ring, void *vaddr,
-			    dma_addr_t base, void __iomem * mmio_txp_ring)
-{
-	struct c2_tx_desc *tx_desc;
-	struct c2_txp_desc __iomem *txp_desc;
-	struct c2_element *elem;
-	int i;
-
-	tx_ring->start = kmalloc_array(tx_ring->count, sizeof(*elem),
-				       GFP_KERNEL);
-	if (!tx_ring->start)
-		return -ENOMEM;
-
-	elem = tx_ring->start;
-	tx_desc = vaddr;
-	txp_desc = mmio_txp_ring;
-	for (i = 0; i < tx_ring->count; i++, elem++, tx_desc++, txp_desc++) {
-		tx_desc->len = 0;
-		tx_desc->status = 0;
-
-		/* Set TXP_HTXD_UNINIT */
-		__raw_writeq((__force u64) cpu_to_be64(0x1122334455667788ULL),
-			     (void __iomem *) txp_desc + C2_TXP_ADDR);
-		__raw_writew(0, (void __iomem *) txp_desc + C2_TXP_LEN);
-		__raw_writew((__force u16) cpu_to_be16(TXP_HTXD_UNINIT),
-			     (void __iomem *) txp_desc + C2_TXP_FLAGS);
-
-		elem->skb = NULL;
-		elem->ht_desc = tx_desc;
-		elem->hw_desc = txp_desc;
-
-		if (i == tx_ring->count - 1) {
-			elem->next = tx_ring->start;
-			tx_desc->next_offset = base;
-		} else {
-			elem->next = elem + 1;
-			tx_desc->next_offset =
-			    base + (i + 1) * sizeof(*tx_desc);
-		}
-	}
-
-	tx_ring->to_use = tx_ring->to_clean = tx_ring->start;
-
-	return 0;
-}
-
-/*
- * Allocate RX ring elements and chain them together.
- * One-to-one association of adapter descriptors with ring elements.
- */
-static int c2_rx_ring_alloc(struct c2_ring *rx_ring, void *vaddr,
-			    dma_addr_t base, void __iomem * mmio_rxp_ring)
-{
-	struct c2_rx_desc *rx_desc;
-	struct c2_rxp_desc __iomem *rxp_desc;
-	struct c2_element *elem;
-	int i;
-
-	rx_ring->start = kmalloc_array(rx_ring->count, sizeof(*elem),
-				       GFP_KERNEL);
-	if (!rx_ring->start)
-		return -ENOMEM;
-
-	elem = rx_ring->start;
-	rx_desc = vaddr;
-	rxp_desc = mmio_rxp_ring;
-	for (i = 0; i < rx_ring->count; i++, elem++, rx_desc++, rxp_desc++) {
-		rx_desc->len = 0;
-		rx_desc->status = 0;
-
-		/* Set RXP_HRXD_UNINIT */
-		__raw_writew((__force u16) cpu_to_be16(RXP_HRXD_OK),
-		       (void __iomem *) rxp_desc + C2_RXP_STATUS);
-		__raw_writew(0, (void __iomem *) rxp_desc + C2_RXP_COUNT);
-		__raw_writew(0, (void __iomem *) rxp_desc + C2_RXP_LEN);
-		__raw_writeq((__force u64) cpu_to_be64(0x99aabbccddeeffULL),
-			     (void __iomem *) rxp_desc + C2_RXP_ADDR);
-		__raw_writew((__force u16) cpu_to_be16(RXP_HRXD_UNINIT),
-			     (void __iomem *) rxp_desc + C2_RXP_FLAGS);
-
-		elem->skb = NULL;
-		elem->ht_desc = rx_desc;
-		elem->hw_desc = rxp_desc;
-
-		if (i == rx_ring->count - 1) {
-			elem->next = rx_ring->start;
-			rx_desc->next_offset = base;
-		} else {
-			elem->next = elem + 1;
-			rx_desc->next_offset =
-			    base + (i + 1) * sizeof(*rx_desc);
-		}
-	}
-
-	rx_ring->to_use = rx_ring->to_clean = rx_ring->start;
-
-	return 0;
-}
-
-/* Setup buffer for receiving */
-static inline int c2_rx_alloc(struct c2_port *c2_port, struct c2_element *elem)
-{
-	struct c2_dev *c2dev = c2_port->c2dev;
-	struct c2_rx_desc *rx_desc = elem->ht_desc;
-	struct sk_buff *skb;
-	dma_addr_t mapaddr;
-	u32 maplen;
-	struct c2_rxp_hdr *rxp_hdr;
-
-	skb = dev_alloc_skb(c2_port->rx_buf_size);
-	if (unlikely(!skb)) {
-		pr_debug("%s: out of memory for receive\n",
-			c2_port->netdev->name);
-		return -ENOMEM;
-	}
-
-	/* Zero out the rxp hdr in the sk_buff */
-	memset(skb->data, 0, sizeof(*rxp_hdr));
-
-	skb->dev = c2_port->netdev;
-
-	maplen = c2_port->rx_buf_size;
-	mapaddr =
-	    pci_map_single(c2dev->pcidev, skb->data, maplen,
-			   PCI_DMA_FROMDEVICE);
-
-	/* Set the sk_buff RXP_header to RXP_HRXD_READY */
-	rxp_hdr = (struct c2_rxp_hdr *) skb->data;
-	rxp_hdr->flags = RXP_HRXD_READY;
-
-	__raw_writew(0, elem->hw_desc + C2_RXP_STATUS);
-	__raw_writew((__force u16) cpu_to_be16((u16) maplen - sizeof(*rxp_hdr)),
-		     elem->hw_desc + C2_RXP_LEN);
-	__raw_writeq((__force u64) cpu_to_be64(mapaddr), elem->hw_desc + C2_RXP_ADDR);
-	__raw_writew((__force u16) cpu_to_be16(RXP_HRXD_READY),
-		     elem->hw_desc + C2_RXP_FLAGS);
-
-	elem->skb = skb;
-	elem->mapaddr = mapaddr;
-	elem->maplen = maplen;
-	rx_desc->len = maplen;
-
-	return 0;
-}
-
-/*
- * Allocate buffers for the Rx ring
- * For receive:  rx_ring.to_clean is next received frame
- */
-static int c2_rx_fill(struct c2_port *c2_port)
-{
-	struct c2_ring *rx_ring = &c2_port->rx_ring;
-	struct c2_element *elem;
-	int ret = 0;
-
-	elem = rx_ring->start;
-	do {
-		if (c2_rx_alloc(c2_port, elem)) {
-			ret = 1;
-			break;
-		}
-	} while ((elem = elem->next) != rx_ring->start);
-
-	rx_ring->to_clean = rx_ring->start;
-	return ret;
-}
-
-/* Free all buffers in RX ring, assumes receiver stopped */
-static void c2_rx_clean(struct c2_port *c2_port)
-{
-	struct c2_dev *c2dev = c2_port->c2dev;
-	struct c2_ring *rx_ring = &c2_port->rx_ring;
-	struct c2_element *elem;
-	struct c2_rx_desc *rx_desc;
-
-	elem = rx_ring->start;
-	do {
-		rx_desc = elem->ht_desc;
-		rx_desc->len = 0;
-
-		__raw_writew(0, elem->hw_desc + C2_RXP_STATUS);
-		__raw_writew(0, elem->hw_desc + C2_RXP_COUNT);
-		__raw_writew(0, elem->hw_desc + C2_RXP_LEN);
-		__raw_writeq((__force u64) cpu_to_be64(0x99aabbccddeeffULL),
-			     elem->hw_desc + C2_RXP_ADDR);
-		__raw_writew((__force u16) cpu_to_be16(RXP_HRXD_UNINIT),
-			     elem->hw_desc + C2_RXP_FLAGS);
-
-		if (elem->skb) {
-			pci_unmap_single(c2dev->pcidev, elem->mapaddr,
-					 elem->maplen, PCI_DMA_FROMDEVICE);
-			dev_kfree_skb(elem->skb);
-			elem->skb = NULL;
-		}
-	} while ((elem = elem->next) != rx_ring->start);
-}
-
-static inline int c2_tx_free(struct c2_dev *c2dev, struct c2_element *elem)
-{
-	struct c2_tx_desc *tx_desc = elem->ht_desc;
-
-	tx_desc->len = 0;
-
-	pci_unmap_single(c2dev->pcidev, elem->mapaddr, elem->maplen,
-			 PCI_DMA_TODEVICE);
-
-	if (elem->skb) {
-		dev_kfree_skb_any(elem->skb);
-		elem->skb = NULL;
-	}
-
-	return 0;
-}
-
-/* Free all buffers in TX ring, assumes transmitter stopped */
-static void c2_tx_clean(struct c2_port *c2_port)
-{
-	struct c2_ring *tx_ring = &c2_port->tx_ring;
-	struct c2_element *elem;
-	struct c2_txp_desc txp_htxd;
-	int retry;
-	unsigned long flags;
-
-	spin_lock_irqsave(&c2_port->tx_lock, flags);
-
-	elem = tx_ring->start;
-
-	do {
-		retry = 0;
-		do {
-			txp_htxd.flags =
-			    readw(elem->hw_desc + C2_TXP_FLAGS);
-
-			if (txp_htxd.flags == TXP_HTXD_READY) {
-				retry = 1;
-				__raw_writew(0,
-					     elem->hw_desc + C2_TXP_LEN);
-				__raw_writeq(0,
-					     elem->hw_desc + C2_TXP_ADDR);
-				__raw_writew((__force u16) cpu_to_be16(TXP_HTXD_DONE),
-					     elem->hw_desc + C2_TXP_FLAGS);
-				c2_port->netdev->stats.tx_dropped++;
-				break;
-			} else {
-				__raw_writew(0,
-					     elem->hw_desc + C2_TXP_LEN);
-				__raw_writeq((__force u64) cpu_to_be64(0x1122334455667788ULL),
-					     elem->hw_desc + C2_TXP_ADDR);
-				__raw_writew((__force u16) cpu_to_be16(TXP_HTXD_UNINIT),
-					     elem->hw_desc + C2_TXP_FLAGS);
-			}
-
-			c2_tx_free(c2_port->c2dev, elem);
-
-		} while ((elem = elem->next) != tx_ring->start);
-	} while (retry);
-
-	c2_port->tx_avail = c2_port->tx_ring.count - 1;
-	c2_port->c2dev->cur_tx = tx_ring->to_use - tx_ring->start;
-
-	if (c2_port->tx_avail > MAX_SKB_FRAGS + 1)
-		netif_wake_queue(c2_port->netdev);
-
-	spin_unlock_irqrestore(&c2_port->tx_lock, flags);
-}
-
-/*
- * Process transmit descriptors marked 'DONE' by the firmware,
- * freeing up their unneeded sk_buffs.
- */
-static void c2_tx_interrupt(struct net_device *netdev)
-{
-	struct c2_port *c2_port = netdev_priv(netdev);
-	struct c2_dev *c2dev = c2_port->c2dev;
-	struct c2_ring *tx_ring = &c2_port->tx_ring;
-	struct c2_element *elem;
-	struct c2_txp_desc txp_htxd;
-
-	spin_lock(&c2_port->tx_lock);
-
-	for (elem = tx_ring->to_clean; elem != tx_ring->to_use;
-	     elem = elem->next) {
-		txp_htxd.flags =
-		    be16_to_cpu((__force __be16) readw(elem->hw_desc + C2_TXP_FLAGS));
-
-		if (txp_htxd.flags != TXP_HTXD_DONE)
-			break;
-
-		if (netif_msg_tx_done(c2_port)) {
-			/* PCI reads are expensive in fast path */
-			txp_htxd.len =
-			    be16_to_cpu((__force __be16) readw(elem->hw_desc + C2_TXP_LEN));
-			pr_debug("%s: tx done slot %3Zu status 0x%x len "
-				"%5u bytes\n",
-				netdev->name, elem - tx_ring->start,
-				txp_htxd.flags, txp_htxd.len);
-		}
-
-		c2_tx_free(c2dev, elem);
-		++(c2_port->tx_avail);
-	}
-
-	tx_ring->to_clean = elem;
-
-	if (netif_queue_stopped(netdev)
-	    && c2_port->tx_avail > MAX_SKB_FRAGS + 1)
-		netif_wake_queue(netdev);
-
-	spin_unlock(&c2_port->tx_lock);
-}
-
-static void c2_rx_error(struct c2_port *c2_port, struct c2_element *elem)
-{
-	struct c2_rx_desc *rx_desc = elem->ht_desc;
-	struct c2_rxp_hdr *rxp_hdr = (struct c2_rxp_hdr *) elem->skb->data;
-
-	if (rxp_hdr->status != RXP_HRXD_OK ||
-	    rxp_hdr->len > (rx_desc->len - sizeof(*rxp_hdr))) {
-		pr_debug("BAD RXP_HRXD\n");
-		pr_debug("  rx_desc : %p\n", rx_desc);
-		pr_debug("    index : %Zu\n",
-			elem - c2_port->rx_ring.start);
-		pr_debug("    len   : %u\n", rx_desc->len);
-		pr_debug("  rxp_hdr : %p [PA %p]\n", rxp_hdr,
-			(void *) __pa((unsigned long) rxp_hdr));
-		pr_debug("    flags : 0x%x\n", rxp_hdr->flags);
-		pr_debug("    status: 0x%x\n", rxp_hdr->status);
-		pr_debug("    len   : %u\n", rxp_hdr->len);
-		pr_debug("    rsvd  : 0x%x\n", rxp_hdr->rsvd);
-	}
-
-	/* Setup the skb for reuse since we're dropping this pkt */
-	elem->skb->data = elem->skb->head;
-	skb_reset_tail_pointer(elem->skb);
-
-	/* Zero out the rxp hdr in the sk_buff */
-	memset(elem->skb->data, 0, sizeof(*rxp_hdr));
-
-	/* Write the descriptor to the adapter's rx ring */
-	__raw_writew(0, elem->hw_desc + C2_RXP_STATUS);
-	__raw_writew(0, elem->hw_desc + C2_RXP_COUNT);
-	__raw_writew((__force u16) cpu_to_be16((u16) elem->maplen - sizeof(*rxp_hdr)),
-		     elem->hw_desc + C2_RXP_LEN);
-	__raw_writeq((__force u64) cpu_to_be64(elem->mapaddr),
-		     elem->hw_desc + C2_RXP_ADDR);
-	__raw_writew((__force u16) cpu_to_be16(RXP_HRXD_READY),
-		     elem->hw_desc + C2_RXP_FLAGS);
-
-	pr_debug("packet dropped\n");
-	c2_port->netdev->stats.rx_dropped++;
-}
-
-static void c2_rx_interrupt(struct net_device *netdev)
-{
-	struct c2_port *c2_port = netdev_priv(netdev);
-	struct c2_dev *c2dev = c2_port->c2dev;
-	struct c2_ring *rx_ring = &c2_port->rx_ring;
-	struct c2_element *elem;
-	struct c2_rx_desc *rx_desc;
-	struct c2_rxp_hdr *rxp_hdr;
-	struct sk_buff *skb;
-	dma_addr_t mapaddr;
-	u32 maplen, buflen;
-	unsigned long flags;
-
-	spin_lock_irqsave(&c2dev->lock, flags);
-
-	/* Begin where we left off */
-	rx_ring->to_clean = rx_ring->start + c2dev->cur_rx;
-
-	for (elem = rx_ring->to_clean; elem->next != rx_ring->to_clean;
-	     elem = elem->next) {
-		rx_desc = elem->ht_desc;
-		mapaddr = elem->mapaddr;
-		maplen = elem->maplen;
-		skb = elem->skb;
-		rxp_hdr = (struct c2_rxp_hdr *) skb->data;
-
-		if (rxp_hdr->flags != RXP_HRXD_DONE)
-			break;
-		buflen = rxp_hdr->len;
-
-		/* Sanity check the RXP header */
-		if (rxp_hdr->status != RXP_HRXD_OK ||
-		    buflen > (rx_desc->len - sizeof(*rxp_hdr))) {
-			c2_rx_error(c2_port, elem);
-			continue;
-		}
-
-		/*
-		 * Allocate and map a new skb for replenishing the host
-		 * RX desc
-		 */
-		if (c2_rx_alloc(c2_port, elem)) {
-			c2_rx_error(c2_port, elem);
-			continue;
-		}
-
-		/* Unmap the old skb */
-		pci_unmap_single(c2dev->pcidev, mapaddr, maplen,
-				 PCI_DMA_FROMDEVICE);
-
-		prefetch(skb->data);
-
-		/*
-		 * Skip past the leading 8 bytes comprising of the
-		 * "struct c2_rxp_hdr", prepended by the adapter
-		 * to the usual Ethernet header ("struct ethhdr"),
-		 * to the start of the raw Ethernet packet.
-		 *
-		 * Fix up the various fields in the sk_buff before
-		 * passing it up to netif_rx(). The transfer size
-		 * (in bytes) specified by the adapter len field of
-		 * the "struct rxp_hdr_t" does NOT include the
-		 * "sizeof(struct c2_rxp_hdr)".
-		 */
-		skb->data += sizeof(*rxp_hdr);
-		skb_set_tail_pointer(skb, buflen);
-		skb->len = buflen;
-		skb->protocol = eth_type_trans(skb, netdev);
-
-		netif_rx(skb);
-
-		netdev->stats.rx_packets++;
-		netdev->stats.rx_bytes += buflen;
-	}
-
-	/* Save where we left off */
-	rx_ring->to_clean = elem;
-	c2dev->cur_rx = elem - rx_ring->start;
-	C2_SET_CUR_RX(c2dev, c2dev->cur_rx);
-
-	spin_unlock_irqrestore(&c2dev->lock, flags);
-}
-
-/*
- * Handle netisr0 TX & RX interrupts.
- */
-static irqreturn_t c2_interrupt(int irq, void *dev_id)
-{
-	unsigned int netisr0, dmaisr;
-	int handled = 0;
-	struct c2_dev *c2dev = dev_id;
-
-	/* Process CCILNET interrupts */
-	netisr0 = readl(c2dev->regs + C2_NISR0);
-	if (netisr0) {
-
-		/*
-		 * There is an issue with the firmware that always
-		 * provides the status of RX for both TX & RX
-		 * interrupts.  So process both queues here.
-		 */
-		c2_rx_interrupt(c2dev->netdev);
-		c2_tx_interrupt(c2dev->netdev);
-
-		/* Clear the interrupt */
-		writel(netisr0, c2dev->regs + C2_NISR0);
-		handled++;
-	}
-
-	/* Process RNIC interrupts */
-	dmaisr = readl(c2dev->regs + C2_DISR);
-	if (dmaisr) {
-		writel(dmaisr, c2dev->regs + C2_DISR);
-		c2_rnic_interrupt(c2dev);
-		handled++;
-	}
-
-	if (handled) {
-		return IRQ_HANDLED;
-	} else {
-		return IRQ_NONE;
-	}
-}
-
-static int c2_up(struct net_device *netdev)
-{
-	struct c2_port *c2_port = netdev_priv(netdev);
-	struct c2_dev *c2dev = c2_port->c2dev;
-	struct c2_element *elem;
-	struct c2_rxp_hdr *rxp_hdr;
-	struct in_device *in_dev;
-	size_t rx_size, tx_size;
-	int ret, i;
-	unsigned int netimr0;
-
-	if (netif_msg_ifup(c2_port))
-		pr_debug("%s: enabling interface\n", netdev->name);
-
-	/* Set the Rx buffer size based on MTU */
-	c2_set_rxbufsize(c2_port);
-
-	/* Allocate DMA'able memory for Tx/Rx host descriptor rings */
-	rx_size = c2_port->rx_ring.count * sizeof(struct c2_rx_desc);
-	tx_size = c2_port->tx_ring.count * sizeof(struct c2_tx_desc);
-
-	c2_port->mem_size = tx_size + rx_size;
-	c2_port->mem = pci_zalloc_consistent(c2dev->pcidev, c2_port->mem_size,
-					     &c2_port->dma);
-	if (c2_port->mem == NULL) {
-		pr_debug("Unable to allocate memory for "
-			"host descriptor rings\n");
-		return -ENOMEM;
-	}
-
-	/* Create the Rx host descriptor ring */
-	if ((ret =
-	     c2_rx_ring_alloc(&c2_port->rx_ring, c2_port->mem, c2_port->dma,
-			      c2dev->mmio_rxp_ring))) {
-		pr_debug("Unable to create RX ring\n");
-		goto bail0;
-	}
-
-	/* Allocate Rx buffers for the host descriptor ring */
-	if (c2_rx_fill(c2_port)) {
-		pr_debug("Unable to fill RX ring\n");
-		goto bail1;
-	}
-
-	/* Create the Tx host descriptor ring */
-	if ((ret = c2_tx_ring_alloc(&c2_port->tx_ring, c2_port->mem + rx_size,
-				    c2_port->dma + rx_size,
-				    c2dev->mmio_txp_ring))) {
-		pr_debug("Unable to create TX ring\n");
-		goto bail1;
-	}
-
-	/* Set the TX pointer to where we left off */
-	c2_port->tx_avail = c2_port->tx_ring.count - 1;
-	c2_port->tx_ring.to_use = c2_port->tx_ring.to_clean =
-	    c2_port->tx_ring.start + c2dev->cur_tx;
-
-	/* missing: Initialize MAC */
-
-	BUG_ON(c2_port->tx_ring.to_use != c2_port->tx_ring.to_clean);
-
-	/* Reset the adapter, ensures the driver is in sync with the RXP */
-	c2_reset(c2_port);
-
-	/* Reset the READY bit in the sk_buff RXP headers & adapter HRXDQ */
-	for (i = 0, elem = c2_port->rx_ring.start; i < c2_port->rx_ring.count;
-	     i++, elem++) {
-		rxp_hdr = (struct c2_rxp_hdr *) elem->skb->data;
-		rxp_hdr->flags = 0;
-		__raw_writew((__force u16) cpu_to_be16(RXP_HRXD_READY),
-			     elem->hw_desc + C2_RXP_FLAGS);
-	}
-
-	/* Enable network packets */
-	netif_start_queue(netdev);
-
-	/* Enable IRQ */
-	writel(0, c2dev->regs + C2_IDIS);
-	netimr0 = readl(c2dev->regs + C2_NIMR0);
-	netimr0 &= ~(C2_PCI_HTX_INT | C2_PCI_HRX_INT);
-	writel(netimr0, c2dev->regs + C2_NIMR0);
-
-	/* Tell the stack to ignore arp requests for ipaddrs bound to
-	 * other interfaces.  This is needed to prevent the host stack
-	 * from responding to arp requests to the ipaddr bound on the
-	 * rdma interface.
-	 */
-	in_dev = in_dev_get(netdev);
-	IN_DEV_CONF_SET(in_dev, ARP_IGNORE, 1);
-	in_dev_put(in_dev);
-
-	return 0;
-
-bail1:
-	c2_rx_clean(c2_port);
-	kfree(c2_port->rx_ring.start);
-
-bail0:
-	pci_free_consistent(c2dev->pcidev, c2_port->mem_size, c2_port->mem,
-			    c2_port->dma);
-
-	return ret;
-}
-
-static int c2_down(struct net_device *netdev)
-{
-	struct c2_port *c2_port = netdev_priv(netdev);
-	struct c2_dev *c2dev = c2_port->c2dev;
-
-	if (netif_msg_ifdown(c2_port))
-		pr_debug("%s: disabling interface\n",
-			netdev->name);
-
-	/* Wait for all the queued packets to get sent */
-	c2_tx_interrupt(netdev);
-
-	/* Disable network packets */
-	netif_stop_queue(netdev);
-
-	/* Disable IRQs by clearing the interrupt mask */
-	writel(1, c2dev->regs + C2_IDIS);
-	writel(0, c2dev->regs + C2_NIMR0);
-
-	/* missing: Stop transmitter */
-
-	/* missing: Stop receiver */
-
-	/* Reset the adapter, ensures the driver is in sync with the RXP */
-	c2_reset(c2_port);
-
-	/* missing: Turn off LEDs here */
-
-	/* Free all buffers in the host descriptor rings */
-	c2_tx_clean(c2_port);
-	c2_rx_clean(c2_port);
-
-	/* Free the host descriptor rings */
-	kfree(c2_port->rx_ring.start);
-	kfree(c2_port->tx_ring.start);
-	pci_free_consistent(c2dev->pcidev, c2_port->mem_size, c2_port->mem,
-			    c2_port->dma);
-
-	return 0;
-}
-
-static void c2_reset(struct c2_port *c2_port)
-{
-	struct c2_dev *c2dev = c2_port->c2dev;
-	unsigned int cur_rx = c2dev->cur_rx;
-
-	/* Tell the hardware to quiesce */
-	C2_SET_CUR_RX(c2dev, cur_rx | C2_PCI_HRX_QUI);
-
-	/*
-	 * The hardware will reset the C2_PCI_HRX_QUI bit once
-	 * the RXP is quiesced.  Wait 2 seconds for this.
-	 */
-	ssleep(2);
-
-	cur_rx = C2_GET_CUR_RX(c2dev);
-
-	if (cur_rx & C2_PCI_HRX_QUI)
-		pr_debug("c2_reset: failed to quiesce the hardware!\n");
-
-	cur_rx &= ~C2_PCI_HRX_QUI;
-
-	c2dev->cur_rx = cur_rx;
-
-	pr_debug("Current RX: %u\n", c2dev->cur_rx);
-}
-
-static int c2_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
-{
-	struct c2_port *c2_port = netdev_priv(netdev);
-	struct c2_dev *c2dev = c2_port->c2dev;
-	struct c2_ring *tx_ring = &c2_port->tx_ring;
-	struct c2_element *elem;
-	dma_addr_t mapaddr;
-	u32 maplen;
-	unsigned long flags;
-	unsigned int i;
-
-	spin_lock_irqsave(&c2_port->tx_lock, flags);
-
-	if (unlikely(c2_port->tx_avail < (skb_shinfo(skb)->nr_frags + 1))) {
-		netif_stop_queue(netdev);
-		spin_unlock_irqrestore(&c2_port->tx_lock, flags);
-
-		pr_debug("%s: Tx ring full when queue awake!\n",
-			netdev->name);
-		return NETDEV_TX_BUSY;
-	}
-
-	maplen = skb_headlen(skb);
-	mapaddr =
-	    pci_map_single(c2dev->pcidev, skb->data, maplen, PCI_DMA_TODEVICE);
-
-	elem = tx_ring->to_use;
-	elem->skb = skb;
-	elem->mapaddr = mapaddr;
-	elem->maplen = maplen;
-
-	/* Tell HW to xmit */
-	__raw_writeq((__force u64) cpu_to_be64(mapaddr),
-		     elem->hw_desc + C2_TXP_ADDR);
-	__raw_writew((__force u16) cpu_to_be16(maplen),
-		     elem->hw_desc + C2_TXP_LEN);
-	__raw_writew((__force u16) cpu_to_be16(TXP_HTXD_READY),
-		     elem->hw_desc + C2_TXP_FLAGS);
-
-	netdev->stats.tx_packets++;
-	netdev->stats.tx_bytes += maplen;
-
-	/* Loop thru additional data fragments and queue them */
-	if (skb_shinfo(skb)->nr_frags) {
-		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
-			const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
-			maplen = skb_frag_size(frag);
-			mapaddr = skb_frag_dma_map(&c2dev->pcidev->dev, frag,
-						   0, maplen, DMA_TO_DEVICE);
-			elem = elem->next;
-			elem->skb = NULL;
-			elem->mapaddr = mapaddr;
-			elem->maplen = maplen;
-
-			/* Tell HW to xmit */
-			__raw_writeq((__force u64) cpu_to_be64(mapaddr),
-				     elem->hw_desc + C2_TXP_ADDR);
-			__raw_writew((__force u16) cpu_to_be16(maplen),
-				     elem->hw_desc + C2_TXP_LEN);
-			__raw_writew((__force u16) cpu_to_be16(TXP_HTXD_READY),
-				     elem->hw_desc + C2_TXP_FLAGS);
-
-			netdev->stats.tx_packets++;
-			netdev->stats.tx_bytes += maplen;
-		}
-	}
-
-	tx_ring->to_use = elem->next;
-	c2_port->tx_avail -= (skb_shinfo(skb)->nr_frags + 1);
-
-	if (c2_port->tx_avail <= MAX_SKB_FRAGS + 1) {
-		netif_stop_queue(netdev);
-		if (netif_msg_tx_queued(c2_port))
-			pr_debug("%s: transmit queue full\n",
-				netdev->name);
-	}
-
-	spin_unlock_irqrestore(&c2_port->tx_lock, flags);
-
-	netdev->trans_start = jiffies;
-
-	return NETDEV_TX_OK;
-}
-
-static void c2_tx_timeout(struct net_device *netdev)
-{
-	struct c2_port *c2_port = netdev_priv(netdev);
-
-	if (netif_msg_timer(c2_port))
-		pr_debug("%s: tx timeout\n", netdev->name);
-
-	c2_tx_clean(c2_port);
-}
-
-static int c2_change_mtu(struct net_device *netdev, int new_mtu)
-{
-	int ret = 0;
-
-	if (new_mtu < ETH_ZLEN || new_mtu > ETH_JUMBO_MTU)
-		return -EINVAL;
-
-	netdev->mtu = new_mtu;
-
-	if (netif_running(netdev)) {
-		c2_down(netdev);
-
-		c2_up(netdev);
-	}
-
-	return ret;
-}
-
-static const struct net_device_ops c2_netdev = {
-	.ndo_open 		= c2_up,
-	.ndo_stop 		= c2_down,
-	.ndo_start_xmit		= c2_xmit_frame,
-	.ndo_tx_timeout		= c2_tx_timeout,
-	.ndo_change_mtu		= c2_change_mtu,
-	.ndo_set_mac_address 	= eth_mac_addr,
-	.ndo_validate_addr	= eth_validate_addr,
-};
-
-/* Initialize network device */
-static struct net_device *c2_devinit(struct c2_dev *c2dev,
-				     void __iomem * mmio_addr)
-{
-	struct c2_port *c2_port = NULL;
-	struct net_device *netdev = alloc_etherdev(sizeof(*c2_port));
-
-	if (!netdev) {
-		pr_debug("c2_port etherdev alloc failed");
-		return NULL;
-	}
-
-	SET_NETDEV_DEV(netdev, &c2dev->pcidev->dev);
-
-	netdev->netdev_ops = &c2_netdev;
-	netdev->watchdog_timeo = C2_TX_TIMEOUT;
-	netdev->irq = c2dev->pcidev->irq;
-
-	c2_port = netdev_priv(netdev);
-	c2_port->netdev = netdev;
-	c2_port->c2dev = c2dev;
-	c2_port->msg_enable = netif_msg_init(debug, default_msg);
-	c2_port->tx_ring.count = C2_NUM_TX_DESC;
-	c2_port->rx_ring.count = C2_NUM_RX_DESC;
-
-	spin_lock_init(&c2_port->tx_lock);
-
-	/* Copy our 48-bit ethernet hardware address */
-	memcpy_fromio(netdev->dev_addr, mmio_addr + C2_REGS_ENADDR, 6);
-
-	/* Validate the MAC address */
-	if (!is_valid_ether_addr(netdev->dev_addr)) {
-		pr_debug("Invalid MAC Address\n");
-		pr_debug("%s: MAC %pM, IRQ %u\n", netdev->name,
-			 netdev->dev_addr, netdev->irq);
-		free_netdev(netdev);
-		return NULL;
-	}
-
-	c2dev->netdev = netdev;
-
-	return netdev;
-}
-
-static int c2_probe(struct pci_dev *pcidev, const struct pci_device_id *ent)
-{
-	int ret = 0, i;
-	unsigned long reg0_start, reg0_flags, reg0_len;
-	unsigned long reg2_start, reg2_flags, reg2_len;
-	unsigned long reg4_start, reg4_flags, reg4_len;
-	unsigned kva_map_size;
-	struct net_device *netdev = NULL;
-	struct c2_dev *c2dev = NULL;
-	void __iomem *mmio_regs = NULL;
-
-	printk(KERN_INFO PFX "AMSO1100 Gigabit Ethernet driver v%s loaded\n",
-		DRV_VERSION);
-
-	/* Enable PCI device */
-	ret = pci_enable_device(pcidev);
-	if (ret) {
-		printk(KERN_ERR PFX "%s: Unable to enable PCI device\n",
-			pci_name(pcidev));
-		goto bail0;
-	}
-
-	reg0_start = pci_resource_start(pcidev, BAR_0);
-	reg0_len = pci_resource_len(pcidev, BAR_0);
-	reg0_flags = pci_resource_flags(pcidev, BAR_0);
-
-	reg2_start = pci_resource_start(pcidev, BAR_2);
-	reg2_len = pci_resource_len(pcidev, BAR_2);
-	reg2_flags = pci_resource_flags(pcidev, BAR_2);
-
-	reg4_start = pci_resource_start(pcidev, BAR_4);
-	reg4_len = pci_resource_len(pcidev, BAR_4);
-	reg4_flags = pci_resource_flags(pcidev, BAR_4);
-
-	pr_debug("BAR0 size = 0x%lX bytes\n", reg0_len);
-	pr_debug("BAR2 size = 0x%lX bytes\n", reg2_len);
-	pr_debug("BAR4 size = 0x%lX bytes\n", reg4_len);
-
-	/* Make sure PCI base addr are MMIO */
-	if (!(reg0_flags & IORESOURCE_MEM) ||
-	    !(reg2_flags & IORESOURCE_MEM) || !(reg4_flags & IORESOURCE_MEM)) {
-		printk(KERN_ERR PFX "PCI regions not an MMIO resource\n");
-		ret = -ENODEV;
-		goto bail1;
-	}
-
-	/* Check for weird/broken PCI region reporting */
-	if ((reg0_len < C2_REG0_SIZE) ||
-	    (reg2_len < C2_REG2_SIZE) || (reg4_len < C2_REG4_SIZE)) {
-		printk(KERN_ERR PFX "Invalid PCI region sizes\n");
-		ret = -ENODEV;
-		goto bail1;
-	}
-
-	/* Reserve PCI I/O and memory resources */
-	ret = pci_request_regions(pcidev, DRV_NAME);
-	if (ret) {
-		printk(KERN_ERR PFX "%s: Unable to request regions\n",
-			pci_name(pcidev));
-		goto bail1;
-	}
-
-	if ((sizeof(dma_addr_t) > 4)) {
-		ret = pci_set_dma_mask(pcidev, DMA_BIT_MASK(64));
-		if (ret < 0) {
-			printk(KERN_ERR PFX "64b DMA configuration failed\n");
-			goto bail2;
-		}
-	} else {
-		ret = pci_set_dma_mask(pcidev, DMA_BIT_MASK(32));
-		if (ret < 0) {
-			printk(KERN_ERR PFX "32b DMA configuration failed\n");
-			goto bail2;
-		}
-	}
-
-	/* Enables bus-mastering on the device */
-	pci_set_master(pcidev);
-
-	/* Remap the adapter PCI registers in BAR4 */
-	mmio_regs = ioremap_nocache(reg4_start + C2_PCI_REGS_OFFSET,
-				    sizeof(struct c2_adapter_pci_regs));
-	if (!mmio_regs) {
-		printk(KERN_ERR PFX
-			"Unable to remap adapter PCI registers in BAR4\n");
-		ret = -EIO;
-		goto bail2;
-	}
-
-	/* Validate PCI regs magic */
-	for (i = 0; i < sizeof(c2_magic); i++) {
-		if (c2_magic[i] != readb(mmio_regs + C2_REGS_MAGIC + i)) {
-			printk(KERN_ERR PFX "Downlevel Firmware boot loader "
-				"[%d/%Zd: got 0x%x, exp 0x%x]. Use the cc_flash "
-			       "utility to update your boot loader\n",
-				i + 1, sizeof(c2_magic),
-				readb(mmio_regs + C2_REGS_MAGIC + i),
-				c2_magic[i]);
-			printk(KERN_ERR PFX "Adapter not claimed\n");
-			iounmap(mmio_regs);
-			ret = -EIO;
-			goto bail2;
-		}
-	}
-
-	/* Validate the adapter version */
-	if (be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_VERS)) != C2_VERSION) {
-		printk(KERN_ERR PFX "Version mismatch "
-			"[fw=%u, c2=%u], Adapter not claimed\n",
-			be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_VERS)),
-			C2_VERSION);
-		ret = -EINVAL;
-		iounmap(mmio_regs);
-		goto bail2;
-	}
-
-	/* Validate the adapter IVN */
-	if (be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_IVN)) != C2_IVN) {
-		printk(KERN_ERR PFX "Downlevel FIrmware level. You should be using "
-		       "the OpenIB device support kit. "
-		       "[fw=0x%x, c2=0x%x], Adapter not claimed\n",
-		       be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_IVN)),
-		       C2_IVN);
-		ret = -EINVAL;
-		iounmap(mmio_regs);
-		goto bail2;
-	}
-
-	/* Allocate hardware structure */
-	c2dev = (struct c2_dev *) ib_alloc_device(sizeof(*c2dev));
-	if (!c2dev) {
-		printk(KERN_ERR PFX "%s: Unable to alloc hardware struct\n",
-			pci_name(pcidev));
-		ret = -ENOMEM;
-		iounmap(mmio_regs);
-		goto bail2;
-	}
-
-	memset(c2dev, 0, sizeof(*c2dev));
-	spin_lock_init(&c2dev->lock);
-	c2dev->pcidev = pcidev;
-	c2dev->cur_tx = 0;
-
-	/* Get the last RX index */
-	c2dev->cur_rx =
-	    (be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_HRX_CUR)) -
-	     0xffffc000) / sizeof(struct c2_rxp_desc);
-
-	/* Request an interrupt line for the driver */
-	ret = request_irq(pcidev->irq, c2_interrupt, IRQF_SHARED, DRV_NAME, c2dev);
-	if (ret) {
-		printk(KERN_ERR PFX "%s: requested IRQ %u is busy\n",
-			pci_name(pcidev), pcidev->irq);
-		iounmap(mmio_regs);
-		goto bail3;
-	}
-
-	/* Set driver specific data */
-	pci_set_drvdata(pcidev, c2dev);
-
-	/* Initialize network device */
-	if ((netdev = c2_devinit(c2dev, mmio_regs)) == NULL) {
-		ret = -ENOMEM;
-		iounmap(mmio_regs);
-		goto bail4;
-	}
-
-	/* Save off the actual size prior to unmapping mmio_regs */
-	kva_map_size = be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_PCI_WINSIZE));
-
-	/* Unmap the adapter PCI registers in BAR4 */
-	iounmap(mmio_regs);
-
-	/* Register network device */
-	ret = register_netdev(netdev);
-	if (ret) {
-		printk(KERN_ERR PFX "Unable to register netdev, ret = %d\n",
-			ret);
-		goto bail5;
-	}
-
-	/* Disable network packets */
-	netif_stop_queue(netdev);
-
-	/* Remap the adapter HRXDQ PA space to kernel VA space */
-	c2dev->mmio_rxp_ring = ioremap_nocache(reg4_start + C2_RXP_HRXDQ_OFFSET,
-					       C2_RXP_HRXDQ_SIZE);
-	if (!c2dev->mmio_rxp_ring) {
-		printk(KERN_ERR PFX "Unable to remap MMIO HRXDQ region\n");
-		ret = -EIO;
-		goto bail6;
-	}
-
-	/* Remap the adapter HTXDQ PA space to kernel VA space */
-	c2dev->mmio_txp_ring = ioremap_nocache(reg4_start + C2_TXP_HTXDQ_OFFSET,
-					       C2_TXP_HTXDQ_SIZE);
-	if (!c2dev->mmio_txp_ring) {
-		printk(KERN_ERR PFX "Unable to remap MMIO HTXDQ region\n");
-		ret = -EIO;
-		goto bail7;
-	}
-
-	/* Save off the current RX index in the last 4 bytes of the TXP Ring */
-	C2_SET_CUR_RX(c2dev, c2dev->cur_rx);
-
-	/* Remap the PCI registers in adapter BAR0 to kernel VA space */
-	c2dev->regs = ioremap_nocache(reg0_start, reg0_len);
-	if (!c2dev->regs) {
-		printk(KERN_ERR PFX "Unable to remap BAR0\n");
-		ret = -EIO;
-		goto bail8;
-	}
-
-	/* Remap the PCI registers in adapter BAR4 to kernel VA space */
-	c2dev->pa = reg4_start + C2_PCI_REGS_OFFSET;
-	c2dev->kva = ioremap_nocache(reg4_start + C2_PCI_REGS_OFFSET,
-				     kva_map_size);
-	if (!c2dev->kva) {
-		printk(KERN_ERR PFX "Unable to remap BAR4\n");
-		ret = -EIO;
-		goto bail9;
-	}
-
-	/* Print out the MAC address */
-	pr_debug("%s: MAC %pM, IRQ %u\n", netdev->name, netdev->dev_addr,
-		 netdev->irq);
-
-	ret = c2_rnic_init(c2dev);
-	if (ret) {
-		printk(KERN_ERR PFX "c2_rnic_init failed: %d\n", ret);
-		goto bail10;
-	}
-
-	ret = c2_register_device(c2dev);
-	if (ret)
-		goto bail10;
-
-	return 0;
-
- bail10:
-	iounmap(c2dev->kva);
-
- bail9:
-	iounmap(c2dev->regs);
-
- bail8:
-	iounmap(c2dev->mmio_txp_ring);
-
- bail7:
-	iounmap(c2dev->mmio_rxp_ring);
-
- bail6:
-	unregister_netdev(netdev);
-
- bail5:
-	free_netdev(netdev);
-
- bail4:
-	free_irq(pcidev->irq, c2dev);
-
- bail3:
-	ib_dealloc_device(&c2dev->ibdev);
-
- bail2:
-	pci_release_regions(pcidev);
-
- bail1:
-	pci_disable_device(pcidev);
-
- bail0:
-	return ret;
-}
-
-static void c2_remove(struct pci_dev *pcidev)
-{
-	struct c2_dev *c2dev = pci_get_drvdata(pcidev);
-	struct net_device *netdev = c2dev->netdev;
-
-	/* Unregister with OpenIB */
-	c2_unregister_device(c2dev);
-
-	/* Clean up the RNIC resources */
-	c2_rnic_term(c2dev);
-
-	/* Remove network device from the kernel */
-	unregister_netdev(netdev);
-
-	/* Free network device */
-	free_netdev(netdev);
-
-	/* Free the interrupt line */
-	free_irq(pcidev->irq, c2dev);
-
-	/* missing: Turn LEDs off here */
-
-	/* Unmap adapter PA space */
-	iounmap(c2dev->kva);
-	iounmap(c2dev->regs);
-	iounmap(c2dev->mmio_txp_ring);
-	iounmap(c2dev->mmio_rxp_ring);
-
-	/* Free the hardware structure */
-	ib_dealloc_device(&c2dev->ibdev);
-
-	/* Release reserved PCI I/O and memory resources */
-	pci_release_regions(pcidev);
-
-	/* Disable PCI device */
-	pci_disable_device(pcidev);
-
-	/* Clear driver specific data */
-	pci_set_drvdata(pcidev, NULL);
-}
-
-static struct pci_driver c2_pci_driver = {
-	.name = DRV_NAME,
-	.id_table = c2_pci_table,
-	.probe = c2_probe,
-	.remove = c2_remove,
-};
-
-module_pci_driver(c2_pci_driver);
diff --git a/drivers/staging/rdma/amso1100/c2.h b/drivers/staging/rdma/amso1100/c2.h
deleted file mode 100644
index 21b565a..0000000
--- a/drivers/staging/rdma/amso1100/c2.h
+++ /dev/null
@@ -1,547 +0,0 @@
-/*
- * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __C2_H
-#define __C2_H
-
-#include <linux/netdevice.h>
-#include <linux/spinlock.h>
-#include <linux/kernel.h>
-#include <linux/pci.h>
-#include <linux/dma-mapping.h>
-#include <linux/idr.h>
-
-#include "c2_provider.h"
-#include "c2_mq.h"
-#include "c2_status.h"
-
-#define DRV_NAME     "c2"
-#define DRV_VERSION  "1.1"
-#define PFX          DRV_NAME ": "
-
-#define BAR_0                0
-#define BAR_2                2
-#define BAR_4                4
-
-#define RX_BUF_SIZE         (1536 + 8)
-#define ETH_JUMBO_MTU        9000
-#define C2_MAGIC            "CEPHEUS"
-#define C2_VERSION           4
-#define C2_IVN              (18 & 0x7fffffff)
-
-#define C2_REG0_SIZE        (16 * 1024)
-#define C2_REG2_SIZE        (2 * 1024 * 1024)
-#define C2_REG4_SIZE        (256 * 1024 * 1024)
-#define C2_NUM_TX_DESC       341
-#define C2_NUM_RX_DESC       256
-#define C2_PCI_REGS_OFFSET  (0x10000)
-#define C2_RXP_HRXDQ_OFFSET (((C2_REG4_SIZE)/2))
-#define C2_RXP_HRXDQ_SIZE   (4096)
-#define C2_TXP_HTXDQ_OFFSET (((C2_REG4_SIZE)/2) + C2_RXP_HRXDQ_SIZE)
-#define C2_TXP_HTXDQ_SIZE   (4096)
-#define C2_TX_TIMEOUT	    (6*HZ)
-
-/* CEPHEUS */
-static const u8 c2_magic[] = {
-	0x43, 0x45, 0x50, 0x48, 0x45, 0x55, 0x53
-};
-
-enum adapter_pci_regs {
-	C2_REGS_MAGIC = 0x0000,
-	C2_REGS_VERS = 0x0008,
-	C2_REGS_IVN = 0x000C,
-	C2_REGS_PCI_WINSIZE = 0x0010,
-	C2_REGS_Q0_QSIZE = 0x0014,
-	C2_REGS_Q0_MSGSIZE = 0x0018,
-	C2_REGS_Q0_POOLSTART = 0x001C,
-	C2_REGS_Q0_SHARED = 0x0020,
-	C2_REGS_Q1_QSIZE = 0x0024,
-	C2_REGS_Q1_MSGSIZE = 0x0028,
-	C2_REGS_Q1_SHARED = 0x0030,
-	C2_REGS_Q2_QSIZE = 0x0034,
-	C2_REGS_Q2_MSGSIZE = 0x0038,
-	C2_REGS_Q2_SHARED = 0x0040,
-	C2_REGS_ENADDR = 0x004C,
-	C2_REGS_RDMA_ENADDR = 0x0054,
-	C2_REGS_HRX_CUR = 0x006C,
-};
-
-struct c2_adapter_pci_regs {
-	char reg_magic[8];
-	u32 version;
-	u32 ivn;
-	u32 pci_window_size;
-	u32 q0_q_size;
-	u32 q0_msg_size;
-	u32 q0_pool_start;
-	u32 q0_shared;
-	u32 q1_q_size;
-	u32 q1_msg_size;
-	u32 q1_pool_start;
-	u32 q1_shared;
-	u32 q2_q_size;
-	u32 q2_msg_size;
-	u32 q2_pool_start;
-	u32 q2_shared;
-	u32 log_start;
-	u32 log_size;
-	u8 host_enaddr[8];
-	u8 rdma_enaddr[8];
-	u32 crash_entry;
-	u32 crash_ready[2];
-	u32 fw_txd_cur;
-	u32 fw_hrxd_cur;
-	u32 fw_rxd_cur;
-};
-
-enum pci_regs {
-	C2_HISR = 0x0000,
-	C2_DISR = 0x0004,
-	C2_HIMR = 0x0008,
-	C2_DIMR = 0x000C,
-	C2_NISR0 = 0x0010,
-	C2_NISR1 = 0x0014,
-	C2_NIMR0 = 0x0018,
-	C2_NIMR1 = 0x001C,
-	C2_IDIS = 0x0020,
-};
-
-enum {
-	C2_PCI_HRX_INT = 1 << 8,
-	C2_PCI_HTX_INT = 1 << 17,
-	C2_PCI_HRX_QUI = 1 << 31,
-};
-
-/*
- * Cepheus registers in BAR0.
- */
-struct c2_pci_regs {
-	u32 hostisr;
-	u32 dmaisr;
-	u32 hostimr;
-	u32 dmaimr;
-	u32 netisr0;
-	u32 netisr1;
-	u32 netimr0;
-	u32 netimr1;
-	u32 int_disable;
-};
-
-/* TXP flags */
-enum c2_txp_flags {
-	TXP_HTXD_DONE = 0,
-	TXP_HTXD_READY = 1 << 0,
-	TXP_HTXD_UNINIT = 1 << 1,
-};
-
-/* RXP flags */
-enum c2_rxp_flags {
-	RXP_HRXD_UNINIT = 0,
-	RXP_HRXD_READY = 1 << 0,
-	RXP_HRXD_DONE = 1 << 1,
-};
-
-/* RXP status */
-enum c2_rxp_status {
-	RXP_HRXD_ZERO = 0,
-	RXP_HRXD_OK = 1 << 0,
-	RXP_HRXD_BUF_OV = 1 << 1,
-};
-
-/* TXP descriptor fields */
-enum txp_desc {
-	C2_TXP_FLAGS = 0x0000,
-	C2_TXP_LEN = 0x0002,
-	C2_TXP_ADDR = 0x0004,
-};
-
-/* RXP descriptor fields */
-enum rxp_desc {
-	C2_RXP_FLAGS = 0x0000,
-	C2_RXP_STATUS = 0x0002,
-	C2_RXP_COUNT = 0x0004,
-	C2_RXP_LEN = 0x0006,
-	C2_RXP_ADDR = 0x0008,
-};
-
-struct c2_txp_desc {
-	u16 flags;
-	u16 len;
-	u64 addr;
-} __attribute__ ((packed));
-
-struct c2_rxp_desc {
-	u16 flags;
-	u16 status;
-	u16 count;
-	u16 len;
-	u64 addr;
-} __attribute__ ((packed));
-
-struct c2_rxp_hdr {
-	u16 flags;
-	u16 status;
-	u16 len;
-	u16 rsvd;
-} __attribute__ ((packed));
-
-struct c2_tx_desc {
-	u32 len;
-	u32 status;
-	dma_addr_t next_offset;
-};
-
-struct c2_rx_desc {
-	u32 len;
-	u32 status;
-	dma_addr_t next_offset;
-};
-
-struct c2_alloc {
-	u32 last;
-	u32 max;
-	spinlock_t lock;
-	unsigned long *table;
-};
-
-struct c2_array {
-	struct {
-		void **page;
-		int used;
-	} *page_list;
-};
-
-/*
- * The MQ shared pointer pool is organized as a linked list of
- * chunks. Each chunk contains a linked list of free shared pointers
- * that can be allocated to a given user mode client.
- *
- */
-struct sp_chunk {
-	struct sp_chunk *next;
-	dma_addr_t dma_addr;
-	DEFINE_DMA_UNMAP_ADDR(mapping);
-	u16 head;
-	u16 shared_ptr[0];
-};
-
-struct c2_pd_table {
-	u32 last;
-	u32 max;
-	spinlock_t lock;
-	unsigned long *table;
-};
-
-struct c2_qp_table {
-	struct idr idr;
-	spinlock_t lock;
-};
-
-struct c2_element {
-	struct c2_element *next;
-	void *ht_desc;		/* host     descriptor */
-	void __iomem *hw_desc;	/* hardware descriptor */
-	struct sk_buff *skb;
-	dma_addr_t mapaddr;
-	u32 maplen;
-};
-
-struct c2_ring {
-	struct c2_element *to_clean;
-	struct c2_element *to_use;
-	struct c2_element *start;
-	unsigned long count;
-};
-
-struct c2_dev {
-	struct ib_device ibdev;
-	void __iomem *regs;
-	void __iomem *mmio_txp_ring; /* remapped adapter memory for hw rings */
-	void __iomem *mmio_rxp_ring;
-	spinlock_t lock;
-	struct pci_dev *pcidev;
-	struct net_device *netdev;
-	struct net_device *pseudo_netdev;
-	unsigned int cur_tx;
-	unsigned int cur_rx;
-	u32 adapter_handle;
-	int device_cap_flags;
-	void __iomem *kva;	/* KVA device memory */
-	unsigned long pa;	/* PA device memory */
-	void **qptr_array;
-
-	struct kmem_cache *host_msg_cache;
-
-	struct list_head cca_link;		/* adapter list */
-	struct list_head eh_wakeup_list;	/* event wakeup list */
-	wait_queue_head_t req_vq_wo;
-
-	/* Cached RNIC properties */
-	struct ib_device_attr props;
-
-	struct c2_pd_table pd_table;
-	struct c2_qp_table qp_table;
-	int ports;		/* num of GigE ports */
-	int devnum;
-	spinlock_t vqlock;	/* sync vbs req MQ */
-
-	/* Verbs Queues */
-	struct c2_mq req_vq;	/* Verbs Request MQ */
-	struct c2_mq rep_vq;	/* Verbs Reply MQ */
-	struct c2_mq aeq;	/* Async Events MQ */
-
-	/* Kernel client MQs */
-	struct sp_chunk *kern_mqsp_pool;
-
-	/* Device updates these values when posting messages to a host
-	 * target queue */
-	u16 req_vq_shared;
-	u16 rep_vq_shared;
-	u16 aeq_shared;
-	u16 irq_claimed;
-
-	/*
-	 * Shared host target pages for user-accessible MQs.
-	 */
-	int hthead;		/* index of first free entry */
-	void *htpages;		/* kernel vaddr */
-	int htlen;		/* length of htpages memory */
-	void *htuva;		/* user mapped vaddr */
-	spinlock_t htlock;	/* serialize allocation */
-
-	u64 adapter_hint_uva;	/* access to the activity FIFO */
-
-	//	spinlock_t aeq_lock;
-	//	spinlock_t rnic_lock;
-
-	__be16 *hint_count;
-	dma_addr_t hint_count_dma;
-	u16 hints_read;
-
-	int init;		/* TRUE if it's ready */
-	char ae_cache_name[16];
-	char vq_cache_name[16];
-};
-
-struct c2_port {
-	u32 msg_enable;
-	struct c2_dev *c2dev;
-	struct net_device *netdev;
-
-	spinlock_t tx_lock;
-	u32 tx_avail;
-	struct c2_ring tx_ring;
-	struct c2_ring rx_ring;
-
-	void *mem;		/* PCI memory for host rings */
-	dma_addr_t dma;
-	unsigned long mem_size;
-
-	u32 rx_buf_size;
-};
-
-/*
- * Activity FIFO registers in BAR0.
- */
-#define PCI_BAR0_HOST_HINT	0x100
-#define PCI_BAR0_ADAPTER_HINT	0x2000
-
-/*
- * Ammasso PCI vendor id and Cepheus PCI device id.
- */
-#define CQ_ARMED 	0x01
-#define CQ_WAIT_FOR_DMA	0x80
-
-/*
- * The format of a hint is as follows:
- * Lower 16 bits are the count of hints for the queue.
- * Next 15 bits are the qp_index
- * Upper most bit depends on who reads it:
- *    If read by producer, then it means Full (1) or Not-Full (0)
- *    If read by consumer, then it means Empty (1) or Not-Empty (0)
- */
-#define C2_HINT_MAKE(q_index, hint_count) (((q_index) << 16) | hint_count)
-#define C2_HINT_GET_INDEX(hint) (((hint) & 0x7FFF0000) >> 16)
-#define C2_HINT_GET_COUNT(hint) ((hint) & 0x0000FFFF)
-
-
-/*
- * The following defines the offset in SDRAM for the c2_adapter_pci_regs_t
- * struct.
- */
-#define C2_ADAPTER_PCI_REGS_OFFSET 0x10000
-
-#ifndef readq
-static inline u64 readq(const void __iomem * addr)
-{
-	u64 ret = readl(addr + 4);
-	ret <<= 32;
-	ret |= readl(addr);
-
-	return ret;
-}
-#endif
-
-#ifndef writeq
-static inline void __raw_writeq(u64 val, void __iomem * addr)
-{
-	__raw_writel((u32) (val), addr);
-	__raw_writel((u32) (val >> 32), (addr + 4));
-}
-#endif
-
-#define C2_SET_CUR_RX(c2dev, cur_rx) \
-	__raw_writel((__force u32) cpu_to_be32(cur_rx), c2dev->mmio_txp_ring + 4092)
-
-#define C2_GET_CUR_RX(c2dev) \
-	be32_to_cpu((__force __be32) readl(c2dev->mmio_txp_ring + 4092))
-
-static inline struct c2_dev *to_c2dev(struct ib_device *ibdev)
-{
-	return container_of(ibdev, struct c2_dev, ibdev);
-}
-
-static inline int c2_errno(void *reply)
-{
-	switch (c2_wr_get_result(reply)) {
-	case C2_OK:
-		return 0;
-	case CCERR_NO_BUFS:
-	case CCERR_INSUFFICIENT_RESOURCES:
-	case CCERR_ZERO_RDMA_READ_RESOURCES:
-		return -ENOMEM;
-	case CCERR_MR_IN_USE:
-	case CCERR_QP_IN_USE:
-		return -EBUSY;
-	case CCERR_ADDR_IN_USE:
-		return -EADDRINUSE;
-	case CCERR_ADDR_NOT_AVAIL:
-		return -EADDRNOTAVAIL;
-	case CCERR_CONN_RESET:
-		return -ECONNRESET;
-	case CCERR_NOT_IMPLEMENTED:
-	case CCERR_INVALID_WQE:
-		return -ENOSYS;
-	case CCERR_QP_NOT_PRIVILEGED:
-		return -EPERM;
-	case CCERR_STACK_ERROR:
-		return -EPROTO;
-	case CCERR_ACCESS_VIOLATION:
-	case CCERR_BASE_AND_BOUNDS_VIOLATION:
-		return -EFAULT;
-	case CCERR_STAG_STATE_NOT_INVALID:
-	case CCERR_INVALID_ADDRESS:
-	case CCERR_INVALID_CQ:
-	case CCERR_INVALID_EP:
-	case CCERR_INVALID_MODIFIER:
-	case CCERR_INVALID_MTU:
-	case CCERR_INVALID_PD_ID:
-	case CCERR_INVALID_QP:
-	case CCERR_INVALID_RNIC:
-	case CCERR_INVALID_STAG:
-		return -EINVAL;
-	default:
-		return -EAGAIN;
-	}
-}
-
-/* Device */
-int c2_register_device(struct c2_dev *c2dev);
-void c2_unregister_device(struct c2_dev *c2dev);
-int c2_rnic_init(struct c2_dev *c2dev);
-void c2_rnic_term(struct c2_dev *c2dev);
-void c2_rnic_interrupt(struct c2_dev *c2dev);
-int c2_del_addr(struct c2_dev *c2dev, __be32 inaddr, __be32 inmask);
-int c2_add_addr(struct c2_dev *c2dev, __be32 inaddr, __be32 inmask);
-
-/* QPs */
-int c2_alloc_qp(struct c2_dev *c2dev, struct c2_pd *pd,
-		       struct ib_qp_init_attr *qp_attrs, struct c2_qp *qp);
-void c2_free_qp(struct c2_dev *c2dev, struct c2_qp *qp);
-struct ib_qp *c2_get_qp(struct ib_device *device, int qpn);
-int c2_qp_modify(struct c2_dev *c2dev, struct c2_qp *qp,
-			struct ib_qp_attr *attr, int attr_mask);
-int c2_qp_set_read_limits(struct c2_dev *c2dev, struct c2_qp *qp,
-				 int ord, int ird);
-int c2_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
-			struct ib_send_wr **bad_wr);
-int c2_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *ib_wr,
-			   struct ib_recv_wr **bad_wr);
-void c2_init_qp_table(struct c2_dev *c2dev);
-void c2_cleanup_qp_table(struct c2_dev *c2dev);
-void c2_set_qp_state(struct c2_qp *, int);
-struct c2_qp *c2_find_qpn(struct c2_dev *c2dev, int qpn);
-
-/* PDs */
-int c2_pd_alloc(struct c2_dev *c2dev, int privileged, struct c2_pd *pd);
-void c2_pd_free(struct c2_dev *c2dev, struct c2_pd *pd);
-int c2_init_pd_table(struct c2_dev *c2dev);
-void c2_cleanup_pd_table(struct c2_dev *c2dev);
-
-/* CQs */
-int c2_init_cq(struct c2_dev *c2dev, int entries,
-		      struct c2_ucontext *ctx, struct c2_cq *cq);
-void c2_free_cq(struct c2_dev *c2dev, struct c2_cq *cq);
-void c2_cq_event(struct c2_dev *c2dev, u32 mq_index);
-void c2_cq_clean(struct c2_dev *c2dev, struct c2_qp *qp, u32 mq_index);
-int c2_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry);
-int c2_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
-
-/* CM */
-int c2_llp_connect(struct iw_cm_id *cm_id,
-			  struct iw_cm_conn_param *iw_param);
-int c2_llp_accept(struct iw_cm_id *cm_id,
-			 struct iw_cm_conn_param *iw_param);
-int c2_llp_reject(struct iw_cm_id *cm_id, const void *pdata,
-			 u8 pdata_len);
-int c2_llp_service_create(struct iw_cm_id *cm_id, int backlog);
-int c2_llp_service_destroy(struct iw_cm_id *cm_id);
-
-/* MM */
-int c2_nsmr_register_phys_kern(struct c2_dev *c2dev, u64 *addr_list,
- 				      int page_size, int pbl_depth, u32 length,
- 				      u32 off, u64 *va, enum c2_acf acf,
-				      struct c2_mr *mr);
-int c2_stag_dealloc(struct c2_dev *c2dev, u32 stag_index);
-
-/* AE */
-void c2_ae_event(struct c2_dev *c2dev, u32 mq_index);
-
-/* MQSP Allocator */
-int c2_init_mqsp_pool(struct c2_dev *c2dev, gfp_t gfp_mask,
-			     struct sp_chunk **root);
-void c2_free_mqsp_pool(struct c2_dev *c2dev, struct sp_chunk *root);
-__be16 *c2_alloc_mqsp(struct c2_dev *c2dev, struct sp_chunk *head,
-			     dma_addr_t *dma_addr, gfp_t gfp_mask);
-void c2_free_mqsp(__be16* mqsp);
-#endif
diff --git a/drivers/staging/rdma/amso1100/c2_ae.c b/drivers/staging/rdma/amso1100/c2_ae.c
deleted file mode 100644
index eb7a92b..0000000
--- a/drivers/staging/rdma/amso1100/c2_ae.c
+++ /dev/null
@@ -1,327 +0,0 @@
-/*
- * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "c2.h"
-#include <rdma/iw_cm.h>
-#include "c2_status.h"
-#include "c2_ae.h"
-
-static int c2_convert_cm_status(u32 c2_status)
-{
-	switch (c2_status) {
-	case C2_CONN_STATUS_SUCCESS:
-		return 0;
-	case C2_CONN_STATUS_REJECTED:
-		return -ENETRESET;
-	case C2_CONN_STATUS_REFUSED:
-		return -ECONNREFUSED;
-	case C2_CONN_STATUS_TIMEDOUT:
-		return -ETIMEDOUT;
-	case C2_CONN_STATUS_NETUNREACH:
-		return -ENETUNREACH;
-	case C2_CONN_STATUS_HOSTUNREACH:
-		return -EHOSTUNREACH;
-	case C2_CONN_STATUS_INVALID_RNIC:
-		return -EINVAL;
-	case C2_CONN_STATUS_INVALID_QP:
-		return -EINVAL;
-	case C2_CONN_STATUS_INVALID_QP_STATE:
-		return -EINVAL;
-	case C2_CONN_STATUS_ADDR_NOT_AVAIL:
-		return -EADDRNOTAVAIL;
-	default:
-		printk(KERN_ERR PFX
-		       "%s - Unable to convert CM status: %d\n",
-		       __func__, c2_status);
-		return -EIO;
-	}
-}
-
-static const char* to_event_str(int event)
-{
-	static const char* event_str[] = {
-		"CCAE_REMOTE_SHUTDOWN",
-		"CCAE_ACTIVE_CONNECT_RESULTS",
-		"CCAE_CONNECTION_REQUEST",
-		"CCAE_LLP_CLOSE_COMPLETE",
-		"CCAE_TERMINATE_MESSAGE_RECEIVED",
-		"CCAE_LLP_CONNECTION_RESET",
-		"CCAE_LLP_CONNECTION_LOST",
-		"CCAE_LLP_SEGMENT_SIZE_INVALID",
-		"CCAE_LLP_INVALID_CRC",
-		"CCAE_LLP_BAD_FPDU",
-		"CCAE_INVALID_DDP_VERSION",
-		"CCAE_INVALID_RDMA_VERSION",
-		"CCAE_UNEXPECTED_OPCODE",
-		"CCAE_INVALID_DDP_QUEUE_NUMBER",
-		"CCAE_RDMA_READ_NOT_ENABLED",
-		"CCAE_RDMA_WRITE_NOT_ENABLED",
-		"CCAE_RDMA_READ_TOO_SMALL",
-		"CCAE_NO_L_BIT",
-		"CCAE_TAGGED_INVALID_STAG",
-		"CCAE_TAGGED_BASE_BOUNDS_VIOLATION",
-		"CCAE_TAGGED_ACCESS_RIGHTS_VIOLATION",
-		"CCAE_TAGGED_INVALID_PD",
-		"CCAE_WRAP_ERROR",
-		"CCAE_BAD_CLOSE",
-		"CCAE_BAD_LLP_CLOSE",
-		"CCAE_INVALID_MSN_RANGE",
-		"CCAE_INVALID_MSN_GAP",
-		"CCAE_IRRQ_OVERFLOW",
-		"CCAE_IRRQ_MSN_GAP",
-		"CCAE_IRRQ_MSN_RANGE",
-		"CCAE_IRRQ_INVALID_STAG",
-		"CCAE_IRRQ_BASE_BOUNDS_VIOLATION",
-		"CCAE_IRRQ_ACCESS_RIGHTS_VIOLATION",
-		"CCAE_IRRQ_INVALID_PD",
-		"CCAE_IRRQ_WRAP_ERROR",
-		"CCAE_CQ_SQ_COMPLETION_OVERFLOW",
-		"CCAE_CQ_RQ_COMPLETION_ERROR",
-		"CCAE_QP_SRQ_WQE_ERROR",
-		"CCAE_QP_LOCAL_CATASTROPHIC_ERROR",
-		"CCAE_CQ_OVERFLOW",
-		"CCAE_CQ_OPERATION_ERROR",
-		"CCAE_SRQ_LIMIT_REACHED",
-		"CCAE_QP_RQ_LIMIT_REACHED",
-		"CCAE_SRQ_CATASTROPHIC_ERROR",
-		"CCAE_RNIC_CATASTROPHIC_ERROR"
-	};
-
-	if (event < CCAE_REMOTE_SHUTDOWN ||
-	    event > CCAE_RNIC_CATASTROPHIC_ERROR)
-		return "<invalid event>";
-
-	event -= CCAE_REMOTE_SHUTDOWN;
-	return event_str[event];
-}
-
-static const char *to_qp_state_str(int state)
-{
-	switch (state) {
-	case C2_QP_STATE_IDLE:
-		return "C2_QP_STATE_IDLE";
-	case C2_QP_STATE_CONNECTING:
-		return "C2_QP_STATE_CONNECTING";
-	case C2_QP_STATE_RTS:
-		return "C2_QP_STATE_RTS";
-	case C2_QP_STATE_CLOSING:
-		return "C2_QP_STATE_CLOSING";
-	case C2_QP_STATE_TERMINATE:
-		return "C2_QP_STATE_TERMINATE";
-	case C2_QP_STATE_ERROR:
-		return "C2_QP_STATE_ERROR";
-	default:
-		return "<invalid QP state>";
-	}
-}
-
-void c2_ae_event(struct c2_dev *c2dev, u32 mq_index)
-{
-	struct c2_mq *mq = c2dev->qptr_array[mq_index];
-	union c2wr *wr;
-	void *resource_user_context;
-	struct iw_cm_event cm_event;
-	struct ib_event ib_event;
-	enum c2_resource_indicator resource_indicator;
-	enum c2_event_id event_id;
-	unsigned long flags;
-	int status;
-	struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_event.local_addr;
-	struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_event.remote_addr;
-
-	/*
-	 * retrieve the message
-	 */
-	wr = c2_mq_consume(mq);
-	if (!wr)
-		return;
-
-	memset(&ib_event, 0, sizeof(ib_event));
-	memset(&cm_event, 0, sizeof(cm_event));
-
-	event_id = c2_wr_get_id(wr);
-	resource_indicator = be32_to_cpu(wr->ae.ae_generic.resource_type);
-	resource_user_context =
-	    (void *) (unsigned long) wr->ae.ae_generic.user_context;
-
-	status = cm_event.status = c2_convert_cm_status(c2_wr_get_result(wr));
-
-	pr_debug("event received c2_dev=%p, event_id=%d, "
-		"resource_indicator=%d, user_context=%p, status = %d\n",
-		c2dev, event_id, resource_indicator, resource_user_context,
-		status);
-
-	switch (resource_indicator) {
-	case C2_RES_IND_QP:{
-
-		struct c2_qp *qp = resource_user_context;
-		struct iw_cm_id *cm_id = qp->cm_id;
-		struct c2wr_ae_active_connect_results *res;
-
-		if (!cm_id) {
-			pr_debug("event received, but cm_id is <nul>, qp=%p!\n",
-				qp);
-			goto ignore_it;
-		}
-		pr_debug("%s: event = %s, user_context=%llx, "
-			"resource_type=%x, "
-			"resource=%x, qp_state=%s\n",
-			__func__,
-			to_event_str(event_id),
-			(unsigned long long) wr->ae.ae_generic.user_context,
-			be32_to_cpu(wr->ae.ae_generic.resource_type),
-			be32_to_cpu(wr->ae.ae_generic.resource),
-			to_qp_state_str(be32_to_cpu(wr->ae.ae_generic.qp_state)));
-
-		c2_set_qp_state(qp, be32_to_cpu(wr->ae.ae_generic.qp_state));
-
-		switch (event_id) {
-		case CCAE_ACTIVE_CONNECT_RESULTS:
-			res = &wr->ae.ae_active_connect_results;
-			cm_event.event = IW_CM_EVENT_CONNECT_REPLY;
-			laddr->sin_addr.s_addr = res->laddr;
-			raddr->sin_addr.s_addr = res->raddr;
-			laddr->sin_port = res->lport;
-			raddr->sin_port = res->rport;
-			if (status == 0) {
-				cm_event.private_data_len =
-					be32_to_cpu(res->private_data_length);
-				cm_event.private_data = res->private_data;
-			} else {
-				spin_lock_irqsave(&qp->lock, flags);
-				if (qp->cm_id) {
-					qp->cm_id->rem_ref(qp->cm_id);
-					qp->cm_id = NULL;
-				}
-				spin_unlock_irqrestore(&qp->lock, flags);
-				cm_event.private_data_len = 0;
-				cm_event.private_data = NULL;
-			}
-			if (cm_id->event_handler)
-				cm_id->event_handler(cm_id, &cm_event);
-			break;
-		case CCAE_TERMINATE_MESSAGE_RECEIVED:
-		case CCAE_CQ_SQ_COMPLETION_OVERFLOW:
-			ib_event.device = &c2dev->ibdev;
-			ib_event.element.qp = &qp->ibqp;
-			ib_event.event = IB_EVENT_QP_REQ_ERR;
-
-			if (qp->ibqp.event_handler)
-				qp->ibqp.event_handler(&ib_event,
-						       qp->ibqp.
-						       qp_context);
-			break;
-		case CCAE_BAD_CLOSE:
-		case CCAE_LLP_CLOSE_COMPLETE:
-		case CCAE_LLP_CONNECTION_RESET:
-		case CCAE_LLP_CONNECTION_LOST:
-			BUG_ON(cm_id->event_handler==(void*)0x6b6b6b6b);
-
-			spin_lock_irqsave(&qp->lock, flags);
-			if (qp->cm_id) {
-				qp->cm_id->rem_ref(qp->cm_id);
-				qp->cm_id = NULL;
-			}
-			spin_unlock_irqrestore(&qp->lock, flags);
-			cm_event.event = IW_CM_EVENT_CLOSE;
-			cm_event.status = 0;
-			if (cm_id->event_handler)
-				cm_id->event_handler(cm_id, &cm_event);
-			break;
-		default:
-			BUG_ON(1);
-			pr_debug("%s:%d Unexpected event_id=%d on QP=%p, "
-				"CM_ID=%p\n",
-				__func__, __LINE__,
-				event_id, qp, cm_id);
-			break;
-		}
-		break;
-	}
-
-	case C2_RES_IND_EP:{
-
-		struct c2wr_ae_connection_request *req =
-			&wr->ae.ae_connection_request;
-		struct iw_cm_id *cm_id =
-			resource_user_context;
-
-		pr_debug("C2_RES_IND_EP event_id=%d\n", event_id);
-		if (event_id != CCAE_CONNECTION_REQUEST) {
-			pr_debug("%s: Invalid event_id: %d\n",
-				__func__, event_id);
-			break;
-		}
-		cm_event.event = IW_CM_EVENT_CONNECT_REQUEST;
-		cm_event.provider_data = (void*)(unsigned long)req->cr_handle;
-		laddr->sin_addr.s_addr = req->laddr;
-		raddr->sin_addr.s_addr = req->raddr;
-		laddr->sin_port = req->lport;
-		raddr->sin_port = req->rport;
-		cm_event.private_data_len =
-			be32_to_cpu(req->private_data_length);
-		cm_event.private_data = req->private_data;
-		/*
-		 * Until ird/ord negotiation via MPAv2 support is added, send
-		 * max supported values
-		 */
-		cm_event.ird = cm_event.ord = 128;
-
-		if (cm_id->event_handler)
-			cm_id->event_handler(cm_id, &cm_event);
-		break;
-	}
-
-	case C2_RES_IND_CQ:{
-		struct c2_cq *cq =
-		    resource_user_context;
-
-		pr_debug("IB_EVENT_CQ_ERR\n");
-		ib_event.device = &c2dev->ibdev;
-		ib_event.element.cq = &cq->ibcq;
-		ib_event.event = IB_EVENT_CQ_ERR;
-
-		if (cq->ibcq.event_handler)
-			cq->ibcq.event_handler(&ib_event,
-					       cq->ibcq.cq_context);
-		break;
-	}
-
-	default:
-		printk("Bad resource indicator = %d\n",
-		       resource_indicator);
-		break;
-	}
-
- ignore_it:
-	c2_mq_free(mq);
-}
diff --git a/drivers/staging/rdma/amso1100/c2_ae.h b/drivers/staging/rdma/amso1100/c2_ae.h
deleted file mode 100644
index 3a065c3..0000000
--- a/drivers/staging/rdma/amso1100/c2_ae.h
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef _C2_AE_H_
-#define _C2_AE_H_
-
-/*
- * WARNING: If you change this file, also bump C2_IVN_BASE
- * in common/include/clustercore/c2_ivn.h.
- */
-
-/*
- * Asynchronous Event Identifiers
- *
- * These start at 0x80 only so it's obvious from inspection that
- * they are not work-request statuses.  This isn't critical.
- *
- * NOTE: these event id's must fit in eight bits.
- */
-enum c2_event_id {
-	CCAE_REMOTE_SHUTDOWN = 0x80,
-	CCAE_ACTIVE_CONNECT_RESULTS,
-	CCAE_CONNECTION_REQUEST,
-	CCAE_LLP_CLOSE_COMPLETE,
-	CCAE_TERMINATE_MESSAGE_RECEIVED,
-	CCAE_LLP_CONNECTION_RESET,
-	CCAE_LLP_CONNECTION_LOST,
-	CCAE_LLP_SEGMENT_SIZE_INVALID,
-	CCAE_LLP_INVALID_CRC,
-	CCAE_LLP_BAD_FPDU,
-	CCAE_INVALID_DDP_VERSION,
-	CCAE_INVALID_RDMA_VERSION,
-	CCAE_UNEXPECTED_OPCODE,
-	CCAE_INVALID_DDP_QUEUE_NUMBER,
-	CCAE_RDMA_READ_NOT_ENABLED,
-	CCAE_RDMA_WRITE_NOT_ENABLED,
-	CCAE_RDMA_READ_TOO_SMALL,
-	CCAE_NO_L_BIT,
-	CCAE_TAGGED_INVALID_STAG,
-	CCAE_TAGGED_BASE_BOUNDS_VIOLATION,
-	CCAE_TAGGED_ACCESS_RIGHTS_VIOLATION,
-	CCAE_TAGGED_INVALID_PD,
-	CCAE_WRAP_ERROR,
-	CCAE_BAD_CLOSE,
-	CCAE_BAD_LLP_CLOSE,
-	CCAE_INVALID_MSN_RANGE,
-	CCAE_INVALID_MSN_GAP,
-	CCAE_IRRQ_OVERFLOW,
-	CCAE_IRRQ_MSN_GAP,
-	CCAE_IRRQ_MSN_RANGE,
-	CCAE_IRRQ_INVALID_STAG,
-	CCAE_IRRQ_BASE_BOUNDS_VIOLATION,
-	CCAE_IRRQ_ACCESS_RIGHTS_VIOLATION,
-	CCAE_IRRQ_INVALID_PD,
-	CCAE_IRRQ_WRAP_ERROR,
-	CCAE_CQ_SQ_COMPLETION_OVERFLOW,
-	CCAE_CQ_RQ_COMPLETION_ERROR,
-	CCAE_QP_SRQ_WQE_ERROR,
-	CCAE_QP_LOCAL_CATASTROPHIC_ERROR,
-	CCAE_CQ_OVERFLOW,
-	CCAE_CQ_OPERATION_ERROR,
-	CCAE_SRQ_LIMIT_REACHED,
-	CCAE_QP_RQ_LIMIT_REACHED,
-	CCAE_SRQ_CATASTROPHIC_ERROR,
-	CCAE_RNIC_CATASTROPHIC_ERROR
-/* WARNING If you add more id's, make sure their values fit in eight bits. */
-};
-
-/*
- * Resource Indicators and Identifiers
- */
-enum c2_resource_indicator {
-	C2_RES_IND_QP = 1,
-	C2_RES_IND_EP,
-	C2_RES_IND_CQ,
-	C2_RES_IND_SRQ,
-};
-
-#endif /* _C2_AE_H_ */
diff --git a/drivers/staging/rdma/amso1100/c2_alloc.c b/drivers/staging/rdma/amso1100/c2_alloc.c
deleted file mode 100644
index 039872d..0000000
--- a/drivers/staging/rdma/amso1100/c2_alloc.c
+++ /dev/null
@@ -1,142 +0,0 @@
-/*
- * Copyright (c) 2004 Topspin Communications.  All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/errno.h>
-#include <linux/bitmap.h>
-
-#include "c2.h"
-
-static int c2_alloc_mqsp_chunk(struct c2_dev *c2dev, gfp_t gfp_mask,
-			       struct sp_chunk **head)
-{
-	int i;
-	struct sp_chunk *new_head;
-	dma_addr_t dma_addr;
-
-	new_head = dma_alloc_coherent(&c2dev->pcidev->dev, PAGE_SIZE,
-				      &dma_addr, gfp_mask);
-	if (new_head == NULL)
-		return -ENOMEM;
-
-	new_head->dma_addr = dma_addr;
-	dma_unmap_addr_set(new_head, mapping, new_head->dma_addr);
-
-	new_head->next = NULL;
-	new_head->head = 0;
-
-	/* build list where each index is the next free slot */
-	for (i = 0;
-	     i < (PAGE_SIZE - sizeof(struct sp_chunk) -
-		  sizeof(u16)) / sizeof(u16) - 1;
-	     i++) {
-		new_head->shared_ptr[i] = i + 1;
-	}
-	/* terminate list */
-	new_head->shared_ptr[i] = 0xFFFF;
-
-	*head = new_head;
-	return 0;
-}
-
-int c2_init_mqsp_pool(struct c2_dev *c2dev, gfp_t gfp_mask,
-		      struct sp_chunk **root)
-{
-	return c2_alloc_mqsp_chunk(c2dev, gfp_mask, root);
-}
-
-void c2_free_mqsp_pool(struct c2_dev *c2dev, struct sp_chunk *root)
-{
-	struct sp_chunk *next;
-
-	while (root) {
-		next = root->next;
-		dma_free_coherent(&c2dev->pcidev->dev, PAGE_SIZE, root,
-				  dma_unmap_addr(root, mapping));
-		root = next;
-	}
-}
-
-__be16 *c2_alloc_mqsp(struct c2_dev *c2dev, struct sp_chunk *head,
-		      dma_addr_t *dma_addr, gfp_t gfp_mask)
-{
-	u16 mqsp;
-
-	while (head) {
-		mqsp = head->head;
-		if (mqsp != 0xFFFF) {
-			head->head = head->shared_ptr[mqsp];
-			break;
-		} else if (head->next == NULL) {
-			if (c2_alloc_mqsp_chunk(c2dev, gfp_mask, &head->next) ==
-			    0) {
-				head = head->next;
-				mqsp = head->head;
-				head->head = head->shared_ptr[mqsp];
-				break;
-			} else
-				return NULL;
-		} else
-			head = head->next;
-	}
-	if (head) {
-		*dma_addr = head->dma_addr +
-			    ((unsigned long) &(head->shared_ptr[mqsp]) -
-			     (unsigned long) head);
-		pr_debug("%s addr %p dma_addr %llx\n", __func__,
-			 &(head->shared_ptr[mqsp]), (unsigned long long) *dma_addr);
-		return (__force __be16 *) &(head->shared_ptr[mqsp]);
-	}
-	return NULL;
-}
-
-void c2_free_mqsp(__be16 *mqsp)
-{
-	struct sp_chunk *head;
-	u16 idx;
-
-	/* The chunk containing this ptr begins at the page boundary */
-	head = (struct sp_chunk *) ((unsigned long) mqsp & PAGE_MASK);
-
-	/* Link head to new mqsp */
-	*mqsp = (__force __be16) head->head;
-
-	/* Compute the shared_ptr index */
-	idx = (offset_in_page(mqsp)) >> 1;
-	idx -= (unsigned long) &(((struct sp_chunk *) 0)->shared_ptr[0]) >> 1;
-
-	/* Point this index at the head */
-	head->shared_ptr[idx] = head->head;
-
-	/* Point head at this index */
-	head->head = idx;
-}
diff --git a/drivers/staging/rdma/amso1100/c2_cm.c b/drivers/staging/rdma/amso1100/c2_cm.c
deleted file mode 100644
index f8dbdb9..0000000
--- a/drivers/staging/rdma/amso1100/c2_cm.c
+++ /dev/null
@@ -1,458 +0,0 @@
-/*
- * Copyright (c) 2005 Ammasso, Inc.  All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-#include <linux/slab.h>
-
-#include "c2.h"
-#include "c2_wr.h"
-#include "c2_vq.h"
-#include <rdma/iw_cm.h>
-
-int c2_llp_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
-{
-	struct c2_dev *c2dev = to_c2dev(cm_id->device);
-	struct ib_qp *ibqp;
-	struct c2_qp *qp;
-	struct c2wr_qp_connect_req *wr;	/* variable size needs a malloc. */
-	struct c2_vq_req *vq_req;
-	int err;
-	struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->remote_addr;
-
-	if (cm_id->remote_addr.ss_family != AF_INET)
-		return -ENOSYS;
-
-	ibqp = c2_get_qp(cm_id->device, iw_param->qpn);
-	if (!ibqp)
-		return -EINVAL;
-	qp = to_c2qp(ibqp);
-
-	/* Associate QP <--> CM_ID */
-	cm_id->provider_data = qp;
-	cm_id->add_ref(cm_id);
-	qp->cm_id = cm_id;
-
-	/*
-	 * only support the max private_data length
-	 */
-	if (iw_param->private_data_len > C2_MAX_PRIVATE_DATA_SIZE) {
-		err = -EINVAL;
-		goto bail0;
-	}
-	/*
-	 * Set the rdma read limits
-	 */
-	err = c2_qp_set_read_limits(c2dev, qp, iw_param->ord, iw_param->ird);
-	if (err)
-		goto bail0;
-
-	/*
-	 * Create and send a WR_QP_CONNECT...
-	 */
-	wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL);
-	if (!wr) {
-		err = -ENOMEM;
-		goto bail0;
-	}
-
-	vq_req = vq_req_alloc(c2dev);
-	if (!vq_req) {
-		err = -ENOMEM;
-		goto bail1;
-	}
-
-	c2_wr_set_id(wr, CCWR_QP_CONNECT);
-	wr->hdr.context = 0;
-	wr->rnic_handle = c2dev->adapter_handle;
-	wr->qp_handle = qp->adapter_handle;
-
-	wr->remote_addr = raddr->sin_addr.s_addr;
-	wr->remote_port = raddr->sin_port;
-
-	/*
-	 * Move any private data from the callers's buf into
-	 * the WR.
-	 */
-	if (iw_param->private_data) {
-		wr->private_data_length =
-			cpu_to_be32(iw_param->private_data_len);
-		memcpy(&wr->private_data[0], iw_param->private_data,
-		       iw_param->private_data_len);
-	} else
-		wr->private_data_length = 0;
-
-	/*
-	 * Send WR to adapter.  NOTE: There is no synch reply from
-	 * the adapter.
-	 */
-	err = vq_send_wr(c2dev, (union c2wr *) wr);
-	vq_req_free(c2dev, vq_req);
-
- bail1:
-	kfree(wr);
- bail0:
-	if (err) {
-		/*
-		 * If we fail, release reference on QP and
-		 * disassociate QP from CM_ID
-		 */
-		cm_id->provider_data = NULL;
-		qp->cm_id = NULL;
-		cm_id->rem_ref(cm_id);
-	}
-	return err;
-}
-
-int c2_llp_service_create(struct iw_cm_id *cm_id, int backlog)
-{
-	struct c2_dev *c2dev;
-	struct c2wr_ep_listen_create_req wr;
-	struct c2wr_ep_listen_create_rep *reply;
-	struct c2_vq_req *vq_req;
-	int err;
-	struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->local_addr;
-
-	if (cm_id->local_addr.ss_family != AF_INET)
-		return -ENOSYS;
-
-	c2dev = to_c2dev(cm_id->device);
-	if (c2dev == NULL)
-		return -EINVAL;
-
-	/*
-	 * Allocate verbs request.
-	 */
-	vq_req = vq_req_alloc(c2dev);
-	if (!vq_req)
-		return -ENOMEM;
-
-	/*
-	 * Build the WR
-	 */
-	c2_wr_set_id(&wr, CCWR_EP_LISTEN_CREATE);
-	wr.hdr.context = (u64) (unsigned long) vq_req;
-	wr.rnic_handle = c2dev->adapter_handle;
-	wr.local_addr = laddr->sin_addr.s_addr;
-	wr.local_port = laddr->sin_port;
-	wr.backlog = cpu_to_be32(backlog);
-	wr.user_context = (u64) (unsigned long) cm_id;
-
-	/*
-	 * Reference the request struct.  Dereferenced in the int handler.
-	 */
-	vq_req_get(c2dev, vq_req);
-
-	/*
-	 * Send WR to adapter
-	 */
-	err = vq_send_wr(c2dev, (union c2wr *) & wr);
-	if (err) {
-		vq_req_put(c2dev, vq_req);
-		goto bail0;
-	}
-
-	/*
-	 * Wait for reply from adapter
-	 */
-	err = vq_wait_for_reply(c2dev, vq_req);
-	if (err)
-		goto bail0;
-
-	/*
-	 * Process reply
-	 */
-	reply =
-	    (struct c2wr_ep_listen_create_rep *) (unsigned long) vq_req->reply_msg;
-	if (!reply) {
-		err = -ENOMEM;
-		goto bail1;
-	}
-
-	if ((err = c2_errno(reply)) != 0)
-		goto bail1;
-
-	/*
-	 * Keep the adapter handle. Used in subsequent destroy
-	 */
-	cm_id->provider_data = (void*)(unsigned long) reply->ep_handle;
-
-	/*
-	 * free vq stuff
-	 */
-	vq_repbuf_free(c2dev, reply);
-	vq_req_free(c2dev, vq_req);
-
-	return 0;
-
- bail1:
-	vq_repbuf_free(c2dev, reply);
- bail0:
-	vq_req_free(c2dev, vq_req);
-	return err;
-}
-
-
-int c2_llp_service_destroy(struct iw_cm_id *cm_id)
-{
-
-	struct c2_dev *c2dev;
-	struct c2wr_ep_listen_destroy_req wr;
-	struct c2wr_ep_listen_destroy_rep *reply;
-	struct c2_vq_req *vq_req;
-	int err;
-
-	c2dev = to_c2dev(cm_id->device);
-	if (c2dev == NULL)
-		return -EINVAL;
-
-	/*
-	 * Allocate verbs request.
-	 */
-	vq_req = vq_req_alloc(c2dev);
-	if (!vq_req)
-		return -ENOMEM;
-
-	/*
-	 * Build the WR
-	 */
-	c2_wr_set_id(&wr, CCWR_EP_LISTEN_DESTROY);
-	wr.hdr.context = (unsigned long) vq_req;
-	wr.rnic_handle = c2dev->adapter_handle;
-	wr.ep_handle = (u32)(unsigned long)cm_id->provider_data;
-
-	/*
-	 * reference the request struct.  dereferenced in the int handler.
-	 */
-	vq_req_get(c2dev, vq_req);
-
-	/*
-	 * Send WR to adapter
-	 */
-	err = vq_send_wr(c2dev, (union c2wr *) & wr);
-	if (err) {
-		vq_req_put(c2dev, vq_req);
-		goto bail0;
-	}
-
-	/*
-	 * Wait for reply from adapter
-	 */
-	err = vq_wait_for_reply(c2dev, vq_req);
-	if (err)
-		goto bail0;
-
-	/*
-	 * Process reply
-	 */
-	reply=(struct c2wr_ep_listen_destroy_rep *)(unsigned long)vq_req->reply_msg;
-	if (!reply) {
-		err = -ENOMEM;
-		goto bail0;
-	}
-
-	vq_repbuf_free(c2dev, reply);
- bail0:
-	vq_req_free(c2dev, vq_req);
-	return err;
-}
-
-int c2_llp_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
-{
-	struct c2_dev *c2dev = to_c2dev(cm_id->device);
-	struct c2_qp *qp;
-	struct ib_qp *ibqp;
-	struct c2wr_cr_accept_req *wr;	/* variable length WR */
-	struct c2_vq_req *vq_req;
-	struct c2wr_cr_accept_rep *reply;	/* VQ Reply msg ptr. */
-	int err;
-
-	ibqp = c2_get_qp(cm_id->device, iw_param->qpn);
-	if (!ibqp)
-		return -EINVAL;
-	qp = to_c2qp(ibqp);
-
-	/* Set the RDMA read limits */
-	err = c2_qp_set_read_limits(c2dev, qp, iw_param->ord, iw_param->ird);
-	if (err)
-		goto bail0;
-
-	/* Allocate verbs request. */
-	vq_req = vq_req_alloc(c2dev);
-	if (!vq_req) {
-		err = -ENOMEM;
-		goto bail0;
-	}
-	vq_req->qp = qp;
-	vq_req->cm_id = cm_id;
-	vq_req->event = IW_CM_EVENT_ESTABLISHED;
-
-	wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL);
-	if (!wr) {
-		err = -ENOMEM;
-		goto bail1;
-	}
-
-	/* Build the WR */
-	c2_wr_set_id(wr, CCWR_CR_ACCEPT);
-	wr->hdr.context = (unsigned long) vq_req;
-	wr->rnic_handle = c2dev->adapter_handle;
-	wr->ep_handle = (u32) (unsigned long) cm_id->provider_data;
-	wr->qp_handle = qp->adapter_handle;
-
-	/* Replace the cr_handle with the QP after accept */
-	cm_id->provider_data = qp;
-	cm_id->add_ref(cm_id);
-	qp->cm_id = cm_id;
-
-	cm_id->provider_data = qp;
-
-	/* Validate private_data length */
-	if (iw_param->private_data_len > C2_MAX_PRIVATE_DATA_SIZE) {
-		err = -EINVAL;
-		goto bail1;
-	}
-
-	if (iw_param->private_data) {
-		wr->private_data_length = cpu_to_be32(iw_param->private_data_len);
-		memcpy(&wr->private_data[0],
-		       iw_param->private_data, iw_param->private_data_len);
-	} else
-		wr->private_data_length = 0;
-
-	/* Reference the request struct.  Dereferenced in the int handler. */
-	vq_req_get(c2dev, vq_req);
-
-	/* Send WR to adapter */
-	err = vq_send_wr(c2dev, (union c2wr *) wr);
-	if (err) {
-		vq_req_put(c2dev, vq_req);
-		goto bail1;
-	}
-
-	/* Wait for reply from adapter */
-	err = vq_wait_for_reply(c2dev, vq_req);
-	if (err)
-		goto bail1;
-
-	/* Check that reply is present */
-	reply = (struct c2wr_cr_accept_rep *) (unsigned long) vq_req->reply_msg;
-	if (!reply) {
-		err = -ENOMEM;
-		goto bail1;
-	}
-
-	err = c2_errno(reply);
-	vq_repbuf_free(c2dev, reply);
-
-	if (!err)
-		c2_set_qp_state(qp, C2_QP_STATE_RTS);
- bail1:
-	kfree(wr);
-	vq_req_free(c2dev, vq_req);
- bail0:
-	if (err) {
-		/*
-		 * If we fail, release reference on QP and
-		 * disassociate QP from CM_ID
-		 */
-		cm_id->provider_data = NULL;
-		qp->cm_id = NULL;
-		cm_id->rem_ref(cm_id);
-	}
-	return err;
-}
-
-int c2_llp_reject(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
-{
-	struct c2_dev *c2dev;
-	struct c2wr_cr_reject_req wr;
-	struct c2_vq_req *vq_req;
-	struct c2wr_cr_reject_rep *reply;
-	int err;
-
-	c2dev = to_c2dev(cm_id->device);
-
-	/*
-	 * Allocate verbs request.
-	 */
-	vq_req = vq_req_alloc(c2dev);
-	if (!vq_req)
-		return -ENOMEM;
-
-	/*
-	 * Build the WR
-	 */
-	c2_wr_set_id(&wr, CCWR_CR_REJECT);
-	wr.hdr.context = (unsigned long) vq_req;
-	wr.rnic_handle = c2dev->adapter_handle;
-	wr.ep_handle = (u32) (unsigned long) cm_id->provider_data;
-
-	/*
-	 * reference the request struct.  dereferenced in the int handler.
-	 */
-	vq_req_get(c2dev, vq_req);
-
-	/*
-	 * Send WR to adapter
-	 */
-	err = vq_send_wr(c2dev, (union c2wr *) & wr);
-	if (err) {
-		vq_req_put(c2dev, vq_req);
-		goto bail0;
-	}
-
-	/*
-	 * Wait for reply from adapter
-	 */
-	err = vq_wait_for_reply(c2dev, vq_req);
-	if (err)
-		goto bail0;
-
-	/*
-	 * Process reply
-	 */
-	reply = (struct c2wr_cr_reject_rep *) (unsigned long)
-		vq_req->reply_msg;
-	if (!reply) {
-		err = -ENOMEM;
-		goto bail0;
-	}
-	err = c2_errno(reply);
-	/*
-	 * free vq stuff
-	 */
-	vq_repbuf_free(c2dev, reply);
-
- bail0:
-	vq_req_free(c2dev, vq_req);
-	return err;
-}
diff --git a/drivers/staging/rdma/amso1100/c2_cq.c b/drivers/staging/rdma/amso1100/c2_cq.c
deleted file mode 100644
index 7ad0c08..0000000
--- a/drivers/staging/rdma/amso1100/c2_cq.c
+++ /dev/null
@@ -1,437 +0,0 @@
-/*
- * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
- * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
- * Copyright (c) 2005 Cisco Systems, Inc. All rights reserved.
- * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
- * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-#include <linux/gfp.h>
-
-#include "c2.h"
-#include "c2_vq.h"
-#include "c2_status.h"
-
-#define C2_CQ_MSG_SIZE ((sizeof(struct c2wr_ce) + 32-1) & ~(32-1))
-
-static struct c2_cq *c2_cq_get(struct c2_dev *c2dev, int cqn)
-{
-	struct c2_cq *cq;
-	unsigned long flags;
-
-	spin_lock_irqsave(&c2dev->lock, flags);
-	cq = c2dev->qptr_array[cqn];
-	if (!cq) {
-		spin_unlock_irqrestore(&c2dev->lock, flags);
-		return NULL;
-	}
-	atomic_inc(&cq->refcount);
-	spin_unlock_irqrestore(&c2dev->lock, flags);
-	return cq;
-}
-
-static void c2_cq_put(struct c2_cq *cq)
-{
-	if (atomic_dec_and_test(&cq->refcount))
-		wake_up(&cq->wait);
-}
-
-void c2_cq_event(struct c2_dev *c2dev, u32 mq_index)
-{
-	struct c2_cq *cq;
-
-	cq = c2_cq_get(c2dev, mq_index);
-	if (!cq) {
-		printk("discarding events on destroyed CQN=%d\n", mq_index);
-		return;
-	}
-
-	(*cq->ibcq.comp_handler) (&cq->ibcq, cq->ibcq.cq_context);
-	c2_cq_put(cq);
-}
-
-void c2_cq_clean(struct c2_dev *c2dev, struct c2_qp *qp, u32 mq_index)
-{
-	struct c2_cq *cq;
-	struct c2_mq *q;
-
-	cq = c2_cq_get(c2dev, mq_index);
-	if (!cq)
-		return;
-
-	spin_lock_irq(&cq->lock);
-	q = &cq->mq;
-	if (q && !c2_mq_empty(q)) {
-		u16 priv = q->priv;
-		struct c2wr_ce *msg;
-
-		while (priv != be16_to_cpu(*q->shared)) {
-			msg = (struct c2wr_ce *)
-				(q->msg_pool.host + priv * q->msg_size);
-			if (msg->qp_user_context == (u64) (unsigned long) qp) {
-				msg->qp_user_context = (u64) 0;
-			}
-			priv = (priv + 1) % q->q_size;
-		}
-	}
-	spin_unlock_irq(&cq->lock);
-	c2_cq_put(cq);
-}
-
-static inline enum ib_wc_status c2_cqe_status_to_openib(u8 status)
-{
-	switch (status) {
-	case C2_OK:
-		return IB_WC_SUCCESS;
-	case CCERR_FLUSHED:
-		return IB_WC_WR_FLUSH_ERR;
-	case CCERR_BASE_AND_BOUNDS_VIOLATION:
-		return IB_WC_LOC_PROT_ERR;
-	case CCERR_ACCESS_VIOLATION:
-		return IB_WC_LOC_ACCESS_ERR;
-	case CCERR_TOTAL_LENGTH_TOO_BIG:
-		return IB_WC_LOC_LEN_ERR;
-	case CCERR_INVALID_WINDOW:
-		return IB_WC_MW_BIND_ERR;
-	default:
-		return IB_WC_GENERAL_ERR;
-	}
-}
-
-
-static inline int c2_poll_one(struct c2_dev *c2dev,
-			      struct c2_cq *cq, struct ib_wc *entry)
-{
-	struct c2wr_ce *ce;
-	struct c2_qp *qp;
-	int is_recv = 0;
-
-	ce = c2_mq_consume(&cq->mq);
-	if (!ce) {
-		return -EAGAIN;
-	}
-
-	/*
-	 * if the qp returned is null then this qp has already
-	 * been freed and we are unable process the completion.
-	 * try pulling the next message
-	 */
-	while ((qp =
-		(struct c2_qp *) (unsigned long) ce->qp_user_context) == NULL) {
-		c2_mq_free(&cq->mq);
-		ce = c2_mq_consume(&cq->mq);
-		if (!ce)
-			return -EAGAIN;
-	}
-
-	entry->status = c2_cqe_status_to_openib(c2_wr_get_result(ce));
-	entry->wr_id = ce->hdr.context;
-	entry->qp = &qp->ibqp;
-	entry->wc_flags = 0;
-	entry->slid = 0;
-	entry->sl = 0;
-	entry->src_qp = 0;
-	entry->dlid_path_bits = 0;
-	entry->pkey_index = 0;
-
-	switch (c2_wr_get_id(ce)) {
-	case C2_WR_TYPE_SEND:
-		entry->opcode = IB_WC_SEND;
-		break;
-	case C2_WR_TYPE_RDMA_WRITE:
-		entry->opcode = IB_WC_RDMA_WRITE;
-		break;
-	case C2_WR_TYPE_RDMA_READ:
-		entry->opcode = IB_WC_RDMA_READ;
-		break;
-	case C2_WR_TYPE_RECV:
-		entry->byte_len = be32_to_cpu(ce->bytes_rcvd);
-		entry->opcode = IB_WC_RECV;
-		is_recv = 1;
-		break;
-	default:
-		break;
-	}
-
-	/* consume the WQEs */
-	if (is_recv)
-		c2_mq_lconsume(&qp->rq_mq, 1);
-	else
-		c2_mq_lconsume(&qp->sq_mq,
-			       be32_to_cpu(c2_wr_get_wqe_count(ce)) + 1);
-
-	/* free the message */
-	c2_mq_free(&cq->mq);
-
-	return 0;
-}
-
-int c2_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
-{
-	struct c2_dev *c2dev = to_c2dev(ibcq->device);
-	struct c2_cq *cq = to_c2cq(ibcq);
-	unsigned long flags;
-	int npolled, err;
-
-	spin_lock_irqsave(&cq->lock, flags);
-
-	for (npolled = 0; npolled < num_entries; ++npolled) {
-
-		err = c2_poll_one(c2dev, cq, entry + npolled);
-		if (err)
-			break;
-	}
-
-	spin_unlock_irqrestore(&cq->lock, flags);
-
-	return npolled;
-}
-
-int c2_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags)
-{
-	struct c2_mq_shared __iomem *shared;
-	struct c2_cq *cq;
-	unsigned long flags;
-	int ret = 0;
-
-	cq = to_c2cq(ibcq);
-	shared = cq->mq.peer;
-
-	if ((notify_flags & IB_CQ_SOLICITED_MASK) == IB_CQ_NEXT_COMP)
-		writeb(C2_CQ_NOTIFICATION_TYPE_NEXT, &shared->notification_type);
-	else if ((notify_flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED)
-		writeb(C2_CQ_NOTIFICATION_TYPE_NEXT_SE, &shared->notification_type);
-	else
-		return -EINVAL;
-
-	writeb(CQ_WAIT_FOR_DMA | CQ_ARMED, &shared->armed);
-
-	/*
-	 * Now read back shared->armed to make the PCI
-	 * write synchronous.  This is necessary for
-	 * correct cq notification semantics.
-	 */
-	readb(&shared->armed);
-
-	if (notify_flags & IB_CQ_REPORT_MISSED_EVENTS) {
-		spin_lock_irqsave(&cq->lock, flags);
-		ret = !c2_mq_empty(&cq->mq);
-		spin_unlock_irqrestore(&cq->lock, flags);
-	}
-
-	return ret;
-}
-
-static void c2_free_cq_buf(struct c2_dev *c2dev, struct c2_mq *mq)
-{
-	dma_free_coherent(&c2dev->pcidev->dev, mq->q_size * mq->msg_size,
-			  mq->msg_pool.host, dma_unmap_addr(mq, mapping));
-}
-
-static int c2_alloc_cq_buf(struct c2_dev *c2dev, struct c2_mq *mq,
-			   size_t q_size, size_t msg_size)
-{
-	u8 *pool_start;
-
-	if (q_size > SIZE_MAX / msg_size)
-		return -EINVAL;
-
-	pool_start = dma_alloc_coherent(&c2dev->pcidev->dev, q_size * msg_size,
-					&mq->host_dma, GFP_KERNEL);
-	if (!pool_start)
-		return -ENOMEM;
-
-	c2_mq_rep_init(mq,
-		       0,		/* index (currently unknown) */
-		       q_size,
-		       msg_size,
-		       pool_start,
-		       NULL,	/* peer (currently unknown) */
-		       C2_MQ_HOST_TARGET);
-
-	dma_unmap_addr_set(mq, mapping, mq->host_dma);
-
-	return 0;
-}
-
-int c2_init_cq(struct c2_dev *c2dev, int entries,
-	       struct c2_ucontext *ctx, struct c2_cq *cq)
-{
-	struct c2wr_cq_create_req wr;
-	struct c2wr_cq_create_rep *reply;
-	unsigned long peer_pa;
-	struct c2_vq_req *vq_req;
-	int err;
-
-	might_sleep();
-
-	cq->ibcq.cqe = entries - 1;
-	cq->is_kernel = !ctx;
-
-	/* Allocate a shared pointer */
-	cq->mq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
-				      &cq->mq.shared_dma, GFP_KERNEL);
-	if (!cq->mq.shared)
-		return -ENOMEM;
-
-	/* Allocate pages for the message pool */
-	err = c2_alloc_cq_buf(c2dev, &cq->mq, entries + 1, C2_CQ_MSG_SIZE);
-	if (err)
-		goto bail0;
-
-	vq_req = vq_req_alloc(c2dev);
-	if (!vq_req) {
-		err = -ENOMEM;
-		goto bail1;
-	}
-
-	memset(&wr, 0, sizeof(wr));
-	c2_wr_set_id(&wr, CCWR_CQ_CREATE);
-	wr.hdr.context = (unsigned long) vq_req;
-	wr.rnic_handle = c2dev->adapter_handle;
-	wr.msg_size = cpu_to_be32(cq->mq.msg_size);
-	wr.depth = cpu_to_be32(cq->mq.q_size);
-	wr.shared_ht = cpu_to_be64(cq->mq.shared_dma);
-	wr.msg_pool = cpu_to_be64(cq->mq.host_dma);
-	wr.user_context = (u64) (unsigned long) (cq);
-
-	vq_req_get(c2dev, vq_req);
-
-	err = vq_send_wr(c2dev, (union c2wr *) & wr);
-	if (err) {
-		vq_req_put(c2dev, vq_req);
-		goto bail2;
-	}
-
-	err = vq_wait_for_reply(c2dev, vq_req);
-	if (err)
-		goto bail2;
-
-	reply = (struct c2wr_cq_create_rep *) (unsigned long) (vq_req->reply_msg);
-	if (!reply) {
-		err = -ENOMEM;
-		goto bail2;
-	}
-
-	if ((err = c2_errno(reply)) != 0)
-		goto bail3;
-
-	cq->adapter_handle = reply->cq_handle;
-	cq->mq.index = be32_to_cpu(reply->mq_index);
-
-	peer_pa = c2dev->pa + be32_to_cpu(reply->adapter_shared);
-	cq->mq.peer = ioremap_nocache(peer_pa, PAGE_SIZE);
-	if (!cq->mq.peer) {
-		err = -ENOMEM;
-		goto bail3;
-	}
-
-	vq_repbuf_free(c2dev, reply);
-	vq_req_free(c2dev, vq_req);
-
-	spin_lock_init(&cq->lock);
-	atomic_set(&cq->refcount, 1);
-	init_waitqueue_head(&cq->wait);
-
-	/*
-	 * Use the MQ index allocated by the adapter to
-	 * store the CQ in the qptr_array
-	 */
-	cq->cqn = cq->mq.index;
-	c2dev->qptr_array[cq->cqn] = cq;
-
-	return 0;
-
-bail3:
-	vq_repbuf_free(c2dev, reply);
-bail2:
-	vq_req_free(c2dev, vq_req);
-bail1:
-	c2_free_cq_buf(c2dev, &cq->mq);
-bail0:
-	c2_free_mqsp(cq->mq.shared);
-
-	return err;
-}
-
-void c2_free_cq(struct c2_dev *c2dev, struct c2_cq *cq)
-{
-	int err;
-	struct c2_vq_req *vq_req;
-	struct c2wr_cq_destroy_req wr;
-	struct c2wr_cq_destroy_rep *reply;
-
-	might_sleep();
-
-	/* Clear CQ from the qptr array */
-	spin_lock_irq(&c2dev->lock);
-	c2dev->qptr_array[cq->mq.index] = NULL;
-	atomic_dec(&cq->refcount);
-	spin_unlock_irq(&c2dev->lock);
-
-	wait_event(cq->wait, !atomic_read(&cq->refcount));
-
-	vq_req = vq_req_alloc(c2dev);
-	if (!vq_req) {
-		goto bail0;
-	}
-
-	memset(&wr, 0, sizeof(wr));
-	c2_wr_set_id(&wr, CCWR_CQ_DESTROY);
-	wr.hdr.context = (unsigned long) vq_req;
-	wr.rnic_handle = c2dev->adapter_handle;
-	wr.cq_handle = cq->adapter_handle;
-
-	vq_req_get(c2dev, vq_req);
-
-	err = vq_send_wr(c2dev, (union c2wr *) & wr);
-	if (err) {
-		vq_req_put(c2dev, vq_req);
-		goto bail1;
-	}
-
-	err = vq_wait_for_reply(c2dev, vq_req);
-	if (err)
-		goto bail1;
-
-	reply = (struct c2wr_cq_destroy_rep *) (unsigned long) (vq_req->reply_msg);
-	if (reply)
-		vq_repbuf_free(c2dev, reply);
-bail1:
-	vq_req_free(c2dev, vq_req);
-bail0:
-	if (cq->is_kernel) {
-		c2_free_cq_buf(c2dev, &cq->mq);
-	}
-
-	return;
-}
diff --git a/drivers/staging/rdma/amso1100/c2_intr.c b/drivers/staging/rdma/amso1100/c2_intr.c
deleted file mode 100644
index 74b32a9..0000000
--- a/drivers/staging/rdma/amso1100/c2_intr.c
+++ /dev/null
@@ -1,219 +0,0 @@
-/*
- * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "c2.h"
-#include <rdma/iw_cm.h>
-#include "c2_vq.h"
-
-static void handle_mq(struct c2_dev *c2dev, u32 index);
-static void handle_vq(struct c2_dev *c2dev, u32 mq_index);
-
-/*
- * Handle RNIC interrupts
- */
-void c2_rnic_interrupt(struct c2_dev *c2dev)
-{
-	unsigned int mq_index;
-
-	while (c2dev->hints_read != be16_to_cpu(*c2dev->hint_count)) {
-		mq_index = readl(c2dev->regs + PCI_BAR0_HOST_HINT);
-		if (mq_index & 0x80000000) {
-			break;
-		}
-
-		c2dev->hints_read++;
-		handle_mq(c2dev, mq_index);
-	}
-
-}
-
-/*
- * Top level MQ handler
- */
-static void handle_mq(struct c2_dev *c2dev, u32 mq_index)
-{
-	if (c2dev->qptr_array[mq_index] == NULL) {
-		pr_debug("handle_mq: stray activity for mq_index=%d\n",
-			 mq_index);
-		return;
-	}
-
-	switch (mq_index) {
-	case (0):
-		/*
-		 * An index of 0 in the activity queue
-		 * indicates the req vq now has messages
-		 * available...
-		 *
-		 * Wake up any waiters waiting on req VQ
-		 * message availability.
-		 */
-		wake_up(&c2dev->req_vq_wo);
-		break;
-	case (1):
-		handle_vq(c2dev, mq_index);
-		break;
-	case (2):
-		/* We have to purge the VQ in case there are pending
-		 * accept reply requests that would result in the
-		 * generation of an ESTABLISHED event. If we don't
-		 * generate these first, a CLOSE event could end up
-		 * being delivered before the ESTABLISHED event.
-		 */
-		handle_vq(c2dev, 1);
-
-		c2_ae_event(c2dev, mq_index);
-		break;
-	default:
-		/* There is no event synchronization between CQ events
-		 * and AE or CM events. In fact, CQE could be
-		 * delivered for all of the I/O up to and including the
-		 * FLUSH for a peer disconenct prior to the ESTABLISHED
-		 * event being delivered to the app. The reason for this
-		 * is that CM events are delivered on a thread, while AE
-		 * and CM events are delivered on interrupt context.
-		 */
-		c2_cq_event(c2dev, mq_index);
-		break;
-	}
-
-	return;
-}
-
-/*
- * Handles verbs WR replies.
- */
-static void handle_vq(struct c2_dev *c2dev, u32 mq_index)
-{
-	void *adapter_msg, *reply_msg;
-	struct c2wr_hdr *host_msg;
-	struct c2wr_hdr tmp;
-	struct c2_mq *reply_vq;
-	struct c2_vq_req *req;
-	struct iw_cm_event cm_event;
-	int err;
-
-	reply_vq = c2dev->qptr_array[mq_index];
-
-	/*
-	 * get next msg from mq_index into adapter_msg.
-	 * don't free it yet.
-	 */
-	adapter_msg = c2_mq_consume(reply_vq);
-	if (adapter_msg == NULL) {
-		return;
-	}
-
-	host_msg = vq_repbuf_alloc(c2dev);
-
-	/*
-	 * If we can't get a host buffer, then we'll still
-	 * wakeup the waiter, we just won't give him the msg.
-	 * It is assumed the waiter will deal with this...
-	 */
-	if (!host_msg) {
-		pr_debug("handle_vq: no repbufs!\n");
-
-		/*
-		 * just copy the WR header into a local variable.
-		 * this allows us to still demux on the context
-		 */
-		host_msg = &tmp;
-		memcpy(host_msg, adapter_msg, sizeof(tmp));
-		reply_msg = NULL;
-	} else {
-		memcpy(host_msg, adapter_msg, reply_vq->msg_size);
-		reply_msg = host_msg;
-	}
-
-	/*
-	 * consume the msg from the MQ
-	 */
-	c2_mq_free(reply_vq);
-
-	/*
-	 * wakeup the waiter.
-	 */
-	req = (struct c2_vq_req *) (unsigned long) host_msg->context;
-	if (req == NULL) {
-		/*
-		 * We should never get here, as the adapter should
-		 * never send us a reply that we're not expecting.
-		 */
-		if (reply_msg != NULL)
-			vq_repbuf_free(c2dev, host_msg);
-		pr_debug("handle_vq: UNEXPECTEDLY got NULL req\n");
-		return;
-	}
-
-	if (reply_msg)
-		err = c2_errno(reply_msg);
-	else
-		err = -ENOMEM;
-
-	if (!err) switch (req->event) {
-	case IW_CM_EVENT_ESTABLISHED:
-		c2_set_qp_state(req->qp,
-				C2_QP_STATE_RTS);
-		/*
-		 * Until ird/ord negotiation via MPAv2 support is added, send
-		 * max supported values
-		 */
-		cm_event.ird = cm_event.ord = 128;
-	case IW_CM_EVENT_CLOSE:
-
-		/*
-		 * Move the QP to RTS if this is
-		 * the established event
-		 */
-		cm_event.event = req->event;
-		cm_event.status = 0;
-		cm_event.local_addr = req->cm_id->local_addr;
-		cm_event.remote_addr = req->cm_id->remote_addr;
-		cm_event.private_data = NULL;
-		cm_event.private_data_len = 0;
-		req->cm_id->event_handler(req->cm_id, &cm_event);
-		break;
-	default:
-		break;
-	}
-
-	req->reply_msg = (u64) (unsigned long) (reply_msg);
-	atomic_set(&req->reply_ready, 1);
-	wake_up(&req->wait_object);
-
-	/*
-	 * If the request was cancelled, then this put will
-	 * free the vq_req memory...and reply_msg!!!
-	 */
-	vq_req_put(c2dev, req);
-}
diff --git a/drivers/staging/rdma/amso1100/c2_mm.c b/drivers/staging/rdma/amso1100/c2_mm.c
deleted file mode 100644
index 25081e2..0000000
--- a/drivers/staging/rdma/amso1100/c2_mm.c
+++ /dev/null
@@ -1,377 +0,0 @@
-/*
- * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include <linux/slab.h>
-
-#include "c2.h"
-#include "c2_vq.h"
-
-#define PBL_VIRT 1
-#define PBL_PHYS 2
-
-/*
- * Send all the PBL messages to convey the remainder of the PBL
- * Wait for the adapter's reply on the last one.
- * This is indicated by setting the MEM_PBL_COMPLETE in the flags.
- *
- * NOTE:  vq_req is _not_ freed by this function.  The VQ Host
- *	  Reply buffer _is_ freed by this function.
- */
-static int
-send_pbl_messages(struct c2_dev *c2dev, __be32 stag_index,
-		  unsigned long va, u32 pbl_depth,
-		  struct c2_vq_req *vq_req, int pbl_type)
-{
-	u32 pbe_count;		/* amt that fits in a PBL msg */
-	u32 count;		/* amt in this PBL MSG. */
-	struct c2wr_nsmr_pbl_req *wr;	/* PBL WR ptr */
-	struct c2wr_nsmr_pbl_rep *reply;	/* reply ptr */
- 	int err, pbl_virt, pbl_index, i;
-
-	switch (pbl_type) {
-	case PBL_VIRT:
-		pbl_virt = 1;
-		break;
-	case PBL_PHYS:
-		pbl_virt = 0;
-		break;
-	default:
-		return -EINVAL;
-		break;
-	}
-
-	pbe_count = (c2dev->req_vq.msg_size -
-		     sizeof(struct c2wr_nsmr_pbl_req)) / sizeof(u64);
-	wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL);
-	if (!wr) {
-		return -ENOMEM;
-	}
-	c2_wr_set_id(wr, CCWR_NSMR_PBL);
-
-	/*
-	 * Only the last PBL message will generate a reply from the verbs,
-	 * so we set the context to 0 indicating there is no kernel verbs
-	 * handler blocked awaiting this reply.
-	 */
-	wr->hdr.context = 0;
-	wr->rnic_handle = c2dev->adapter_handle;
-	wr->stag_index = stag_index;	/* already swapped */
-	wr->flags = 0;
-	pbl_index = 0;
-	while (pbl_depth) {
-		count = min(pbe_count, pbl_depth);
-		wr->addrs_length = cpu_to_be32(count);
-
-		/*
-		 *  If this is the last message, then reference the
-		 *  vq request struct cuz we're gonna wait for a reply.
-		 *  also make this PBL msg as the last one.
-		 */
-		if (count == pbl_depth) {
-			/*
-			 * reference the request struct.  dereferenced in the
-			 * int handler.
-			 */
-			vq_req_get(c2dev, vq_req);
-			wr->flags = cpu_to_be32(MEM_PBL_COMPLETE);
-
-			/*
-			 * This is the last PBL message.
-			 * Set the context to our VQ Request Object so we can
-			 * wait for the reply.
-			 */
-			wr->hdr.context = (unsigned long) vq_req;
-		}
-
-		/*
-		 * If pbl_virt is set then va is a virtual address
-		 * that describes a virtually contiguous memory
-		 * allocation. The wr needs the start of each virtual page
-		 * to be converted to the corresponding physical address
-		 * of the page. If pbl_virt is not set then va is an array
-		 * of physical addresses and there is no conversion to do.
-		 * Just fill in the wr with what is in the array.
-		 */
-		for (i = 0; i < count; i++) {
-			if (pbl_virt) {
-				va += PAGE_SIZE;
-			} else {
- 				wr->paddrs[i] =
-				    cpu_to_be64(((u64 *)va)[pbl_index + i]);
-			}
-		}
-
-		/*
-		 * Send WR to adapter
-		 */
-		err = vq_send_wr(c2dev, (union c2wr *) wr);
-		if (err) {
-			if (count <= pbe_count) {
-				vq_req_put(c2dev, vq_req);
-			}
-			goto bail0;
-		}
-		pbl_depth -= count;
-		pbl_index += count;
-	}
-
-	/*
-	 *  Now wait for the reply...
-	 */
-	err = vq_wait_for_reply(c2dev, vq_req);
-	if (err) {
-		goto bail0;
-	}
-
-	/*
-	 * Process reply
-	 */
-	reply = (struct c2wr_nsmr_pbl_rep *) (unsigned long) vq_req->reply_msg;
-	if (!reply) {
-		err = -ENOMEM;
-		goto bail0;
-	}
-
-	err = c2_errno(reply);
-
-	vq_repbuf_free(c2dev, reply);
-bail0:
-	kfree(wr);
-	return err;
-}
-
-#define C2_PBL_MAX_DEPTH 131072
-int
-c2_nsmr_register_phys_kern(struct c2_dev *c2dev, u64 *addr_list,
- 			   int page_size, int pbl_depth, u32 length,
- 			   u32 offset, u64 *va, enum c2_acf acf,
-			   struct c2_mr *mr)
-{
-	struct c2_vq_req *vq_req;
-	struct c2wr_nsmr_register_req *wr;
-	struct c2wr_nsmr_register_rep *reply;
-	u16 flags;
-	int i, pbe_count, count;
-	int err;
-
-	if (!va || !length || !addr_list || !pbl_depth)
-		return -EINTR;
-
-	/*
-	 * Verify PBL depth is within rnic max
-	 */
-	if (pbl_depth > C2_PBL_MAX_DEPTH) {
-		return -EINTR;
-	}
-
-	/*
-	 * allocate verbs request object
-	 */
-	vq_req = vq_req_alloc(c2dev);
-	if (!vq_req)
-		return -ENOMEM;
-
-	wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL);
-	if (!wr) {
-		err = -ENOMEM;
-		goto bail0;
-	}
-
-	/*
-	 * build the WR
-	 */
-	c2_wr_set_id(wr, CCWR_NSMR_REGISTER);
-	wr->hdr.context = (unsigned long) vq_req;
-	wr->rnic_handle = c2dev->adapter_handle;
-
-	flags = (acf | MEM_VA_BASED | MEM_REMOTE);
-
-	/*
-	 * compute how many pbes can fit in the message
-	 */
-	pbe_count = (c2dev->req_vq.msg_size -
-		     sizeof(struct c2wr_nsmr_register_req)) / sizeof(u64);
-
-	if (pbl_depth <= pbe_count) {
-		flags |= MEM_PBL_COMPLETE;
-	}
-	wr->flags = cpu_to_be16(flags);
-	wr->stag_key = 0;	//stag_key;
-	wr->va = cpu_to_be64(*va);
-	wr->pd_id = mr->pd->pd_id;
-	wr->pbe_size = cpu_to_be32(page_size);
-	wr->length = cpu_to_be32(length);
-	wr->pbl_depth = cpu_to_be32(pbl_depth);
-	wr->fbo = cpu_to_be32(offset);
-	count = min(pbl_depth, pbe_count);
-	wr->addrs_length = cpu_to_be32(count);
-
-	/*
-	 * fill out the PBL for this message
-	 */
-	for (i = 0; i < count; i++) {
-		wr->paddrs[i] = cpu_to_be64(addr_list[i]);
-	}
-
-	/*
-	 * regerence the request struct
-	 */
-	vq_req_get(c2dev, vq_req);
-
-	/*
-	 * send the WR to the adapter
-	 */
-	err = vq_send_wr(c2dev, (union c2wr *) wr);
-	if (err) {
-		vq_req_put(c2dev, vq_req);
-		goto bail1;
-	}
-
-	/*
-	 * wait for reply from adapter
-	 */
-	err = vq_wait_for_reply(c2dev, vq_req);
-	if (err) {
-		goto bail1;
-	}
-
-	/*
-	 * process reply
-	 */
-	reply =
-	    (struct c2wr_nsmr_register_rep *) (unsigned long) (vq_req->reply_msg);
-	if (!reply) {
-		err = -ENOMEM;
-		goto bail1;
-	}
-	if ((err = c2_errno(reply))) {
-		goto bail2;
-	}
-	//*p_pb_entries = be32_to_cpu(reply->pbl_depth);
-	mr->ibmr.lkey = mr->ibmr.rkey = be32_to_cpu(reply->stag_index);
-	vq_repbuf_free(c2dev, reply);
-
-	/*
-	 * if there are still more PBEs we need to send them to
-	 * the adapter and wait for a reply on the final one.
-	 * reuse vq_req for this purpose.
-	 */
-	pbl_depth -= count;
-	if (pbl_depth) {
-
-		vq_req->reply_msg = (unsigned long) NULL;
-		atomic_set(&vq_req->reply_ready, 0);
-		err = send_pbl_messages(c2dev,
-					cpu_to_be32(mr->ibmr.lkey),
-					(unsigned long) &addr_list[i],
-					pbl_depth, vq_req, PBL_PHYS);
-		if (err) {
-			goto bail1;
-		}
-	}
-
-	vq_req_free(c2dev, vq_req);
-	kfree(wr);
-
-	return err;
-
-bail2:
-	vq_repbuf_free(c2dev, reply);
-bail1:
-	kfree(wr);
-bail0:
-	vq_req_free(c2dev, vq_req);
-	return err;
-}
-
-int c2_stag_dealloc(struct c2_dev *c2dev, u32 stag_index)
-{
-	struct c2_vq_req *vq_req;	/* verbs request object */
-	struct c2wr_stag_dealloc_req wr;	/* work request */
-	struct c2wr_stag_dealloc_rep *reply;	/* WR reply  */
-	int err;
-
-
-	/*
-	 * allocate verbs request object
-	 */
-	vq_req = vq_req_alloc(c2dev);
-	if (!vq_req) {
-		return -ENOMEM;
-	}
-
-	/*
-	 * Build the WR
-	 */
-	c2_wr_set_id(&wr, CCWR_STAG_DEALLOC);
-	wr.hdr.context = (u64) (unsigned long) vq_req;
-	wr.rnic_handle = c2dev->adapter_handle;
-	wr.stag_index = cpu_to_be32(stag_index);
-
-	/*
-	 * reference the request struct.  dereferenced in the int handler.
-	 */
-	vq_req_get(c2dev, vq_req);
-
-	/*
-	 * Send WR to adapter
-	 */
-	err = vq_send_wr(c2dev, (union c2wr *) & wr);
-	if (err) {
-		vq_req_put(c2dev, vq_req);
-		goto bail0;
-	}
-
-	/*
-	 * Wait for reply from adapter
-	 */
-	err = vq_wait_for_reply(c2dev, vq_req);
-	if (err) {
-		goto bail0;
-	}
-
-	/*
-	 * Process reply
-	 */
-	reply = (struct c2wr_stag_dealloc_rep *) (unsigned long) vq_req->reply_msg;
-	if (!reply) {
-		err = -ENOMEM;
-		goto bail0;
-	}
-
-	err = c2_errno(reply);
-
-	vq_repbuf_free(c2dev, reply);
-bail0:
-	vq_req_free(c2dev, vq_req);
-	return err;
-}
diff --git a/drivers/staging/rdma/amso1100/c2_mq.c b/drivers/staging/rdma/amso1100/c2_mq.c
deleted file mode 100644
index 7827fb8..0000000
--- a/drivers/staging/rdma/amso1100/c2_mq.c
+++ /dev/null
@@ -1,175 +0,0 @@
-/*
- * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "c2.h"
-#include "c2_mq.h"
-
-void *c2_mq_alloc(struct c2_mq *q)
-{
-	BUG_ON(q->magic != C2_MQ_MAGIC);
-	BUG_ON(q->type != C2_MQ_ADAPTER_TARGET);
-
-	if (c2_mq_full(q)) {
-		return NULL;
-	} else {
-#ifdef DEBUG
-		struct c2wr_hdr *m =
-		    (struct c2wr_hdr *) (q->msg_pool.host + q->priv * q->msg_size);
-#ifdef CCMSGMAGIC
-		BUG_ON(m->magic != be32_to_cpu(~CCWR_MAGIC));
-		m->magic = cpu_to_be32(CCWR_MAGIC);
-#endif
-		return m;
-#else
-		return q->msg_pool.host + q->priv * q->msg_size;
-#endif
-	}
-}
-
-void c2_mq_produce(struct c2_mq *q)
-{
-	BUG_ON(q->magic != C2_MQ_MAGIC);
-	BUG_ON(q->type != C2_MQ_ADAPTER_TARGET);
-
-	if (!c2_mq_full(q)) {
-		q->priv = (q->priv + 1) % q->q_size;
-		q->hint_count++;
-		/* Update peer's offset. */
-		__raw_writew((__force u16) cpu_to_be16(q->priv), &q->peer->shared);
-	}
-}
-
-void *c2_mq_consume(struct c2_mq *q)
-{
-	BUG_ON(q->magic != C2_MQ_MAGIC);
-	BUG_ON(q->type != C2_MQ_HOST_TARGET);
-
-	if (c2_mq_empty(q)) {
-		return NULL;
-	} else {
-#ifdef DEBUG
-		struct c2wr_hdr *m = (struct c2wr_hdr *)
-		    (q->msg_pool.host + q->priv * q->msg_size);
-#ifdef CCMSGMAGIC
-		BUG_ON(m->magic != be32_to_cpu(CCWR_MAGIC));
-#endif
-		return m;
-#else
-		return q->msg_pool.host + q->priv * q->msg_size;
-#endif
-	}
-}
-
-void c2_mq_free(struct c2_mq *q)
-{
-	BUG_ON(q->magic != C2_MQ_MAGIC);
-	BUG_ON(q->type != C2_MQ_HOST_TARGET);
-
-	if (!c2_mq_empty(q)) {
-
-#ifdef CCMSGMAGIC
-		{
-			struct c2wr_hdr __iomem *m = (struct c2wr_hdr __iomem *)
-			    (q->msg_pool.adapter + q->priv * q->msg_size);
-			__raw_writel(cpu_to_be32(~CCWR_MAGIC), &m->magic);
-		}
-#endif
-		q->priv = (q->priv + 1) % q->q_size;
-		/* Update peer's offset. */
-		__raw_writew((__force u16) cpu_to_be16(q->priv), &q->peer->shared);
-	}
-}
-
-
-void c2_mq_lconsume(struct c2_mq *q, u32 wqe_count)
-{
-	BUG_ON(q->magic != C2_MQ_MAGIC);
-	BUG_ON(q->type != C2_MQ_ADAPTER_TARGET);
-
-	while (wqe_count--) {
-		BUG_ON(c2_mq_empty(q));
-		*q->shared = cpu_to_be16((be16_to_cpu(*q->shared)+1) % q->q_size);
-	}
-}
-
-#if 0
-u32 c2_mq_count(struct c2_mq *q)
-{
-	s32 count;
-
-	if (q->type == C2_MQ_HOST_TARGET)
-		count = be16_to_cpu(*q->shared) - q->priv;
-	else
-		count = q->priv - be16_to_cpu(*q->shared);
-
-	if (count < 0)
-		count += q->q_size;
-
-	return (u32) count;
-}
-#endif  /*  0  */
-
-void c2_mq_req_init(struct c2_mq *q, u32 index, u32 q_size, u32 msg_size,
-		    u8 __iomem *pool_start, u16 __iomem *peer, u32 type)
-{
-	BUG_ON(!q->shared);
-
-	/* This code assumes the byte swapping has already been done! */
-	q->index = index;
-	q->q_size = q_size;
-	q->msg_size = msg_size;
-	q->msg_pool.adapter = pool_start;
-	q->peer = (struct c2_mq_shared __iomem *) peer;
-	q->magic = C2_MQ_MAGIC;
-	q->type = type;
-	q->priv = 0;
-	q->hint_count = 0;
-	return;
-}
-
-void c2_mq_rep_init(struct c2_mq *q, u32 index, u32 q_size, u32 msg_size,
-		    u8 *pool_start, u16 __iomem *peer, u32 type)
-{
-	BUG_ON(!q->shared);
-
-	/* This code assumes the byte swapping has already been done! */
-	q->index = index;
-	q->q_size = q_size;
-	q->msg_size = msg_size;
-	q->msg_pool.host = pool_start;
-	q->peer = (struct c2_mq_shared __iomem *) peer;
-	q->magic = C2_MQ_MAGIC;
-	q->type = type;
-	q->priv = 0;
-	q->hint_count = 0;
-	return;
-}
diff --git a/drivers/staging/rdma/amso1100/c2_mq.h b/drivers/staging/rdma/amso1100/c2_mq.h
deleted file mode 100644
index 8e1b4d1..0000000
--- a/drivers/staging/rdma/amso1100/c2_mq.h
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef _C2_MQ_H_
-#define _C2_MQ_H_
-#include <linux/kernel.h>
-#include <linux/dma-mapping.h>
-#include "c2_wr.h"
-
-enum c2_shared_regs {
-
-	C2_SHARED_ARMED = 0x10,
-	C2_SHARED_NOTIFY = 0x18,
-	C2_SHARED_SHARED = 0x40,
-};
-
-struct c2_mq_shared {
-	u16 unused1;
-	u8 armed;
-	u8 notification_type;
-	u32 unused2;
-	u16 shared;
-	/* Pad to 64 bytes. */
-	u8 pad[64 - sizeof(u16) - 2 * sizeof(u8) - sizeof(u32) - sizeof(u16)];
-};
-
-enum c2_mq_type {
-	C2_MQ_HOST_TARGET = 1,
-	C2_MQ_ADAPTER_TARGET = 2,
-};
-
-/*
- * c2_mq_t is for kernel-mode MQs like the VQs Cand the AEQ.
- * c2_user_mq_t (which is the same format) is for user-mode MQs...
- */
-#define C2_MQ_MAGIC 0x4d512020	/* 'MQ  ' */
-struct c2_mq {
-	u32 magic;
-	union {
-		u8 *host;
-		u8 __iomem *adapter;
-	} msg_pool;
-	dma_addr_t host_dma;
-	DEFINE_DMA_UNMAP_ADDR(mapping);
-	u16 hint_count;
-	u16 priv;
-	struct c2_mq_shared __iomem *peer;
-	__be16 *shared;
-	dma_addr_t shared_dma;
-	u32 q_size;
-	u32 msg_size;
-	u32 index;
-	enum c2_mq_type type;
-};
-
-static __inline__ int c2_mq_empty(struct c2_mq *q)
-{
-	return q->priv == be16_to_cpu(*q->shared);
-}
-
-static __inline__ int c2_mq_full(struct c2_mq *q)
-{
-	return q->priv == (be16_to_cpu(*q->shared) + q->q_size - 1) % q->q_size;
-}
-
-void c2_mq_lconsume(struct c2_mq *q, u32 wqe_count);
-void *c2_mq_alloc(struct c2_mq *q);
-void c2_mq_produce(struct c2_mq *q);
-void *c2_mq_consume(struct c2_mq *q);
-void c2_mq_free(struct c2_mq *q);
-void c2_mq_req_init(struct c2_mq *q, u32 index, u32 q_size, u32 msg_size,
-		       u8 __iomem *pool_start, u16 __iomem *peer, u32 type);
-void c2_mq_rep_init(struct c2_mq *q, u32 index, u32 q_size, u32 msg_size,
-			   u8 *pool_start, u16 __iomem *peer, u32 type);
-
-#endif				/* _C2_MQ_H_ */
diff --git a/drivers/staging/rdma/amso1100/c2_pd.c b/drivers/staging/rdma/amso1100/c2_pd.c
deleted file mode 100644
index f3e81dc..0000000
--- a/drivers/staging/rdma/amso1100/c2_pd.c
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Copyright (c) 2004 Topspin Communications.  All rights reserved.
- * Copyright (c) 2005 Cisco Systems.  All rights reserved.
- * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/errno.h>
-
-#include "c2.h"
-#include "c2_provider.h"
-
-int c2_pd_alloc(struct c2_dev *c2dev, int privileged, struct c2_pd *pd)
-{
-	u32 obj;
-	int ret = 0;
-
-	spin_lock(&c2dev->pd_table.lock);
-	obj = find_next_zero_bit(c2dev->pd_table.table, c2dev->pd_table.max,
-				 c2dev->pd_table.last);
-	if (obj >= c2dev->pd_table.max)
-		obj = find_first_zero_bit(c2dev->pd_table.table,
-					  c2dev->pd_table.max);
-	if (obj < c2dev->pd_table.max) {
-		pd->pd_id = obj;
-		__set_bit(obj, c2dev->pd_table.table);
-		c2dev->pd_table.last = obj+1;
-		if (c2dev->pd_table.last >= c2dev->pd_table.max)
-			c2dev->pd_table.last = 0;
-	} else
-		ret = -ENOMEM;
-	spin_unlock(&c2dev->pd_table.lock);
-	return ret;
-}
-
-void c2_pd_free(struct c2_dev *c2dev, struct c2_pd *pd)
-{
-	spin_lock(&c2dev->pd_table.lock);
-	__clear_bit(pd->pd_id, c2dev->pd_table.table);
-	spin_unlock(&c2dev->pd_table.lock);
-}
-
-int c2_init_pd_table(struct c2_dev *c2dev)
-{
-
-	c2dev->pd_table.last = 0;
-	c2dev->pd_table.max = c2dev->props.max_pd;
-	spin_lock_init(&c2dev->pd_table.lock);
-	c2dev->pd_table.table = kmalloc(BITS_TO_LONGS(c2dev->props.max_pd) *
-					sizeof(long), GFP_KERNEL);
-	if (!c2dev->pd_table.table)
-		return -ENOMEM;
-	bitmap_zero(c2dev->pd_table.table, c2dev->props.max_pd);
-	return 0;
-}
-
-void c2_cleanup_pd_table(struct c2_dev *c2dev)
-{
-	kfree(c2dev->pd_table.table);
-}
diff --git a/drivers/staging/rdma/amso1100/c2_provider.c b/drivers/staging/rdma/amso1100/c2_provider.c
deleted file mode 100644
index de8d10e..0000000
--- a/drivers/staging/rdma/amso1100/c2_provider.c
+++ /dev/null
@@ -1,862 +0,0 @@
-/*
- * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/pci.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/inetdevice.h>
-#include <linux/delay.h>
-#include <linux/ethtool.h>
-#include <linux/mii.h>
-#include <linux/if_vlan.h>
-#include <linux/crc32.h>
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/tcp.h>
-#include <linux/init.h>
-#include <linux/dma-mapping.h>
-#include <linux/if_arp.h>
-#include <linux/vmalloc.h>
-#include <linux/slab.h>
-
-#include <asm/io.h>
-#include <asm/irq.h>
-#include <asm/byteorder.h>
-
-#include <rdma/ib_smi.h>
-#include <rdma/ib_umem.h>
-#include <rdma/ib_user_verbs.h>
-#include "c2.h"
-#include "c2_provider.h"
-#include "c2_user.h"
-
-static int c2_query_device(struct ib_device *ibdev, struct ib_device_attr *props,
-			   struct ib_udata *uhw)
-{
-	struct c2_dev *c2dev = to_c2dev(ibdev);
-
-	pr_debug("%s:%u\n", __func__, __LINE__);
-
-	if (uhw->inlen || uhw->outlen)
-		return -EINVAL;
-
-	*props = c2dev->props;
-	return 0;
-}
-
-static int c2_query_port(struct ib_device *ibdev,
-			 u8 port, struct ib_port_attr *props)
-{
-	pr_debug("%s:%u\n", __func__, __LINE__);
-
-	props->max_mtu = IB_MTU_4096;
-	props->lid = 0;
-	props->lmc = 0;
-	props->sm_lid = 0;
-	props->sm_sl = 0;
-	props->state = IB_PORT_ACTIVE;
-	props->phys_state = 0;
-	props->port_cap_flags =
-	    IB_PORT_CM_SUP |
-	    IB_PORT_REINIT_SUP |
-	    IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP;
-	props->gid_tbl_len = 1;
-	props->pkey_tbl_len = 1;
-	props->qkey_viol_cntr = 0;
-	props->active_width = 1;
-	props->active_speed = IB_SPEED_SDR;
-
-	return 0;
-}
-
-static int c2_query_pkey(struct ib_device *ibdev,
-			 u8 port, u16 index, u16 * pkey)
-{
-	pr_debug("%s:%u\n", __func__, __LINE__);
-	*pkey = 0;
-	return 0;
-}
-
-static int c2_query_gid(struct ib_device *ibdev, u8 port,
-			int index, union ib_gid *gid)
-{
-	struct c2_dev *c2dev = to_c2dev(ibdev);
-
-	pr_debug("%s:%u\n", __func__, __LINE__);
-	memset(&(gid->raw[0]), 0, sizeof(gid->raw));
-	memcpy(&(gid->raw[0]), c2dev->pseudo_netdev->dev_addr, 6);
-
-	return 0;
-}
-
-/* Allocate the user context data structure. This keeps track
- * of all objects associated with a particular user-mode client.
- */
-static struct ib_ucontext *c2_alloc_ucontext(struct ib_device *ibdev,
-					     struct ib_udata *udata)
-{
-	struct c2_ucontext *context;
-
-	pr_debug("%s:%u\n", __func__, __LINE__);
-	context = kmalloc(sizeof(*context), GFP_KERNEL);
-	if (!context)
-		return ERR_PTR(-ENOMEM);
-
-	return &context->ibucontext;
-}
-
-static int c2_dealloc_ucontext(struct ib_ucontext *context)
-{
-	pr_debug("%s:%u\n", __func__, __LINE__);
-	kfree(context);
-	return 0;
-}
-
-static int c2_mmap_uar(struct ib_ucontext *context, struct vm_area_struct *vma)
-{
-	pr_debug("%s:%u\n", __func__, __LINE__);
-	return -ENOSYS;
-}
-
-static struct ib_pd *c2_alloc_pd(struct ib_device *ibdev,
-				 struct ib_ucontext *context,
-				 struct ib_udata *udata)
-{
-	struct c2_pd *pd;
-	int err;
-
-	pr_debug("%s:%u\n", __func__, __LINE__);
-
-	pd = kmalloc(sizeof(*pd), GFP_KERNEL);
-	if (!pd)
-		return ERR_PTR(-ENOMEM);
-
-	err = c2_pd_alloc(to_c2dev(ibdev), !context, pd);
-	if (err) {
-		kfree(pd);
-		return ERR_PTR(err);
-	}
-
-	if (context) {
-		if (ib_copy_to_udata(udata, &pd->pd_id, sizeof(__u32))) {
-			c2_pd_free(to_c2dev(ibdev), pd);
-			kfree(pd);
-			return ERR_PTR(-EFAULT);
-		}
-	}
-
-	return &pd->ibpd;
-}
-
-static int c2_dealloc_pd(struct ib_pd *pd)
-{
-	pr_debug("%s:%u\n", __func__, __LINE__);
-	c2_pd_free(to_c2dev(pd->device), to_c2pd(pd));
-	kfree(pd);
-
-	return 0;
-}
-
-static struct ib_ah *c2_ah_create(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
-{
-	pr_debug("%s:%u\n", __func__, __LINE__);
-	return ERR_PTR(-ENOSYS);
-}
-
-static int c2_ah_destroy(struct ib_ah *ah)
-{
-	pr_debug("%s:%u\n", __func__, __LINE__);
-	return -ENOSYS;
-}
-
-static void c2_add_ref(struct ib_qp *ibqp)
-{
-	struct c2_qp *qp;
-	BUG_ON(!ibqp);
-	qp = to_c2qp(ibqp);
-	atomic_inc(&qp->refcount);
-}
-
-static void c2_rem_ref(struct ib_qp *ibqp)
-{
-	struct c2_qp *qp;
-	BUG_ON(!ibqp);
-	qp = to_c2qp(ibqp);
-	if (atomic_dec_and_test(&qp->refcount))
-		wake_up(&qp->wait);
-}
-
-struct ib_qp *c2_get_qp(struct ib_device *device, int qpn)
-{
-	struct c2_dev* c2dev = to_c2dev(device);
-	struct c2_qp *qp;
-
-	qp = c2_find_qpn(c2dev, qpn);
-	pr_debug("%s Returning QP=%p for QPN=%d, device=%p, refcount=%d\n",
-		__func__, qp, qpn, device,
-		(qp?atomic_read(&qp->refcount):0));
-
-	return (qp?&qp->ibqp:NULL);
-}
-
-static struct ib_qp *c2_create_qp(struct ib_pd *pd,
-				  struct ib_qp_init_attr *init_attr,
-				  struct ib_udata *udata)
-{
-	struct c2_qp *qp;
-	int err;
-
-	pr_debug("%s:%u\n", __func__, __LINE__);
-
-	if (init_attr->create_flags)
-		return ERR_PTR(-EINVAL);
-
-	switch (init_attr->qp_type) {
-	case IB_QPT_RC:
-		qp = kzalloc(sizeof(*qp), GFP_KERNEL);
-		if (!qp) {
-			pr_debug("%s: Unable to allocate QP\n", __func__);
-			return ERR_PTR(-ENOMEM);
-		}
-		spin_lock_init(&qp->lock);
-		if (pd->uobject) {
-			/* userspace specific */
-		}
-
-		err = c2_alloc_qp(to_c2dev(pd->device),
-				  to_c2pd(pd), init_attr, qp);
-
-		if (err && pd->uobject) {
-			/* userspace specific */
-		}
-
-		break;
-	default:
-		pr_debug("%s: Invalid QP type: %d\n", __func__,
-			init_attr->qp_type);
-		return ERR_PTR(-EINVAL);
-	}
-
-	if (err) {
-		kfree(qp);
-		return ERR_PTR(err);
-	}
-
-	return &qp->ibqp;
-}
-
-static int c2_destroy_qp(struct ib_qp *ib_qp)
-{
-	struct c2_qp *qp = to_c2qp(ib_qp);
-
-	pr_debug("%s:%u qp=%p,qp->state=%d\n",
-		__func__, __LINE__, ib_qp, qp->state);
-	c2_free_qp(to_c2dev(ib_qp->device), qp);
-	kfree(qp);
-	return 0;
-}
-
-static struct ib_cq *c2_create_cq(struct ib_device *ibdev,
-				  const struct ib_cq_init_attr *attr,
-				  struct ib_ucontext *context,
-				  struct ib_udata *udata)
-{
-	int entries = attr->cqe;
-	struct c2_cq *cq;
-	int err;
-
-	if (attr->flags)
-		return ERR_PTR(-EINVAL);
-
-	cq = kmalloc(sizeof(*cq), GFP_KERNEL);
-	if (!cq) {
-		pr_debug("%s: Unable to allocate CQ\n", __func__);
-		return ERR_PTR(-ENOMEM);
-	}
-
-	err = c2_init_cq(to_c2dev(ibdev), entries, NULL, cq);
-	if (err) {
-		pr_debug("%s: error initializing CQ\n", __func__);
-		kfree(cq);
-		return ERR_PTR(err);
-	}
-
-	return &cq->ibcq;
-}
-
-static int c2_destroy_cq(struct ib_cq *ib_cq)
-{
-	struct c2_cq *cq = to_c2cq(ib_cq);
-
-	pr_debug("%s:%u\n", __func__, __LINE__);
-
-	c2_free_cq(to_c2dev(ib_cq->device), cq);
-	kfree(cq);
-
-	return 0;
-}
-
-static inline u32 c2_convert_access(int acc)
-{
-	return (acc & IB_ACCESS_REMOTE_WRITE ? C2_ACF_REMOTE_WRITE : 0) |
-	    (acc & IB_ACCESS_REMOTE_READ ? C2_ACF_REMOTE_READ : 0) |
-	    (acc & IB_ACCESS_LOCAL_WRITE ? C2_ACF_LOCAL_WRITE : 0) |
-	    C2_ACF_LOCAL_READ | C2_ACF_WINDOW_BIND;
-}
-
-static struct ib_mr *c2_get_dma_mr(struct ib_pd *pd, int acc)
-{
-	struct c2_mr *mr;
-	u64 *page_list;
-	const u32 total_len = 0xffffffff;	/* AMSO1100 limit */
-	int err, page_shift, pbl_depth, i;
-	u64 kva = 0;
-
-	pr_debug("%s:%u\n", __func__, __LINE__);
-
-	/*
-	 * This is a map of all phy mem...use a 32k page_shift.
-	 */
-	page_shift = PAGE_SHIFT + 3;
-	pbl_depth = ALIGN(total_len, BIT(page_shift)) >> page_shift;
-
-	page_list = vmalloc(sizeof(u64) * pbl_depth);
-	if (!page_list) {
-		pr_debug("couldn't vmalloc page_list of size %zd\n",
-			(sizeof(u64) * pbl_depth));
-		return ERR_PTR(-ENOMEM);
-	}
-
-	for (i = 0; i < pbl_depth; i++)
-		page_list[i] = (i << page_shift);
-
-	mr = kmalloc(sizeof(*mr), GFP_KERNEL);
-	if (!mr) {
-		vfree(page_list);
-		return ERR_PTR(-ENOMEM);
-	}
-
-	mr->pd = to_c2pd(pd);
-	mr->umem = NULL;
-	pr_debug("%s - page shift %d, pbl_depth %d, total_len %u, "
-		"*iova_start %llx, first pa %llx, last pa %llx\n",
-		__func__, page_shift, pbl_depth, total_len,
-		(unsigned long long) kva,
-	       	(unsigned long long) page_list[0],
-	       	(unsigned long long) page_list[pbl_depth-1]);
-	err = c2_nsmr_register_phys_kern(to_c2dev(pd->device), page_list,
-					 BIT(page_shift), pbl_depth,
-					 total_len, 0, &kva,
-					 c2_convert_access(acc), mr);
-	vfree(page_list);
-	if (err) {
-		kfree(mr);
-		return ERR_PTR(err);
-	}
-
-	return &mr->ibmr;
-}
-
-static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
-				    u64 virt, int acc, struct ib_udata *udata)
-{
-	u64 *pages;
-	u64 kva = 0;
-	int shift, n, len;
-	int i, k, entry;
-	int err = 0;
-	struct scatterlist *sg;
-	struct c2_pd *c2pd = to_c2pd(pd);
-	struct c2_mr *c2mr;
-
-	pr_debug("%s:%u\n", __func__, __LINE__);
-
-	c2mr = kmalloc(sizeof(*c2mr), GFP_KERNEL);
-	if (!c2mr)
-		return ERR_PTR(-ENOMEM);
-	c2mr->pd = c2pd;
-
-	c2mr->umem = ib_umem_get(pd->uobject->context, start, length, acc, 0);
-	if (IS_ERR(c2mr->umem)) {
-		err = PTR_ERR(c2mr->umem);
-		kfree(c2mr);
-		return ERR_PTR(err);
-	}
-
-	shift = ffs(c2mr->umem->page_size) - 1;
-	n = c2mr->umem->nmap;
-
-	pages = kmalloc_array(n, sizeof(u64), GFP_KERNEL);
-	if (!pages) {
-		err = -ENOMEM;
-		goto err;
-	}
-
-	i = 0;
-	for_each_sg(c2mr->umem->sg_head.sgl, sg, c2mr->umem->nmap, entry) {
-		len = sg_dma_len(sg) >> shift;
-		for (k = 0; k < len; ++k) {
-			pages[i++] =
-				sg_dma_address(sg) +
-				(c2mr->umem->page_size * k);
-		}
-	}
-
-	kva = virt;
-  	err = c2_nsmr_register_phys_kern(to_c2dev(pd->device),
-					 pages,
-					 c2mr->umem->page_size,
-					 i,
-					 length,
-					 ib_umem_offset(c2mr->umem),
-					 &kva,
-					 c2_convert_access(acc),
-					 c2mr);
-	kfree(pages);
-	if (err)
-		goto err;
-	return &c2mr->ibmr;
-
-err:
-	ib_umem_release(c2mr->umem);
-	kfree(c2mr);
-	return ERR_PTR(err);
-}
-
-static int c2_dereg_mr(struct ib_mr *ib_mr)
-{
-	struct c2_mr *mr = to_c2mr(ib_mr);
-	int err;
-
-	pr_debug("%s:%u\n", __func__, __LINE__);
-
-	err = c2_stag_dealloc(to_c2dev(ib_mr->device), ib_mr->lkey);
-	if (err)
-		pr_debug("c2_stag_dealloc failed: %d\n", err);
-	else {
-		if (mr->umem)
-			ib_umem_release(mr->umem);
-		kfree(mr);
-	}
-
-	return err;
-}
-
-static ssize_t show_rev(struct device *dev, struct device_attribute *attr,
-			char *buf)
-{
-	struct c2_dev *c2dev = container_of(dev, struct c2_dev, ibdev.dev);
-	pr_debug("%s:%u\n", __func__, __LINE__);
-	return sprintf(buf, "%x\n", c2dev->props.hw_ver);
-}
-
-static ssize_t show_fw_ver(struct device *dev, struct device_attribute *attr,
-			   char *buf)
-{
-	struct c2_dev *c2dev = container_of(dev, struct c2_dev, ibdev.dev);
-	pr_debug("%s:%u\n", __func__, __LINE__);
-	return sprintf(buf, "%x.%x.%x\n",
-		       (int) (c2dev->props.fw_ver >> 32),
-		       (int) (c2dev->props.fw_ver >> 16) & 0xffff,
-		       (int) (c2dev->props.fw_ver & 0xffff));
-}
-
-static ssize_t show_hca(struct device *dev, struct device_attribute *attr,
-			char *buf)
-{
-	pr_debug("%s:%u\n", __func__, __LINE__);
-	return sprintf(buf, "AMSO1100\n");
-}
-
-static ssize_t show_board(struct device *dev, struct device_attribute *attr,
-			  char *buf)
-{
-	pr_debug("%s:%u\n", __func__, __LINE__);
-	return sprintf(buf, "%.*s\n", 32, "AMSO1100 Board ID");
-}
-
-static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
-static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
-static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
-static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
-
-static struct device_attribute *c2_dev_attributes[] = {
-	&dev_attr_hw_rev,
-	&dev_attr_fw_ver,
-	&dev_attr_hca_type,
-	&dev_attr_board_id
-};
-
-static int c2_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
-			int attr_mask, struct ib_udata *udata)
-{
-	int err;
-
-	err =
-	    c2_qp_modify(to_c2dev(ibqp->device), to_c2qp(ibqp), attr,
-			 attr_mask);
-
-	return err;
-}
-
-static int c2_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
-{
-	pr_debug("%s:%u\n", __func__, __LINE__);
-	return -ENOSYS;
-}
-
-static int c2_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
-{
-	pr_debug("%s:%u\n", __func__, __LINE__);
-	return -ENOSYS;
-}
-
-static int c2_process_mad(struct ib_device *ibdev,
-			  int mad_flags,
-			  u8 port_num,
-			  const struct ib_wc *in_wc,
-			  const struct ib_grh *in_grh,
-			  const struct ib_mad_hdr *in_mad,
-			  size_t in_mad_size,
-			  struct ib_mad_hdr *out_mad,
-			  size_t *out_mad_size,
-			  u16 *out_mad_pkey_index)
-{
-	pr_debug("%s:%u\n", __func__, __LINE__);
-	return -ENOSYS;
-}
-
-static int c2_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
-{
-	pr_debug("%s:%u\n", __func__, __LINE__);
-
-	/* Request a connection */
-	return c2_llp_connect(cm_id, iw_param);
-}
-
-static int c2_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
-{
-	pr_debug("%s:%u\n", __func__, __LINE__);
-
-	/* Accept the new connection */
-	return c2_llp_accept(cm_id, iw_param);
-}
-
-static int c2_reject(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
-{
-	pr_debug("%s:%u\n", __func__, __LINE__);
-
-	return c2_llp_reject(cm_id, pdata, pdata_len);
-}
-
-static int c2_service_create(struct iw_cm_id *cm_id, int backlog)
-{
-	int err;
-
-	pr_debug("%s:%u\n", __func__, __LINE__);
-	err = c2_llp_service_create(cm_id, backlog);
-	pr_debug("%s:%u err=%d\n",
-		__func__, __LINE__,
-		err);
-	return err;
-}
-
-static int c2_service_destroy(struct iw_cm_id *cm_id)
-{
-	pr_debug("%s:%u\n", __func__, __LINE__);
-
-	return c2_llp_service_destroy(cm_id);
-}
-
-static int c2_pseudo_up(struct net_device *netdev)
-{
-	struct in_device *ind;
-	struct c2_dev *c2dev = netdev->ml_priv;
-
-	ind = in_dev_get(netdev);
-	if (!ind)
-		return 0;
-
-	pr_debug("adding...\n");
-	for_ifa(ind) {
-#ifdef DEBUG
-		u8 *ip = (u8 *) & ifa->ifa_address;
-
-		pr_debug("%s: %d.%d.%d.%d\n",
-		       ifa->ifa_label, ip[0], ip[1], ip[2], ip[3]);
-#endif
-		c2_add_addr(c2dev, ifa->ifa_address, ifa->ifa_mask);
-	}
-	endfor_ifa(ind);
-	in_dev_put(ind);
-
-	return 0;
-}
-
-static int c2_pseudo_down(struct net_device *netdev)
-{
-	struct in_device *ind;
-	struct c2_dev *c2dev = netdev->ml_priv;
-
-	ind = in_dev_get(netdev);
-	if (!ind)
-		return 0;
-
-	pr_debug("deleting...\n");
-	for_ifa(ind) {
-#ifdef DEBUG
-		u8 *ip = (u8 *) & ifa->ifa_address;
-
-		pr_debug("%s: %d.%d.%d.%d\n",
-		       ifa->ifa_label, ip[0], ip[1], ip[2], ip[3]);
-#endif
-		c2_del_addr(c2dev, ifa->ifa_address, ifa->ifa_mask);
-	}
-	endfor_ifa(ind);
-	in_dev_put(ind);
-
-	return 0;
-}
-
-static int c2_pseudo_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
-{
-	kfree_skb(skb);
-	return NETDEV_TX_OK;
-}
-
-static int c2_pseudo_change_mtu(struct net_device *netdev, int new_mtu)
-{
-	if (new_mtu < ETH_ZLEN || new_mtu > ETH_JUMBO_MTU)
-		return -EINVAL;
-
-	netdev->mtu = new_mtu;
-
-	/* TODO: Tell rnic about new rmda interface mtu */
-	return 0;
-}
-
-static const struct net_device_ops c2_pseudo_netdev_ops = {
-	.ndo_open 		= c2_pseudo_up,
-	.ndo_stop 		= c2_pseudo_down,
-	.ndo_start_xmit 	= c2_pseudo_xmit_frame,
-	.ndo_change_mtu 	= c2_pseudo_change_mtu,
-	.ndo_validate_addr	= eth_validate_addr,
-};
-
-static void setup(struct net_device *netdev)
-{
-	netdev->netdev_ops = &c2_pseudo_netdev_ops;
-
-	netdev->watchdog_timeo = 0;
-	netdev->type = ARPHRD_ETHER;
-	netdev->mtu = 1500;
-	netdev->hard_header_len = ETH_HLEN;
-	netdev->addr_len = ETH_ALEN;
-	netdev->tx_queue_len = 0;
-	netdev->flags |= IFF_NOARP;
-}
-
-static struct net_device *c2_pseudo_netdev_init(struct c2_dev *c2dev)
-{
-	char name[IFNAMSIZ];
-	struct net_device *netdev;
-
-	/* change ethxxx to iwxxx */
-	strcpy(name, "iw");
-	strcat(name, &c2dev->netdev->name[3]);
-	netdev = alloc_netdev(0, name, NET_NAME_UNKNOWN, setup);
-	if (!netdev) {
-		printk(KERN_ERR PFX "%s -  etherdev alloc failed",
-			__func__);
-		return NULL;
-	}
-
-	netdev->ml_priv = c2dev;
-
-	SET_NETDEV_DEV(netdev, &c2dev->pcidev->dev);
-
-	memcpy_fromio(netdev->dev_addr, c2dev->kva + C2_REGS_RDMA_ENADDR, 6);
-
-	/* Print out the MAC address */
-	pr_debug("%s: MAC %pM\n", netdev->name, netdev->dev_addr);
-
-#if 0
-	/* Disable network packets */
-	netif_stop_queue(netdev);
-#endif
-	return netdev;
-}
-
-static int c2_port_immutable(struct ib_device *ibdev, u8 port_num,
-			     struct ib_port_immutable *immutable)
-{
-	struct ib_port_attr attr;
-	int err;
-
-	err = c2_query_port(ibdev, port_num, &attr);
-	if (err)
-		return err;
-
-	immutable->pkey_tbl_len = attr.pkey_tbl_len;
-	immutable->gid_tbl_len = attr.gid_tbl_len;
-	immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
-
-	return 0;
-}
-
-int c2_register_device(struct c2_dev *dev)
-{
-	int ret = -ENOMEM;
-	int i;
-
-	/* Register pseudo network device */
-	dev->pseudo_netdev = c2_pseudo_netdev_init(dev);
-	if (!dev->pseudo_netdev)
-		goto out;
-
-	ret = register_netdev(dev->pseudo_netdev);
-	if (ret)
-		goto out_free_netdev;
-
-	pr_debug("%s:%u\n", __func__, __LINE__);
-	strlcpy(dev->ibdev.name, "amso%d", IB_DEVICE_NAME_MAX);
-	dev->ibdev.owner = THIS_MODULE;
-	dev->ibdev.uverbs_cmd_mask =
-	    (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
-	    (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
-	    (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
-	    (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
-	    (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
-	    (1ull << IB_USER_VERBS_CMD_REG_MR) |
-	    (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
-	    (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
-	    (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
-	    (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
-	    (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) |
-	    (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
-	    (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
-	    (1ull << IB_USER_VERBS_CMD_POLL_CQ) |
-	    (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
-	    (1ull << IB_USER_VERBS_CMD_POST_SEND) |
-	    (1ull << IB_USER_VERBS_CMD_POST_RECV);
-
-	dev->ibdev.node_type = RDMA_NODE_RNIC;
-	memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid));
-	memcpy(&dev->ibdev.node_guid, dev->pseudo_netdev->dev_addr, 6);
-	dev->ibdev.phys_port_cnt = 1;
-	dev->ibdev.num_comp_vectors = 1;
-	dev->ibdev.dma_device = &dev->pcidev->dev;
-	dev->ibdev.query_device = c2_query_device;
-	dev->ibdev.query_port = c2_query_port;
-	dev->ibdev.query_pkey = c2_query_pkey;
-	dev->ibdev.query_gid = c2_query_gid;
-	dev->ibdev.alloc_ucontext = c2_alloc_ucontext;
-	dev->ibdev.dealloc_ucontext = c2_dealloc_ucontext;
-	dev->ibdev.mmap = c2_mmap_uar;
-	dev->ibdev.alloc_pd = c2_alloc_pd;
-	dev->ibdev.dealloc_pd = c2_dealloc_pd;
-	dev->ibdev.create_ah = c2_ah_create;
-	dev->ibdev.destroy_ah = c2_ah_destroy;
-	dev->ibdev.create_qp = c2_create_qp;
-	dev->ibdev.modify_qp = c2_modify_qp;
-	dev->ibdev.destroy_qp = c2_destroy_qp;
-	dev->ibdev.create_cq = c2_create_cq;
-	dev->ibdev.destroy_cq = c2_destroy_cq;
-	dev->ibdev.poll_cq = c2_poll_cq;
-	dev->ibdev.get_dma_mr = c2_get_dma_mr;
-	dev->ibdev.reg_user_mr = c2_reg_user_mr;
-	dev->ibdev.dereg_mr = c2_dereg_mr;
-	dev->ibdev.get_port_immutable = c2_port_immutable;
-
-	dev->ibdev.alloc_fmr = NULL;
-	dev->ibdev.unmap_fmr = NULL;
-	dev->ibdev.dealloc_fmr = NULL;
-	dev->ibdev.map_phys_fmr = NULL;
-
-	dev->ibdev.attach_mcast = c2_multicast_attach;
-	dev->ibdev.detach_mcast = c2_multicast_detach;
-	dev->ibdev.process_mad = c2_process_mad;
-
-	dev->ibdev.req_notify_cq = c2_arm_cq;
-	dev->ibdev.post_send = c2_post_send;
-	dev->ibdev.post_recv = c2_post_receive;
-
-	dev->ibdev.iwcm = kmalloc(sizeof(*dev->ibdev.iwcm), GFP_KERNEL);
-	if (dev->ibdev.iwcm == NULL) {
-		ret = -ENOMEM;
-		goto out_unregister_netdev;
-	}
-	dev->ibdev.iwcm->add_ref = c2_add_ref;
-	dev->ibdev.iwcm->rem_ref = c2_rem_ref;
-	dev->ibdev.iwcm->get_qp = c2_get_qp;
-	dev->ibdev.iwcm->connect = c2_connect;
-	dev->ibdev.iwcm->accept = c2_accept;
-	dev->ibdev.iwcm->reject = c2_reject;
-	dev->ibdev.iwcm->create_listen = c2_service_create;
-	dev->ibdev.iwcm->destroy_listen = c2_service_destroy;
-
-	ret = ib_register_device(&dev->ibdev, NULL);
-	if (ret)
-		goto out_free_iwcm;
-
-	for (i = 0; i < ARRAY_SIZE(c2_dev_attributes); ++i) {
-		ret = device_create_file(&dev->ibdev.dev,
-					       c2_dev_attributes[i]);
-		if (ret)
-			goto out_unregister_ibdev;
-	}
-	goto out;
-
-out_unregister_ibdev:
-	ib_unregister_device(&dev->ibdev);
-out_free_iwcm:
-	kfree(dev->ibdev.iwcm);
-out_unregister_netdev:
-	unregister_netdev(dev->pseudo_netdev);
-out_free_netdev:
-	free_netdev(dev->pseudo_netdev);
-out:
-	pr_debug("%s:%u ret=%d\n", __func__, __LINE__, ret);
-	return ret;
-}
-
-void c2_unregister_device(struct c2_dev *dev)
-{
-	pr_debug("%s:%u\n", __func__, __LINE__);
-	unregister_netdev(dev->pseudo_netdev);
-	free_netdev(dev->pseudo_netdev);
-	ib_unregister_device(&dev->ibdev);
-}
diff --git a/drivers/staging/rdma/amso1100/c2_provider.h b/drivers/staging/rdma/amso1100/c2_provider.h
deleted file mode 100644
index bf18998..0000000
--- a/drivers/staging/rdma/amso1100/c2_provider.h
+++ /dev/null
@@ -1,182 +0,0 @@
-/*
- * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-#ifndef C2_PROVIDER_H
-#define C2_PROVIDER_H
-#include <linux/inetdevice.h>
-
-#include <rdma/ib_verbs.h>
-#include <rdma/ib_pack.h>
-
-#include "c2_mq.h"
-#include <rdma/iw_cm.h>
-
-#define C2_MPT_FLAG_ATOMIC        (1 << 14)
-#define C2_MPT_FLAG_REMOTE_WRITE  (1 << 13)
-#define C2_MPT_FLAG_REMOTE_READ   (1 << 12)
-#define C2_MPT_FLAG_LOCAL_WRITE   (1 << 11)
-#define C2_MPT_FLAG_LOCAL_READ    (1 << 10)
-
-struct c2_buf_list {
-	void *buf;
-	DEFINE_DMA_UNMAP_ADDR(mapping);
-};
-
-
-/* The user context keeps track of objects allocated for a
- * particular user-mode client. */
-struct c2_ucontext {
-	struct ib_ucontext ibucontext;
-};
-
-struct c2_mtt;
-
-/* All objects associated with a PD are kept in the
- * associated user context if present.
- */
-struct c2_pd {
-	struct ib_pd ibpd;
-	u32 pd_id;
-};
-
-struct c2_mr {
-	struct ib_mr ibmr;
-	struct c2_pd *pd;
-	struct ib_umem *umem;
-};
-
-struct c2_av;
-
-enum c2_ah_type {
-	C2_AH_ON_HCA,
-	C2_AH_PCI_POOL,
-	C2_AH_KMALLOC
-};
-
-struct c2_ah {
-	struct ib_ah ibah;
-};
-
-struct c2_cq {
-	struct ib_cq ibcq;
-	spinlock_t lock;
-	atomic_t refcount;
-	int cqn;
-	int is_kernel;
-	wait_queue_head_t wait;
-
-	u32 adapter_handle;
-	struct c2_mq mq;
-};
-
-struct c2_wq {
-	spinlock_t lock;
-};
-struct iw_cm_id;
-struct c2_qp {
-	struct ib_qp ibqp;
-	struct iw_cm_id *cm_id;
-	spinlock_t lock;
-	atomic_t refcount;
-	wait_queue_head_t wait;
-	int qpn;
-
-	u32 adapter_handle;
-	u32 send_sgl_depth;
-	u32 recv_sgl_depth;
-	u32 rdma_write_sgl_depth;
-	u8 state;
-
-	struct c2_mq sq_mq;
-	struct c2_mq rq_mq;
-};
-
-struct c2_cr_query_attrs {
-	u32 local_addr;
-	u32 remote_addr;
-	u16 local_port;
-	u16 remote_port;
-};
-
-static inline struct c2_pd *to_c2pd(struct ib_pd *ibpd)
-{
-	return container_of(ibpd, struct c2_pd, ibpd);
-}
-
-static inline struct c2_ucontext *to_c2ucontext(struct ib_ucontext *ibucontext)
-{
-	return container_of(ibucontext, struct c2_ucontext, ibucontext);
-}
-
-static inline struct c2_mr *to_c2mr(struct ib_mr *ibmr)
-{
-	return container_of(ibmr, struct c2_mr, ibmr);
-}
-
-
-static inline struct c2_ah *to_c2ah(struct ib_ah *ibah)
-{
-	return container_of(ibah, struct c2_ah, ibah);
-}
-
-static inline struct c2_cq *to_c2cq(struct ib_cq *ibcq)
-{
-	return container_of(ibcq, struct c2_cq, ibcq);
-}
-
-static inline struct c2_qp *to_c2qp(struct ib_qp *ibqp)
-{
-	return container_of(ibqp, struct c2_qp, ibqp);
-}
-
-static inline int is_rnic_addr(struct net_device *netdev, u32 addr)
-{
-	struct in_device *ind;
-	int ret = 0;
-
-	ind = in_dev_get(netdev);
-	if (!ind)
-		return 0;
-
-	for_ifa(ind) {
-		if (ifa->ifa_address == addr) {
-			ret = 1;
-			break;
-		}
-	}
-	endfor_ifa(ind);
-	in_dev_put(ind);
-	return ret;
-}
-#endif				/* C2_PROVIDER_H */
diff --git a/drivers/staging/rdma/amso1100/c2_qp.c b/drivers/staging/rdma/amso1100/c2_qp.c
deleted file mode 100644
index ca364db..0000000
--- a/drivers/staging/rdma/amso1100/c2_qp.c
+++ /dev/null
@@ -1,1024 +0,0 @@
-/*
- * Copyright (c) 2004 Topspin Communications.  All rights reserved.
- * Copyright (c) 2005 Cisco Systems. All rights reserved.
- * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
- * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-#include <linux/delay.h>
-#include <linux/gfp.h>
-
-#include "c2.h"
-#include "c2_vq.h"
-#include "c2_status.h"
-
-#define C2_MAX_ORD_PER_QP 128
-#define C2_MAX_IRD_PER_QP 128
-
-#define C2_HINT_MAKE(q_index, hint_count) (((q_index) << 16) | hint_count)
-#define C2_HINT_GET_INDEX(hint) (((hint) & 0x7FFF0000) >> 16)
-#define C2_HINT_GET_COUNT(hint) ((hint) & 0x0000FFFF)
-
-#define NO_SUPPORT -1
-static const u8 c2_opcode[] = {
-	[IB_WR_SEND] = C2_WR_TYPE_SEND,
-	[IB_WR_SEND_WITH_IMM] = NO_SUPPORT,
-	[IB_WR_RDMA_WRITE] = C2_WR_TYPE_RDMA_WRITE,
-	[IB_WR_RDMA_WRITE_WITH_IMM] = NO_SUPPORT,
-	[IB_WR_RDMA_READ] = C2_WR_TYPE_RDMA_READ,
-	[IB_WR_ATOMIC_CMP_AND_SWP] = NO_SUPPORT,
-	[IB_WR_ATOMIC_FETCH_AND_ADD] = NO_SUPPORT,
-};
-
-static int to_c2_state(enum ib_qp_state ib_state)
-{
-	switch (ib_state) {
-	case IB_QPS_RESET:
-		return C2_QP_STATE_IDLE;
-	case IB_QPS_RTS:
-		return C2_QP_STATE_RTS;
-	case IB_QPS_SQD:
-		return C2_QP_STATE_CLOSING;
-	case IB_QPS_SQE:
-		return C2_QP_STATE_CLOSING;
-	case IB_QPS_ERR:
-		return C2_QP_STATE_ERROR;
-	default:
-		return -1;
-	}
-}
-
-static int to_ib_state(enum c2_qp_state c2_state)
-{
-	switch (c2_state) {
-	case C2_QP_STATE_IDLE:
-		return IB_QPS_RESET;
-	case C2_QP_STATE_CONNECTING:
-		return IB_QPS_RTR;
-	case C2_QP_STATE_RTS:
-		return IB_QPS_RTS;
-	case C2_QP_STATE_CLOSING:
-		return IB_QPS_SQD;
-	case C2_QP_STATE_ERROR:
-		return IB_QPS_ERR;
-	case C2_QP_STATE_TERMINATE:
-		return IB_QPS_SQE;
-	default:
-		return -1;
-	}
-}
-
-static const char *to_ib_state_str(int ib_state)
-{
-	static const char *state_str[] = {
-		"IB_QPS_RESET",
-		"IB_QPS_INIT",
-		"IB_QPS_RTR",
-		"IB_QPS_RTS",
-		"IB_QPS_SQD",
-		"IB_QPS_SQE",
-		"IB_QPS_ERR"
-	};
-	if (ib_state < IB_QPS_RESET ||
-	    ib_state > IB_QPS_ERR)
-		return "<invalid IB QP state>";
-
-	ib_state -= IB_QPS_RESET;
-	return state_str[ib_state];
-}
-
-void c2_set_qp_state(struct c2_qp *qp, int c2_state)
-{
-	int new_state = to_ib_state(c2_state);
-
-	pr_debug("%s: qp[%p] state modify %s --> %s\n",
-	       __func__,
-		qp,
-		to_ib_state_str(qp->state),
-		to_ib_state_str(new_state));
-	qp->state = new_state;
-}
-
-#define C2_QP_NO_ATTR_CHANGE 0xFFFFFFFF
-
-int c2_qp_modify(struct c2_dev *c2dev, struct c2_qp *qp,
-		 struct ib_qp_attr *attr, int attr_mask)
-{
-	struct c2wr_qp_modify_req wr;
-	struct c2wr_qp_modify_rep *reply;
-	struct c2_vq_req *vq_req;
-	unsigned long flags;
-	u8 next_state;
-	int err;
-
-	pr_debug("%s:%d qp=%p, %s --> %s\n",
-		__func__, __LINE__,
-		qp,
-		to_ib_state_str(qp->state),
-		to_ib_state_str(attr->qp_state));
-
-	vq_req = vq_req_alloc(c2dev);
-	if (!vq_req)
-		return -ENOMEM;
-
-	c2_wr_set_id(&wr, CCWR_QP_MODIFY);
-	wr.hdr.context = (unsigned long) vq_req;
-	wr.rnic_handle = c2dev->adapter_handle;
-	wr.qp_handle = qp->adapter_handle;
-	wr.ord = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
-	wr.ird = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
-	wr.sq_depth = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
-	wr.rq_depth = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
-
-	if (attr_mask & IB_QP_STATE) {
-		/* Ensure the state is valid */
-		if (attr->qp_state < 0 || attr->qp_state > IB_QPS_ERR) {
-			err = -EINVAL;
-			goto bail0;
-		}
-
-		wr.next_qp_state = cpu_to_be32(to_c2_state(attr->qp_state));
-
-		if (attr->qp_state == IB_QPS_ERR) {
-			spin_lock_irqsave(&qp->lock, flags);
-			if (qp->cm_id && qp->state == IB_QPS_RTS) {
-				pr_debug("Generating CLOSE event for QP-->ERR, "
-					"qp=%p, cm_id=%p\n",qp,qp->cm_id);
-				/* Generate an CLOSE event */
-				vq_req->cm_id = qp->cm_id;
-				vq_req->event = IW_CM_EVENT_CLOSE;
-			}
-			spin_unlock_irqrestore(&qp->lock, flags);
-		}
-		next_state =  attr->qp_state;
-
-	} else if (attr_mask & IB_QP_CUR_STATE) {
-
-		if (attr->cur_qp_state != IB_QPS_RTR &&
-		    attr->cur_qp_state != IB_QPS_RTS &&
-		    attr->cur_qp_state != IB_QPS_SQD &&
-		    attr->cur_qp_state != IB_QPS_SQE) {
-			err = -EINVAL;
-			goto bail0;
-		} else
-			wr.next_qp_state =
-			    cpu_to_be32(to_c2_state(attr->cur_qp_state));
-
-		next_state = attr->cur_qp_state;
-
-	} else {
-		err = 0;
-		goto bail0;
-	}
-
-	/* reference the request struct */
-	vq_req_get(c2dev, vq_req);
-
-	err = vq_send_wr(c2dev, (union c2wr *) & wr);
-	if (err) {
-		vq_req_put(c2dev, vq_req);
-		goto bail0;
-	}
-
-	err = vq_wait_for_reply(c2dev, vq_req);
-	if (err)
-		goto bail0;
-
-	reply = (struct c2wr_qp_modify_rep *) (unsigned long) vq_req->reply_msg;
-	if (!reply) {
-		err = -ENOMEM;
-		goto bail0;
-	}
-
-	err = c2_errno(reply);
-	if (!err)
-		qp->state = next_state;
-#ifdef DEBUG
-	else
-		pr_debug("%s: c2_errno=%d\n", __func__, err);
-#endif
-	/*
-	 * If we're going to error and generating the event here, then
-	 * we need to remove the reference because there will be no
-	 * close event generated by the adapter
-	*/
-	spin_lock_irqsave(&qp->lock, flags);
-	if (vq_req->event==IW_CM_EVENT_CLOSE && qp->cm_id) {
-		qp->cm_id->rem_ref(qp->cm_id);
-		qp->cm_id = NULL;
-	}
-	spin_unlock_irqrestore(&qp->lock, flags);
-
-	vq_repbuf_free(c2dev, reply);
-bail0:
-	vq_req_free(c2dev, vq_req);
-
-	pr_debug("%s:%d qp=%p, cur_state=%s\n",
-		__func__, __LINE__,
-		qp,
-		to_ib_state_str(qp->state));
-	return err;
-}
-
-int c2_qp_set_read_limits(struct c2_dev *c2dev, struct c2_qp *qp,
-			  int ord, int ird)
-{
-	struct c2wr_qp_modify_req wr;
-	struct c2wr_qp_modify_rep *reply;
-	struct c2_vq_req *vq_req;
-	int err;
-
-	vq_req = vq_req_alloc(c2dev);
-	if (!vq_req)
-		return -ENOMEM;
-
-	c2_wr_set_id(&wr, CCWR_QP_MODIFY);
-	wr.hdr.context = (unsigned long) vq_req;
-	wr.rnic_handle = c2dev->adapter_handle;
-	wr.qp_handle = qp->adapter_handle;
-	wr.ord = cpu_to_be32(ord);
-	wr.ird = cpu_to_be32(ird);
-	wr.sq_depth = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
-	wr.rq_depth = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
-	wr.next_qp_state = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
-
-	/* reference the request struct */
-	vq_req_get(c2dev, vq_req);
-
-	err = vq_send_wr(c2dev, (union c2wr *) & wr);
-	if (err) {
-		vq_req_put(c2dev, vq_req);
-		goto bail0;
-	}
-
-	err = vq_wait_for_reply(c2dev, vq_req);
-	if (err)
-		goto bail0;
-
-	reply = (struct c2wr_qp_modify_rep *) (unsigned long)
-		vq_req->reply_msg;
-	if (!reply) {
-		err = -ENOMEM;
-		goto bail0;
-	}
-
-	err = c2_errno(reply);
-	vq_repbuf_free(c2dev, reply);
-bail0:
-	vq_req_free(c2dev, vq_req);
-	return err;
-}
-
-static int destroy_qp(struct c2_dev *c2dev, struct c2_qp *qp)
-{
-	struct c2_vq_req *vq_req;
-	struct c2wr_qp_destroy_req wr;
-	struct c2wr_qp_destroy_rep *reply;
-	unsigned long flags;
-	int err;
-
-	/*
-	 * Allocate a verb request message
-	 */
-	vq_req = vq_req_alloc(c2dev);
-	if (!vq_req) {
-		return -ENOMEM;
-	}
-
-	/*
-	 * Initialize the WR
-	 */
-	c2_wr_set_id(&wr, CCWR_QP_DESTROY);
-	wr.hdr.context = (unsigned long) vq_req;
-	wr.rnic_handle = c2dev->adapter_handle;
-	wr.qp_handle = qp->adapter_handle;
-
-	/*
-	 * reference the request struct.  dereferenced in the int handler.
-	 */
-	vq_req_get(c2dev, vq_req);
-
-	spin_lock_irqsave(&qp->lock, flags);
-	if (qp->cm_id && qp->state == IB_QPS_RTS) {
-		pr_debug("destroy_qp: generating CLOSE event for QP-->ERR, "
-			"qp=%p, cm_id=%p\n",qp,qp->cm_id);
-		/* Generate an CLOSE event */
-		vq_req->qp = qp;
-		vq_req->cm_id = qp->cm_id;
-		vq_req->event = IW_CM_EVENT_CLOSE;
-	}
-	spin_unlock_irqrestore(&qp->lock, flags);
-
-	/*
-	 * Send WR to adapter
-	 */
-	err = vq_send_wr(c2dev, (union c2wr *) & wr);
-	if (err) {
-		vq_req_put(c2dev, vq_req);
-		goto bail0;
-	}
-
-	/*
-	 * Wait for reply from adapter
-	 */
-	err = vq_wait_for_reply(c2dev, vq_req);
-	if (err) {
-		goto bail0;
-	}
-
-	/*
-	 * Process reply
-	 */
-	reply = (struct c2wr_qp_destroy_rep *) (unsigned long) (vq_req->reply_msg);
-	if (!reply) {
-		err = -ENOMEM;
-		goto bail0;
-	}
-
-	spin_lock_irqsave(&qp->lock, flags);
-	if (qp->cm_id) {
-		qp->cm_id->rem_ref(qp->cm_id);
-		qp->cm_id = NULL;
-	}
-	spin_unlock_irqrestore(&qp->lock, flags);
-
-	vq_repbuf_free(c2dev, reply);
-bail0:
-	vq_req_free(c2dev, vq_req);
-	return err;
-}
-
-static int c2_alloc_qpn(struct c2_dev *c2dev, struct c2_qp *qp)
-{
-	int ret;
-
-	idr_preload(GFP_KERNEL);
-	spin_lock_irq(&c2dev->qp_table.lock);
-
-	ret = idr_alloc_cyclic(&c2dev->qp_table.idr, qp, 0, 0, GFP_NOWAIT);
-	if (ret >= 0)
-		qp->qpn = ret;
-
-	spin_unlock_irq(&c2dev->qp_table.lock);
-	idr_preload_end();
-	return ret < 0 ? ret : 0;
-}
-
-static void c2_free_qpn(struct c2_dev *c2dev, int qpn)
-{
-	spin_lock_irq(&c2dev->qp_table.lock);
-	idr_remove(&c2dev->qp_table.idr, qpn);
-	spin_unlock_irq(&c2dev->qp_table.lock);
-}
-
-struct c2_qp *c2_find_qpn(struct c2_dev *c2dev, int qpn)
-{
-	unsigned long flags;
-	struct c2_qp *qp;
-
-	spin_lock_irqsave(&c2dev->qp_table.lock, flags);
-	qp = idr_find(&c2dev->qp_table.idr, qpn);
-	spin_unlock_irqrestore(&c2dev->qp_table.lock, flags);
-	return qp;
-}
-
-int c2_alloc_qp(struct c2_dev *c2dev,
-		struct c2_pd *pd,
-		struct ib_qp_init_attr *qp_attrs, struct c2_qp *qp)
-{
-	struct c2wr_qp_create_req wr;
-	struct c2wr_qp_create_rep *reply;
-	struct c2_vq_req *vq_req;
-	struct c2_cq *send_cq = to_c2cq(qp_attrs->send_cq);
-	struct c2_cq *recv_cq = to_c2cq(qp_attrs->recv_cq);
-	unsigned long peer_pa;
-	u32 q_size, msg_size, mmap_size;
-	void __iomem *mmap;
-	int err;
-
-	err = c2_alloc_qpn(c2dev, qp);
-	if (err)
-		return err;
-	qp->ibqp.qp_num = qp->qpn;
-	qp->ibqp.qp_type = IB_QPT_RC;
-
-	/* Allocate the SQ and RQ shared pointers */
-	qp->sq_mq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
-					 &qp->sq_mq.shared_dma, GFP_KERNEL);
-	if (!qp->sq_mq.shared) {
-		err = -ENOMEM;
-		goto bail0;
-	}
-
-	qp->rq_mq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
-					 &qp->rq_mq.shared_dma, GFP_KERNEL);
-	if (!qp->rq_mq.shared) {
-		err = -ENOMEM;
-		goto bail1;
-	}
-
-	/* Allocate the verbs request */
-	vq_req = vq_req_alloc(c2dev);
-	if (vq_req == NULL) {
-		err = -ENOMEM;
-		goto bail2;
-	}
-
-	/* Initialize the work request */
-	memset(&wr, 0, sizeof(wr));
-	c2_wr_set_id(&wr, CCWR_QP_CREATE);
-	wr.hdr.context = (unsigned long) vq_req;
-	wr.rnic_handle = c2dev->adapter_handle;
-	wr.sq_cq_handle = send_cq->adapter_handle;
-	wr.rq_cq_handle = recv_cq->adapter_handle;
-	wr.sq_depth = cpu_to_be32(qp_attrs->cap.max_send_wr + 1);
-	wr.rq_depth = cpu_to_be32(qp_attrs->cap.max_recv_wr + 1);
-	wr.srq_handle = 0;
-	wr.flags = cpu_to_be32(QP_RDMA_READ | QP_RDMA_WRITE | QP_MW_BIND |
-			       QP_ZERO_STAG | QP_RDMA_READ_RESPONSE);
-	wr.send_sgl_depth = cpu_to_be32(qp_attrs->cap.max_send_sge);
-	wr.recv_sgl_depth = cpu_to_be32(qp_attrs->cap.max_recv_sge);
-	wr.rdma_write_sgl_depth = cpu_to_be32(qp_attrs->cap.max_send_sge);
-	wr.shared_sq_ht = cpu_to_be64(qp->sq_mq.shared_dma);
-	wr.shared_rq_ht = cpu_to_be64(qp->rq_mq.shared_dma);
-	wr.ord = cpu_to_be32(C2_MAX_ORD_PER_QP);
-	wr.ird = cpu_to_be32(C2_MAX_IRD_PER_QP);
-	wr.pd_id = pd->pd_id;
-	wr.user_context = (unsigned long) qp;
-
-	vq_req_get(c2dev, vq_req);
-
-	/* Send the WR to the adapter */
-	err = vq_send_wr(c2dev, (union c2wr *) & wr);
-	if (err) {
-		vq_req_put(c2dev, vq_req);
-		goto bail3;
-	}
-
-	/* Wait for the verb reply  */
-	err = vq_wait_for_reply(c2dev, vq_req);
-	if (err) {
-		goto bail3;
-	}
-
-	/* Process the reply */
-	reply = (struct c2wr_qp_create_rep *) (unsigned long) (vq_req->reply_msg);
-	if (!reply) {
-		err = -ENOMEM;
-		goto bail3;
-	}
-
-	if ((err = c2_wr_get_result(reply)) != 0) {
-		goto bail4;
-	}
-
-	/* Fill in the kernel QP struct */
-	atomic_set(&qp->refcount, 1);
-	qp->adapter_handle = reply->qp_handle;
-	qp->state = IB_QPS_RESET;
-	qp->send_sgl_depth = qp_attrs->cap.max_send_sge;
-	qp->rdma_write_sgl_depth = qp_attrs->cap.max_send_sge;
-	qp->recv_sgl_depth = qp_attrs->cap.max_recv_sge;
-	init_waitqueue_head(&qp->wait);
-
-	/* Initialize the SQ MQ */
-	q_size = be32_to_cpu(reply->sq_depth);
-	msg_size = be32_to_cpu(reply->sq_msg_size);
-	peer_pa = c2dev->pa + be32_to_cpu(reply->sq_mq_start);
-	mmap_size = PAGE_ALIGN(sizeof(struct c2_mq_shared) + msg_size * q_size);
-	mmap = ioremap_nocache(peer_pa, mmap_size);
-	if (!mmap) {
-		err = -ENOMEM;
-		goto bail5;
-	}
-
-	c2_mq_req_init(&qp->sq_mq,
-		       be32_to_cpu(reply->sq_mq_index),
-		       q_size,
-		       msg_size,
-		       mmap + sizeof(struct c2_mq_shared),	/* pool start */
-		       mmap,				/* peer */
-		       C2_MQ_ADAPTER_TARGET);
-
-	/* Initialize the RQ mq */
-	q_size = be32_to_cpu(reply->rq_depth);
-	msg_size = be32_to_cpu(reply->rq_msg_size);
-	peer_pa = c2dev->pa + be32_to_cpu(reply->rq_mq_start);
-	mmap_size = PAGE_ALIGN(sizeof(struct c2_mq_shared) + msg_size * q_size);
-	mmap = ioremap_nocache(peer_pa, mmap_size);
-	if (!mmap) {
-		err = -ENOMEM;
-		goto bail6;
-	}
-
-	c2_mq_req_init(&qp->rq_mq,
-		       be32_to_cpu(reply->rq_mq_index),
-		       q_size,
-		       msg_size,
-		       mmap + sizeof(struct c2_mq_shared),	/* pool start */
-		       mmap,				/* peer */
-		       C2_MQ_ADAPTER_TARGET);
-
-	vq_repbuf_free(c2dev, reply);
-	vq_req_free(c2dev, vq_req);
-
-	return 0;
-
-bail6:
-	iounmap(qp->sq_mq.peer);
-bail5:
-	destroy_qp(c2dev, qp);
-bail4:
-	vq_repbuf_free(c2dev, reply);
-bail3:
-	vq_req_free(c2dev, vq_req);
-bail2:
-	c2_free_mqsp(qp->rq_mq.shared);
-bail1:
-	c2_free_mqsp(qp->sq_mq.shared);
-bail0:
-	c2_free_qpn(c2dev, qp->qpn);
-	return err;
-}
-
-static inline void c2_lock_cqs(struct c2_cq *send_cq, struct c2_cq *recv_cq)
-{
-	if (send_cq == recv_cq)
-		spin_lock_irq(&send_cq->lock);
-	else if (send_cq > recv_cq) {
-		spin_lock_irq(&send_cq->lock);
-		spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING);
-	} else {
-		spin_lock_irq(&recv_cq->lock);
-		spin_lock_nested(&send_cq->lock, SINGLE_DEPTH_NESTING);
-	}
-}
-
-static inline void c2_unlock_cqs(struct c2_cq *send_cq, struct c2_cq *recv_cq)
-{
-	if (send_cq == recv_cq)
-		spin_unlock_irq(&send_cq->lock);
-	else if (send_cq > recv_cq) {
-		spin_unlock(&recv_cq->lock);
-		spin_unlock_irq(&send_cq->lock);
-	} else {
-		spin_unlock(&send_cq->lock);
-		spin_unlock_irq(&recv_cq->lock);
-	}
-}
-
-void c2_free_qp(struct c2_dev *c2dev, struct c2_qp *qp)
-{
-	struct c2_cq *send_cq;
-	struct c2_cq *recv_cq;
-
-	send_cq = to_c2cq(qp->ibqp.send_cq);
-	recv_cq = to_c2cq(qp->ibqp.recv_cq);
-
-	/*
-	 * Lock CQs here, so that CQ polling code can do QP lookup
-	 * without taking a lock.
-	 */
-	c2_lock_cqs(send_cq, recv_cq);
-	c2_free_qpn(c2dev, qp->qpn);
-	c2_unlock_cqs(send_cq, recv_cq);
-
-	/*
-	 * Destroy qp in the rnic...
-	 */
-	destroy_qp(c2dev, qp);
-
-	/*
-	 * Mark any unreaped CQEs as null and void.
-	 */
-	c2_cq_clean(c2dev, qp, send_cq->cqn);
-	if (send_cq != recv_cq)
-		c2_cq_clean(c2dev, qp, recv_cq->cqn);
-	/*
-	 * Unmap the MQs and return the shared pointers
-	 * to the message pool.
-	 */
-	iounmap(qp->sq_mq.peer);
-	iounmap(qp->rq_mq.peer);
-	c2_free_mqsp(qp->sq_mq.shared);
-	c2_free_mqsp(qp->rq_mq.shared);
-
-	atomic_dec(&qp->refcount);
-	wait_event(qp->wait, !atomic_read(&qp->refcount));
-}
-
-/*
- * Function: move_sgl
- *
- * Description:
- * Move an SGL from the user's work request struct into a CCIL Work Request
- * message, swapping to WR byte order and ensure the total length doesn't
- * overflow.
- *
- * IN:
- * dst		- ptr to CCIL Work Request message SGL memory.
- * src		- ptr to the consumers SGL memory.
- *
- * OUT: none
- *
- * Return:
- * CCIL status codes.
- */
-static int
-move_sgl(struct c2_data_addr * dst, struct ib_sge *src, int count, u32 * p_len,
-	 u8 * actual_count)
-{
-	u32 tot = 0;		/* running total */
-	u8 acount = 0;		/* running total non-0 len sge's */
-
-	while (count > 0) {
-		/*
-		 * If the addition of this SGE causes the
-		 * total SGL length to exceed 2^32-1, then
-		 * fail-n-bail.
-		 *
-		 * If the current total plus the next element length
-		 * wraps, then it will go negative and be less than the
-		 * current total...
-		 */
-		if ((tot + src->length) < tot) {
-			return -EINVAL;
-		}
-		/*
-		 * Bug: 1456 (as well as 1498 & 1643)
-		 * Skip over any sge's supplied with len=0
-		 */
-		if (src->length) {
-			tot += src->length;
-			dst->stag = cpu_to_be32(src->lkey);
-			dst->to = cpu_to_be64(src->addr);
-			dst->length = cpu_to_be32(src->length);
-			dst++;
-			acount++;
-		}
-		src++;
-		count--;
-	}
-
-	if (acount == 0) {
-		/*
-		 * Bug: 1476 (as well as 1498, 1456 and 1643)
-		 * Setup the SGL in the WR to make it easier for the RNIC.
-		 * This way, the FW doesn't have to deal with special cases.
-		 * Setting length=0 should be sufficient.
-		 */
-		dst->stag = 0;
-		dst->to = 0;
-		dst->length = 0;
-	}
-
-	*p_len = tot;
-	*actual_count = acount;
-	return 0;
-}
-
-/*
- * Function: c2_activity (private function)
- *
- * Description:
- * Post an mq index to the host->adapter activity fifo.
- *
- * IN:
- * c2dev	- ptr to c2dev structure
- * mq_index	- mq index to post
- * shared	- value most recently written to shared
- *
- * OUT:
- *
- * Return:
- * none
- */
-static inline void c2_activity(struct c2_dev *c2dev, u32 mq_index, u16 shared)
-{
-	/*
-	 * First read the register to see if the FIFO is full, and if so,
-	 * spin until it's not.  This isn't perfect -- there is no
-	 * synchronization among the clients of the register, but in
-	 * practice it prevents multiple CPU from hammering the bus
-	 * with PCI RETRY. Note that when this does happen, the card
-	 * cannot get on the bus and the card and system hang in a
-	 * deadlock -- thus the need for this code. [TOT]
-	 */
-	while (readl(c2dev->regs + PCI_BAR0_ADAPTER_HINT) & 0x80000000)
-		udelay(10);
-
-	__raw_writel(C2_HINT_MAKE(mq_index, shared),
-		     c2dev->regs + PCI_BAR0_ADAPTER_HINT);
-}
-
-/*
- * Function: qp_wr_post
- *
- * Description:
- * This in-line function allocates a MQ msg, then moves the host-copy of
- * the completed WR into msg.  Then it posts the message.
- *
- * IN:
- * q		- ptr to user MQ.
- * wr		- ptr to host-copy of the WR.
- * qp		- ptr to user qp
- * size		- Number of bytes to post.  Assumed to be divisible by 4.
- *
- * OUT: none
- *
- * Return:
- * CCIL status codes.
- */
-static int qp_wr_post(struct c2_mq *q, union c2wr * wr, struct c2_qp *qp, u32 size)
-{
-	union c2wr *msg;
-
-	msg = c2_mq_alloc(q);
-	if (msg == NULL) {
-		return -EINVAL;
-	}
-#ifdef CCMSGMAGIC
-	((c2wr_hdr_t *) wr)->magic = cpu_to_be32(CCWR_MAGIC);
-#endif
-
-	/*
-	 * Since all header fields in the WR are the same as the
-	 * CQE, set the following so the adapter need not.
-	 */
-	c2_wr_set_result(wr, CCERR_PENDING);
-
-	/*
-	 * Copy the wr down to the adapter
-	 */
-	memcpy((void *) msg, (void *) wr, size);
-
-	c2_mq_produce(q);
-	return 0;
-}
-
-
-int c2_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
-		 struct ib_send_wr **bad_wr)
-{
-	struct c2_dev *c2dev = to_c2dev(ibqp->device);
-	struct c2_qp *qp = to_c2qp(ibqp);
-	union c2wr wr;
-	unsigned long lock_flags;
-	int err = 0;
-
-	u32 flags;
-	u32 tot_len;
-	u8 actual_sge_count;
-	u32 msg_size;
-
-	if (qp->state > IB_QPS_RTS) {
-		err = -EINVAL;
-		goto out;
-	}
-
-	while (ib_wr) {
-
-		flags = 0;
-		wr.sqwr.sq_hdr.user_hdr.hdr.context = ib_wr->wr_id;
-		if (ib_wr->send_flags & IB_SEND_SIGNALED) {
-			flags |= SQ_SIGNALED;
-		}
-
-		switch (ib_wr->opcode) {
-		case IB_WR_SEND:
-		case IB_WR_SEND_WITH_INV:
-			if (ib_wr->opcode == IB_WR_SEND) {
-				if (ib_wr->send_flags & IB_SEND_SOLICITED)
-					c2_wr_set_id(&wr, C2_WR_TYPE_SEND_SE);
-				else
-					c2_wr_set_id(&wr, C2_WR_TYPE_SEND);
-				wr.sqwr.send.remote_stag = 0;
-			} else {
-				if (ib_wr->send_flags & IB_SEND_SOLICITED)
-					c2_wr_set_id(&wr, C2_WR_TYPE_SEND_SE_INV);
-				else
-					c2_wr_set_id(&wr, C2_WR_TYPE_SEND_INV);
-				wr.sqwr.send.remote_stag =
-					cpu_to_be32(ib_wr->ex.invalidate_rkey);
-			}
-
-			msg_size = sizeof(struct c2wr_send_req) +
-				sizeof(struct c2_data_addr) * ib_wr->num_sge;
-			if (ib_wr->num_sge > qp->send_sgl_depth) {
-				err = -EINVAL;
-				break;
-			}
-			if (ib_wr->send_flags & IB_SEND_FENCE) {
-				flags |= SQ_READ_FENCE;
-			}
-			err = move_sgl((struct c2_data_addr *) & (wr.sqwr.send.data),
-				       ib_wr->sg_list,
-				       ib_wr->num_sge,
-				       &tot_len, &actual_sge_count);
-			wr.sqwr.send.sge_len = cpu_to_be32(tot_len);
-			c2_wr_set_sge_count(&wr, actual_sge_count);
-			break;
-		case IB_WR_RDMA_WRITE:
-			c2_wr_set_id(&wr, C2_WR_TYPE_RDMA_WRITE);
-			msg_size = sizeof(struct c2wr_rdma_write_req) +
-			    (sizeof(struct c2_data_addr) * ib_wr->num_sge);
-			if (ib_wr->num_sge > qp->rdma_write_sgl_depth) {
-				err = -EINVAL;
-				break;
-			}
-			if (ib_wr->send_flags & IB_SEND_FENCE) {
-				flags |= SQ_READ_FENCE;
-			}
-			wr.sqwr.rdma_write.remote_stag =
-			    cpu_to_be32(rdma_wr(ib_wr)->rkey);
-			wr.sqwr.rdma_write.remote_to =
-			    cpu_to_be64(rdma_wr(ib_wr)->remote_addr);
-			err = move_sgl((struct c2_data_addr *)
-				       & (wr.sqwr.rdma_write.data),
-				       ib_wr->sg_list,
-				       ib_wr->num_sge,
-				       &tot_len, &actual_sge_count);
-			wr.sqwr.rdma_write.sge_len = cpu_to_be32(tot_len);
-			c2_wr_set_sge_count(&wr, actual_sge_count);
-			break;
-		case IB_WR_RDMA_READ:
-			c2_wr_set_id(&wr, C2_WR_TYPE_RDMA_READ);
-			msg_size = sizeof(struct c2wr_rdma_read_req);
-
-			/* IWarp only suppots 1 sge for RDMA reads */
-			if (ib_wr->num_sge > 1) {
-				err = -EINVAL;
-				break;
-			}
-
-			/*
-			 * Move the local and remote stag/to/len into the WR.
-			 */
-			wr.sqwr.rdma_read.local_stag =
-			    cpu_to_be32(ib_wr->sg_list->lkey);
-			wr.sqwr.rdma_read.local_to =
-			    cpu_to_be64(ib_wr->sg_list->addr);
-			wr.sqwr.rdma_read.remote_stag =
-			    cpu_to_be32(rdma_wr(ib_wr)->rkey);
-			wr.sqwr.rdma_read.remote_to =
-			    cpu_to_be64(rdma_wr(ib_wr)->remote_addr);
-			wr.sqwr.rdma_read.length =
-			    cpu_to_be32(ib_wr->sg_list->length);
-			break;
-		default:
-			/* error */
-			msg_size = 0;
-			err = -EINVAL;
-			break;
-		}
-
-		/*
-		 * If we had an error on the last wr build, then
-		 * break out.  Possible errors include bogus WR
-		 * type, and a bogus SGL length...
-		 */
-		if (err) {
-			break;
-		}
-
-		/*
-		 * Store flags
-		 */
-		c2_wr_set_flags(&wr, flags);
-
-		/*
-		 * Post the puppy!
-		 */
-		spin_lock_irqsave(&qp->lock, lock_flags);
-		err = qp_wr_post(&qp->sq_mq, &wr, qp, msg_size);
-		if (err) {
-			spin_unlock_irqrestore(&qp->lock, lock_flags);
-			break;
-		}
-
-		/*
-		 * Enqueue mq index to activity FIFO.
-		 */
-		c2_activity(c2dev, qp->sq_mq.index, qp->sq_mq.hint_count);
-		spin_unlock_irqrestore(&qp->lock, lock_flags);
-
-		ib_wr = ib_wr->next;
-	}
-
-out:
-	if (err)
-		*bad_wr = ib_wr;
-	return err;
-}
-
-int c2_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *ib_wr,
-		    struct ib_recv_wr **bad_wr)
-{
-	struct c2_dev *c2dev = to_c2dev(ibqp->device);
-	struct c2_qp *qp = to_c2qp(ibqp);
-	union c2wr wr;
-	unsigned long lock_flags;
-	int err = 0;
-
-	if (qp->state > IB_QPS_RTS) {
-		err = -EINVAL;
-		goto out;
-	}
-
-	/*
-	 * Try and post each work request
-	 */
-	while (ib_wr) {
-		u32 tot_len;
-		u8 actual_sge_count;
-
-		if (ib_wr->num_sge > qp->recv_sgl_depth) {
-			err = -EINVAL;
-			break;
-		}
-
-		/*
-		 * Create local host-copy of the WR
-		 */
-		wr.rqwr.rq_hdr.user_hdr.hdr.context = ib_wr->wr_id;
-		c2_wr_set_id(&wr, CCWR_RECV);
-		c2_wr_set_flags(&wr, 0);
-
-		/* sge_count is limited to eight bits. */
-		BUG_ON(ib_wr->num_sge >= 256);
-		err = move_sgl((struct c2_data_addr *) & (wr.rqwr.data),
-			       ib_wr->sg_list,
-			       ib_wr->num_sge, &tot_len, &actual_sge_count);
-		c2_wr_set_sge_count(&wr, actual_sge_count);
-
-		/*
-		 * If we had an error on the last wr build, then
-		 * break out.  Possible errors include bogus WR
-		 * type, and a bogus SGL length...
-		 */
-		if (err) {
-			break;
-		}
-
-		spin_lock_irqsave(&qp->lock, lock_flags);
-		err = qp_wr_post(&qp->rq_mq, &wr, qp, qp->rq_mq.msg_size);
-		if (err) {
-			spin_unlock_irqrestore(&qp->lock, lock_flags);
-			break;
-		}
-
-		/*
-		 * Enqueue mq index to activity FIFO
-		 */
-		c2_activity(c2dev, qp->rq_mq.index, qp->rq_mq.hint_count);
-		spin_unlock_irqrestore(&qp->lock, lock_flags);
-
-		ib_wr = ib_wr->next;
-	}
-
-out:
-	if (err)
-		*bad_wr = ib_wr;
-	return err;
-}
-
-void c2_init_qp_table(struct c2_dev *c2dev)
-{
-	spin_lock_init(&c2dev->qp_table.lock);
-	idr_init(&c2dev->qp_table.idr);
-}
-
-void c2_cleanup_qp_table(struct c2_dev *c2dev)
-{
-	idr_destroy(&c2dev->qp_table.idr);
-}
diff --git a/drivers/staging/rdma/amso1100/c2_rnic.c b/drivers/staging/rdma/amso1100/c2_rnic.c
deleted file mode 100644
index 5e65c6d..0000000
--- a/drivers/staging/rdma/amso1100/c2_rnic.c
+++ /dev/null
@@ -1,652 +0,0 @@
-/*
- * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/pci.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/delay.h>
-#include <linux/ethtool.h>
-#include <linux/mii.h>
-#include <linux/if_vlan.h>
-#include <linux/crc32.h>
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/tcp.h>
-#include <linux/init.h>
-#include <linux/dma-mapping.h>
-#include <linux/mm.h>
-#include <linux/inet.h>
-#include <linux/vmalloc.h>
-#include <linux/slab.h>
-
-#include <linux/route.h>
-
-#include <asm/io.h>
-#include <asm/irq.h>
-#include <asm/byteorder.h>
-#include <rdma/ib_smi.h>
-#include "c2.h"
-#include "c2_vq.h"
-
-/* Device capabilities */
-#define C2_MIN_PAGESIZE  1024
-
-#define C2_MAX_MRS       32768
-#define C2_MAX_QPS       16000
-#define C2_MAX_WQE_SZ    256
-#define C2_MAX_QP_WR     ((128*1024)/C2_MAX_WQE_SZ)
-#define C2_MAX_SGES      4
-#define C2_MAX_SGE_RD    1
-#define C2_MAX_CQS       32768
-#define C2_MAX_CQES      4096
-#define C2_MAX_PDS       16384
-
-/*
- * Send the adapter INIT message to the amso1100
- */
-static int c2_adapter_init(struct c2_dev *c2dev)
-{
-	struct c2wr_init_req wr;
-
-	memset(&wr, 0, sizeof(wr));
-	c2_wr_set_id(&wr, CCWR_INIT);
-	wr.hdr.context = 0;
-	wr.hint_count = cpu_to_be64(c2dev->hint_count_dma);
-	wr.q0_host_shared = cpu_to_be64(c2dev->req_vq.shared_dma);
-	wr.q1_host_shared = cpu_to_be64(c2dev->rep_vq.shared_dma);
-	wr.q1_host_msg_pool = cpu_to_be64(c2dev->rep_vq.host_dma);
-	wr.q2_host_shared = cpu_to_be64(c2dev->aeq.shared_dma);
-	wr.q2_host_msg_pool = cpu_to_be64(c2dev->aeq.host_dma);
-
-	/* Post the init message */
-	return vq_send_wr(c2dev, (union c2wr *) & wr);
-}
-
-/*
- * Send the adapter TERM message to the amso1100
- */
-static void c2_adapter_term(struct c2_dev *c2dev)
-{
-	struct c2wr_init_req wr;
-
-	memset(&wr, 0, sizeof(wr));
-	c2_wr_set_id(&wr, CCWR_TERM);
-	wr.hdr.context = 0;
-
-	/* Post the init message */
-	vq_send_wr(c2dev, (union c2wr *) & wr);
-	c2dev->init = 0;
-
-	return;
-}
-
-/*
- * Query the adapter
- */
-static int c2_rnic_query(struct c2_dev *c2dev, struct ib_device_attr *props)
-{
-	struct c2_vq_req *vq_req;
-	struct c2wr_rnic_query_req wr;
-	struct c2wr_rnic_query_rep *reply;
-	int err;
-
-	vq_req = vq_req_alloc(c2dev);
-	if (!vq_req)
-		return -ENOMEM;
-
-	c2_wr_set_id(&wr, CCWR_RNIC_QUERY);
-	wr.hdr.context = (unsigned long) vq_req;
-	wr.rnic_handle = c2dev->adapter_handle;
-
-	vq_req_get(c2dev, vq_req);
-
-	err = vq_send_wr(c2dev, (union c2wr *) &wr);
-	if (err) {
-		vq_req_put(c2dev, vq_req);
-		goto bail1;
-	}
-
-	err = vq_wait_for_reply(c2dev, vq_req);
-	if (err)
-		goto bail1;
-
-	reply =
-	    (struct c2wr_rnic_query_rep *) (unsigned long) (vq_req->reply_msg);
-	if (!reply)
-		err = -ENOMEM;
-	else
-		err = c2_errno(reply);
-	if (err)
-		goto bail2;
-
-	props->fw_ver =
-		((u64)be32_to_cpu(reply->fw_ver_major) << 32) |
-		((be32_to_cpu(reply->fw_ver_minor) & 0xFFFF) << 16) |
-		(be32_to_cpu(reply->fw_ver_patch) & 0xFFFF);
-	memcpy(&props->sys_image_guid, c2dev->netdev->dev_addr, 6);
-	props->max_mr_size         = 0xFFFFFFFF;
-	props->page_size_cap       = ~(C2_MIN_PAGESIZE-1);
-	props->vendor_id           = be32_to_cpu(reply->vendor_id);
-	props->vendor_part_id      = be32_to_cpu(reply->part_number);
-	props->hw_ver              = be32_to_cpu(reply->hw_version);
-	props->max_qp              = be32_to_cpu(reply->max_qps);
-	props->max_qp_wr           = be32_to_cpu(reply->max_qp_depth);
-	props->device_cap_flags    = c2dev->device_cap_flags;
-	props->max_sge             = C2_MAX_SGES;
-	props->max_sge_rd          = C2_MAX_SGE_RD;
-	props->max_cq              = be32_to_cpu(reply->max_cqs);
-	props->max_cqe             = be32_to_cpu(reply->max_cq_depth);
-	props->max_mr              = be32_to_cpu(reply->max_mrs);
-	props->max_pd              = be32_to_cpu(reply->max_pds);
-	props->max_qp_rd_atom      = be32_to_cpu(reply->max_qp_ird);
-	props->max_ee_rd_atom      = 0;
-	props->max_res_rd_atom     = be32_to_cpu(reply->max_global_ird);
-	props->max_qp_init_rd_atom = be32_to_cpu(reply->max_qp_ord);
-	props->max_ee_init_rd_atom = 0;
-	props->atomic_cap          = IB_ATOMIC_NONE;
-	props->max_ee              = 0;
-	props->max_rdd             = 0;
-	props->max_mw              = be32_to_cpu(reply->max_mws);
-	props->max_raw_ipv6_qp     = 0;
-	props->max_raw_ethy_qp     = 0;
-	props->max_mcast_grp       = 0;
-	props->max_mcast_qp_attach = 0;
-	props->max_total_mcast_qp_attach = 0;
-	props->max_ah              = 0;
-	props->max_fmr             = 0;
-	props->max_map_per_fmr     = 0;
-	props->max_srq             = 0;
-	props->max_srq_wr          = 0;
-	props->max_srq_sge         = 0;
-	props->max_pkeys           = 0;
-	props->local_ca_ack_delay  = 0;
-
- bail2:
-	vq_repbuf_free(c2dev, reply);
-
- bail1:
-	vq_req_free(c2dev, vq_req);
-	return err;
-}
-
-/*
- * Add an IP address to the RNIC interface
- */
-int c2_add_addr(struct c2_dev *c2dev, __be32 inaddr, __be32 inmask)
-{
-	struct c2_vq_req *vq_req;
-	struct c2wr_rnic_setconfig_req *wr;
-	struct c2wr_rnic_setconfig_rep *reply;
-	struct c2_netaddr netaddr;
-	int err, len;
-
-	vq_req = vq_req_alloc(c2dev);
-	if (!vq_req)
-		return -ENOMEM;
-
-	len = sizeof(struct c2_netaddr);
-	wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL);
-	if (!wr) {
-		err = -ENOMEM;
-		goto bail0;
-	}
-
-	c2_wr_set_id(wr, CCWR_RNIC_SETCONFIG);
-	wr->hdr.context = (unsigned long) vq_req;
-	wr->rnic_handle = c2dev->adapter_handle;
-	wr->option = cpu_to_be32(C2_CFG_ADD_ADDR);
-
-	netaddr.ip_addr = inaddr;
-	netaddr.netmask = inmask;
-	netaddr.mtu = 0;
-
-	memcpy(wr->data, &netaddr, len);
-
-	vq_req_get(c2dev, vq_req);
-
-	err = vq_send_wr(c2dev, (union c2wr *) wr);
-	if (err) {
-		vq_req_put(c2dev, vq_req);
-		goto bail1;
-	}
-
-	err = vq_wait_for_reply(c2dev, vq_req);
-	if (err)
-		goto bail1;
-
-	reply =
-	    (struct c2wr_rnic_setconfig_rep *) (unsigned long) (vq_req->reply_msg);
-	if (!reply) {
-		err = -ENOMEM;
-		goto bail1;
-	}
-
-	err = c2_errno(reply);
-	vq_repbuf_free(c2dev, reply);
-
-bail1:
-	kfree(wr);
-bail0:
-	vq_req_free(c2dev, vq_req);
-	return err;
-}
-
-/*
- * Delete an IP address from the RNIC interface
- */
-int c2_del_addr(struct c2_dev *c2dev, __be32 inaddr, __be32 inmask)
-{
-	struct c2_vq_req *vq_req;
-	struct c2wr_rnic_setconfig_req *wr;
-	struct c2wr_rnic_setconfig_rep *reply;
-	struct c2_netaddr netaddr;
-	int err, len;
-
-	vq_req = vq_req_alloc(c2dev);
-	if (!vq_req)
-		return -ENOMEM;
-
-	len = sizeof(struct c2_netaddr);
-	wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL);
-	if (!wr) {
-		err = -ENOMEM;
-		goto bail0;
-	}
-
-	c2_wr_set_id(wr, CCWR_RNIC_SETCONFIG);
-	wr->hdr.context = (unsigned long) vq_req;
-	wr->rnic_handle = c2dev->adapter_handle;
-	wr->option = cpu_to_be32(C2_CFG_DEL_ADDR);
-
-	netaddr.ip_addr = inaddr;
-	netaddr.netmask = inmask;
-	netaddr.mtu = 0;
-
-	memcpy(wr->data, &netaddr, len);
-
-	vq_req_get(c2dev, vq_req);
-
-	err = vq_send_wr(c2dev, (union c2wr *) wr);
-	if (err) {
-		vq_req_put(c2dev, vq_req);
-		goto bail1;
-	}
-
-	err = vq_wait_for_reply(c2dev, vq_req);
-	if (err)
-		goto bail1;
-
-	reply =
-	    (struct c2wr_rnic_setconfig_rep *) (unsigned long) (vq_req->reply_msg);
-	if (!reply) {
-		err = -ENOMEM;
-		goto bail1;
-	}
-
-	err = c2_errno(reply);
-	vq_repbuf_free(c2dev, reply);
-
-bail1:
-	kfree(wr);
-bail0:
-	vq_req_free(c2dev, vq_req);
-	return err;
-}
-
-/*
- * Open a single RNIC instance to use with all
- * low level openib calls
- */
-static int c2_rnic_open(struct c2_dev *c2dev)
-{
-	struct c2_vq_req *vq_req;
-	union c2wr wr;
-	struct c2wr_rnic_open_rep *reply;
-	int err;
-
-	vq_req = vq_req_alloc(c2dev);
-	if (vq_req == NULL) {
-		return -ENOMEM;
-	}
-
-	memset(&wr, 0, sizeof(wr));
-	c2_wr_set_id(&wr, CCWR_RNIC_OPEN);
-	wr.rnic_open.req.hdr.context = (unsigned long) (vq_req);
-	wr.rnic_open.req.flags = cpu_to_be16(RNIC_PRIV_MODE);
-	wr.rnic_open.req.port_num = cpu_to_be16(0);
-	wr.rnic_open.req.user_context = (unsigned long) c2dev;
-
-	vq_req_get(c2dev, vq_req);
-
-	err = vq_send_wr(c2dev, &wr);
-	if (err) {
-		vq_req_put(c2dev, vq_req);
-		goto bail0;
-	}
-
-	err = vq_wait_for_reply(c2dev, vq_req);
-	if (err) {
-		goto bail0;
-	}
-
-	reply = (struct c2wr_rnic_open_rep *) (unsigned long) (vq_req->reply_msg);
-	if (!reply) {
-		err = -ENOMEM;
-		goto bail0;
-	}
-
-	if ((err = c2_errno(reply)) != 0) {
-		goto bail1;
-	}
-
-	c2dev->adapter_handle = reply->rnic_handle;
-
-bail1:
-	vq_repbuf_free(c2dev, reply);
-bail0:
-	vq_req_free(c2dev, vq_req);
-	return err;
-}
-
-/*
- * Close the RNIC instance
- */
-static int c2_rnic_close(struct c2_dev *c2dev)
-{
-	struct c2_vq_req *vq_req;
-	union c2wr wr;
-	struct c2wr_rnic_close_rep *reply;
-	int err;
-
-	vq_req = vq_req_alloc(c2dev);
-	if (vq_req == NULL) {
-		return -ENOMEM;
-	}
-
-	memset(&wr, 0, sizeof(wr));
-	c2_wr_set_id(&wr, CCWR_RNIC_CLOSE);
-	wr.rnic_close.req.hdr.context = (unsigned long) vq_req;
-	wr.rnic_close.req.rnic_handle = c2dev->adapter_handle;
-
-	vq_req_get(c2dev, vq_req);
-
-	err = vq_send_wr(c2dev, &wr);
-	if (err) {
-		vq_req_put(c2dev, vq_req);
-		goto bail0;
-	}
-
-	err = vq_wait_for_reply(c2dev, vq_req);
-	if (err) {
-		goto bail0;
-	}
-
-	reply = (struct c2wr_rnic_close_rep *) (unsigned long) (vq_req->reply_msg);
-	if (!reply) {
-		err = -ENOMEM;
-		goto bail0;
-	}
-
-	if ((err = c2_errno(reply)) != 0) {
-		goto bail1;
-	}
-
-	c2dev->adapter_handle = 0;
-
-bail1:
-	vq_repbuf_free(c2dev, reply);
-bail0:
-	vq_req_free(c2dev, vq_req);
-	return err;
-}
-
-/*
- * Called by c2_probe to initialize the RNIC. This principally
- * involves initializing the various limits and resource pools that
- * comprise the RNIC instance.
- */
-int c2_rnic_init(struct c2_dev *c2dev)
-{
-	int err;
-	u32 qsize, msgsize;
-	void *q1_pages;
-	void *q2_pages;
-	void __iomem *mmio_regs;
-
-	/* Device capabilities */
-	c2dev->device_cap_flags =
-	    (IB_DEVICE_RESIZE_MAX_WR |
-	     IB_DEVICE_CURR_QP_STATE_MOD |
-	     IB_DEVICE_SYS_IMAGE_GUID |
-	     IB_DEVICE_LOCAL_DMA_LKEY |
-	     IB_DEVICE_MEM_WINDOW);
-
-	/* Allocate the qptr_array */
-	c2dev->qptr_array = vzalloc(C2_MAX_CQS * sizeof(void *));
-	if (!c2dev->qptr_array) {
-		return -ENOMEM;
-	}
-
-	/* Initialize the qptr_array */
-	c2dev->qptr_array[0] = (void *) &c2dev->req_vq;
-	c2dev->qptr_array[1] = (void *) &c2dev->rep_vq;
-	c2dev->qptr_array[2] = (void *) &c2dev->aeq;
-
-	/* Initialize data structures */
-	init_waitqueue_head(&c2dev->req_vq_wo);
-	spin_lock_init(&c2dev->vqlock);
-	spin_lock_init(&c2dev->lock);
-
-	/* Allocate MQ shared pointer pool for kernel clients. User
-	 * mode client pools are hung off the user context
-	 */
-	err = c2_init_mqsp_pool(c2dev, GFP_KERNEL, &c2dev->kern_mqsp_pool);
-	if (err) {
-		goto bail0;
-	}
-
-	/* Allocate shared pointers for Q0, Q1, and Q2 from
-	 * the shared pointer pool.
-	 */
-
-	c2dev->hint_count = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
-					     &c2dev->hint_count_dma,
-					     GFP_KERNEL);
-	c2dev->req_vq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
-					     &c2dev->req_vq.shared_dma,
-					     GFP_KERNEL);
-	c2dev->rep_vq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
-					     &c2dev->rep_vq.shared_dma,
-					     GFP_KERNEL);
-	c2dev->aeq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
-					  &c2dev->aeq.shared_dma, GFP_KERNEL);
-	if (!c2dev->hint_count || !c2dev->req_vq.shared ||
-	    !c2dev->rep_vq.shared || !c2dev->aeq.shared) {
-		err = -ENOMEM;
-		goto bail1;
-	}
-
-	mmio_regs = c2dev->kva;
-	/* Initialize the Verbs Request Queue */
-	c2_mq_req_init(&c2dev->req_vq, 0,
-		       be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q0_QSIZE)),
-		       be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q0_MSGSIZE)),
-		       mmio_regs +
-		       be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q0_POOLSTART)),
-		       mmio_regs +
-		       be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q0_SHARED)),
-		       C2_MQ_ADAPTER_TARGET);
-
-	/* Initialize the Verbs Reply Queue */
-	qsize = be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q1_QSIZE));
-	msgsize = be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q1_MSGSIZE));
-	q1_pages = dma_alloc_coherent(&c2dev->pcidev->dev, qsize * msgsize,
-				      &c2dev->rep_vq.host_dma, GFP_KERNEL);
-	if (!q1_pages) {
-		err = -ENOMEM;
-		goto bail1;
-	}
-	dma_unmap_addr_set(&c2dev->rep_vq, mapping, c2dev->rep_vq.host_dma);
-	pr_debug("%s rep_vq va %p dma %llx\n", __func__, q1_pages,
-		 (unsigned long long) c2dev->rep_vq.host_dma);
-	c2_mq_rep_init(&c2dev->rep_vq,
-		   1,
-		   qsize,
-		   msgsize,
-		   q1_pages,
-		   mmio_regs +
-		   be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q1_SHARED)),
-		   C2_MQ_HOST_TARGET);
-
-	/* Initialize the Asynchronus Event Queue */
-	qsize = be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q2_QSIZE));
-	msgsize = be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q2_MSGSIZE));
-	q2_pages = dma_alloc_coherent(&c2dev->pcidev->dev, qsize * msgsize,
-				      &c2dev->aeq.host_dma, GFP_KERNEL);
-	if (!q2_pages) {
-		err = -ENOMEM;
-		goto bail2;
-	}
-	dma_unmap_addr_set(&c2dev->aeq, mapping, c2dev->aeq.host_dma);
-	pr_debug("%s aeq va %p dma %llx\n", __func__, q2_pages,
-		 (unsigned long long) c2dev->aeq.host_dma);
-	c2_mq_rep_init(&c2dev->aeq,
-		       2,
-		       qsize,
-		       msgsize,
-		       q2_pages,
-		       mmio_regs +
-		       be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q2_SHARED)),
-		       C2_MQ_HOST_TARGET);
-
-	/* Initialize the verbs request allocator */
-	err = vq_init(c2dev);
-	if (err)
-		goto bail3;
-
-	/* Enable interrupts on the adapter */
-	writel(0, c2dev->regs + C2_IDIS);
-
-	/* create the WR init message */
-	err = c2_adapter_init(c2dev);
-	if (err)
-		goto bail4;
-	c2dev->init++;
-
-	/* open an adapter instance */
-	err = c2_rnic_open(c2dev);
-	if (err)
-		goto bail4;
-
-	/* Initialize cached the adapter limits */
-	err = c2_rnic_query(c2dev, &c2dev->props);
-	if (err)
-		goto bail5;
-
-	/* Initialize the PD pool */
-	err = c2_init_pd_table(c2dev);
-	if (err)
-		goto bail5;
-
-	/* Initialize the QP pool */
-	c2_init_qp_table(c2dev);
-	return 0;
-
-bail5:
-	c2_rnic_close(c2dev);
-bail4:
-	vq_term(c2dev);
-bail3:
-	dma_free_coherent(&c2dev->pcidev->dev,
-			  c2dev->aeq.q_size * c2dev->aeq.msg_size,
-			  q2_pages, dma_unmap_addr(&c2dev->aeq, mapping));
-bail2:
-	dma_free_coherent(&c2dev->pcidev->dev,
-			  c2dev->rep_vq.q_size * c2dev->rep_vq.msg_size,
-			  q1_pages, dma_unmap_addr(&c2dev->rep_vq, mapping));
-bail1:
-	c2_free_mqsp_pool(c2dev, c2dev->kern_mqsp_pool);
-bail0:
-	vfree(c2dev->qptr_array);
-
-	return err;
-}
-
-/*
- * Called by c2_remove to cleanup the RNIC resources.
- */
-void c2_rnic_term(struct c2_dev *c2dev)
-{
-
-	/* Close the open adapter instance */
-	c2_rnic_close(c2dev);
-
-	/* Send the TERM message to the adapter */
-	c2_adapter_term(c2dev);
-
-	/* Disable interrupts on the adapter */
-	writel(1, c2dev->regs + C2_IDIS);
-
-	/* Free the QP pool */
-	c2_cleanup_qp_table(c2dev);
-
-	/* Free the PD pool */
-	c2_cleanup_pd_table(c2dev);
-
-	/* Free the verbs request allocator */
-	vq_term(c2dev);
-
-	/* Free the asynchronus event queue */
-	dma_free_coherent(&c2dev->pcidev->dev,
-			  c2dev->aeq.q_size * c2dev->aeq.msg_size,
-			  c2dev->aeq.msg_pool.host,
-			  dma_unmap_addr(&c2dev->aeq, mapping));
-
-	/* Free the verbs reply queue */
-	dma_free_coherent(&c2dev->pcidev->dev,
-			  c2dev->rep_vq.q_size * c2dev->rep_vq.msg_size,
-			  c2dev->rep_vq.msg_pool.host,
-			  dma_unmap_addr(&c2dev->rep_vq, mapping));
-
-	/* Free the MQ shared pointer pool */
-	c2_free_mqsp_pool(c2dev, c2dev->kern_mqsp_pool);
-
-	/* Free the qptr_array */
-	vfree(c2dev->qptr_array);
-
-	return;
-}
diff --git a/drivers/staging/rdma/amso1100/c2_status.h b/drivers/staging/rdma/amso1100/c2_status.h
deleted file mode 100644
index 6ee4aa9..0000000
--- a/drivers/staging/rdma/amso1100/c2_status.h
+++ /dev/null
@@ -1,158 +0,0 @@
-/*
- * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef	_C2_STATUS_H_
-#define _C2_STATUS_H_
-
-/*
- * Verbs Status Codes
- */
-enum c2_status {
-	C2_OK = 0,		/* This must be zero */
-	CCERR_INSUFFICIENT_RESOURCES = 1,
-	CCERR_INVALID_MODIFIER = 2,
-	CCERR_INVALID_MODE = 3,
-	CCERR_IN_USE = 4,
-	CCERR_INVALID_RNIC = 5,
-	CCERR_INTERRUPTED_OPERATION = 6,
-	CCERR_INVALID_EH = 7,
-	CCERR_INVALID_CQ = 8,
-	CCERR_CQ_EMPTY = 9,
-	CCERR_NOT_IMPLEMENTED = 10,
-	CCERR_CQ_DEPTH_TOO_SMALL = 11,
-	CCERR_PD_IN_USE = 12,
-	CCERR_INVALID_PD = 13,
-	CCERR_INVALID_SRQ = 14,
-	CCERR_INVALID_ADDRESS = 15,
-	CCERR_INVALID_NETMASK = 16,
-	CCERR_INVALID_QP = 17,
-	CCERR_INVALID_QP_STATE = 18,
-	CCERR_TOO_MANY_WRS_POSTED = 19,
-	CCERR_INVALID_WR_TYPE = 20,
-	CCERR_INVALID_SGL_LENGTH = 21,
-	CCERR_INVALID_SQ_DEPTH = 22,
-	CCERR_INVALID_RQ_DEPTH = 23,
-	CCERR_INVALID_ORD = 24,
-	CCERR_INVALID_IRD = 25,
-	CCERR_QP_ATTR_CANNOT_CHANGE = 26,
-	CCERR_INVALID_STAG = 27,
-	CCERR_QP_IN_USE = 28,
-	CCERR_OUTSTANDING_WRS = 29,
-	CCERR_STAG_IN_USE = 30,
-	CCERR_INVALID_STAG_INDEX = 31,
-	CCERR_INVALID_SGL_FORMAT = 32,
-	CCERR_ADAPTER_TIMEOUT = 33,
-	CCERR_INVALID_CQ_DEPTH = 34,
-	CCERR_INVALID_PRIVATE_DATA_LENGTH = 35,
-	CCERR_INVALID_EP = 36,
-	CCERR_MR_IN_USE = CCERR_STAG_IN_USE,
-	CCERR_FLUSHED = 38,
-	CCERR_INVALID_WQE = 39,
-	CCERR_LOCAL_QP_CATASTROPHIC_ERROR = 40,
-	CCERR_REMOTE_TERMINATION_ERROR = 41,
-	CCERR_BASE_AND_BOUNDS_VIOLATION = 42,
-	CCERR_ACCESS_VIOLATION = 43,
-	CCERR_INVALID_PD_ID = 44,
-	CCERR_WRAP_ERROR = 45,
-	CCERR_INV_STAG_ACCESS_ERROR = 46,
-	CCERR_ZERO_RDMA_READ_RESOURCES = 47,
-	CCERR_QP_NOT_PRIVILEGED = 48,
-	CCERR_STAG_STATE_NOT_INVALID = 49,
-	CCERR_INVALID_PAGE_SIZE = 50,
-	CCERR_INVALID_BUFFER_SIZE = 51,
-	CCERR_INVALID_PBE = 52,
-	CCERR_INVALID_FBO = 53,
-	CCERR_INVALID_LENGTH = 54,
-	CCERR_INVALID_ACCESS_RIGHTS = 55,
-	CCERR_PBL_TOO_BIG = 56,
-	CCERR_INVALID_VA = 57,
-	CCERR_INVALID_REGION = 58,
-	CCERR_INVALID_WINDOW = 59,
-	CCERR_TOTAL_LENGTH_TOO_BIG = 60,
-	CCERR_INVALID_QP_ID = 61,
-	CCERR_ADDR_IN_USE = 62,
-	CCERR_ADDR_NOT_AVAIL = 63,
-	CCERR_NET_DOWN = 64,
-	CCERR_NET_UNREACHABLE = 65,
-	CCERR_CONN_ABORTED = 66,
-	CCERR_CONN_RESET = 67,
-	CCERR_NO_BUFS = 68,
-	CCERR_CONN_TIMEDOUT = 69,
-	CCERR_CONN_REFUSED = 70,
-	CCERR_HOST_UNREACHABLE = 71,
-	CCERR_INVALID_SEND_SGL_DEPTH = 72,
-	CCERR_INVALID_RECV_SGL_DEPTH = 73,
-	CCERR_INVALID_RDMA_WRITE_SGL_DEPTH = 74,
-	CCERR_INSUFFICIENT_PRIVILEGES = 75,
-	CCERR_STACK_ERROR = 76,
-	CCERR_INVALID_VERSION = 77,
-	CCERR_INVALID_MTU = 78,
-	CCERR_INVALID_IMAGE = 79,
-	CCERR_PENDING = 98,	/* not an error; user internally by adapter */
-	CCERR_DEFER = 99,	/* not an error; used internally by adapter */
-	CCERR_FAILED_WRITE = 100,
-	CCERR_FAILED_ERASE = 101,
-	CCERR_FAILED_VERIFICATION = 102,
-	CCERR_NOT_FOUND = 103,
-
-};
-
-/*
- * CCAE_ACTIVE_CONNECT_RESULTS status result codes.
- */
-enum c2_connect_status {
-	C2_CONN_STATUS_SUCCESS = C2_OK,
-	C2_CONN_STATUS_NO_MEM = CCERR_INSUFFICIENT_RESOURCES,
-	C2_CONN_STATUS_TIMEDOUT = CCERR_CONN_TIMEDOUT,
-	C2_CONN_STATUS_REFUSED = CCERR_CONN_REFUSED,
-	C2_CONN_STATUS_NETUNREACH = CCERR_NET_UNREACHABLE,
-	C2_CONN_STATUS_HOSTUNREACH = CCERR_HOST_UNREACHABLE,
-	C2_CONN_STATUS_INVALID_RNIC = CCERR_INVALID_RNIC,
-	C2_CONN_STATUS_INVALID_QP = CCERR_INVALID_QP,
-	C2_CONN_STATUS_INVALID_QP_STATE = CCERR_INVALID_QP_STATE,
-	C2_CONN_STATUS_REJECTED = CCERR_CONN_RESET,
-	C2_CONN_STATUS_ADDR_NOT_AVAIL = CCERR_ADDR_NOT_AVAIL,
-};
-
-/*
- * Flash programming status codes.
- */
-enum c2_flash_status {
-	C2_FLASH_STATUS_SUCCESS = 0x0000,
-	C2_FLASH_STATUS_VERIFY_ERR = 0x0002,
-	C2_FLASH_STATUS_IMAGE_ERR = 0x0004,
-	C2_FLASH_STATUS_ECLBS = 0x0400,
-	C2_FLASH_STATUS_PSLBS = 0x0800,
-	C2_FLASH_STATUS_VPENS = 0x1000,
-};
-
-#endif				/* _C2_STATUS_H_ */
diff --git a/drivers/staging/rdma/amso1100/c2_user.h b/drivers/staging/rdma/amso1100/c2_user.h
deleted file mode 100644
index 7e9e7ad..0000000
--- a/drivers/staging/rdma/amso1100/c2_user.h
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- * Copyright (c) 2005 Topspin Communications.  All rights reserved.
- * Copyright (c) 2005 Cisco Systems.  All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-#ifndef C2_USER_H
-#define C2_USER_H
-
-#include <linux/types.h>
-
-/*
- * Make sure that all structs defined in this file remain laid out so
- * that they pack the same way on 32-bit and 64-bit architectures (to
- * avoid incompatibility between 32-bit userspace and 64-bit kernels).
- * In particular do not use pointer types -- pass pointers in __u64
- * instead.
- */
-
-struct c2_alloc_ucontext_resp {
-	__u32 qp_tab_size;
-	__u32 uarc_size;
-};
-
-struct c2_alloc_pd_resp {
-	__u32 pdn;
-	__u32 reserved;
-};
-
-struct c2_create_cq {
-	__u32 lkey;
-	__u32 pdn;
-	__u64 arm_db_page;
-	__u64 set_db_page;
-	__u32 arm_db_index;
-	__u32 set_db_index;
-};
-
-struct c2_create_cq_resp {
-	__u32 cqn;
-	__u32 reserved;
-};
-
-struct c2_create_qp {
-	__u32 lkey;
-	__u32 reserved;
-	__u64 sq_db_page;
-	__u64 rq_db_page;
-	__u32 sq_db_index;
-	__u32 rq_db_index;
-};
-
-#endif				/* C2_USER_H */
diff --git a/drivers/staging/rdma/amso1100/c2_vq.c b/drivers/staging/rdma/amso1100/c2_vq.c
deleted file mode 100644
index 2ec716f..0000000
--- a/drivers/staging/rdma/amso1100/c2_vq.c
+++ /dev/null
@@ -1,260 +0,0 @@
-/*
- * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include <linux/slab.h>
-#include <linux/spinlock.h>
-
-#include "c2_vq.h"
-#include "c2_provider.h"
-
-/*
- * Verbs Request Objects:
- *
- * VQ Request Objects are allocated by the kernel verbs handlers.
- * They contain a wait object, a refcnt, an atomic bool indicating that the
- * adapter has replied, and a copy of the verb reply work request.
- * A pointer to the VQ Request Object is passed down in the context
- * field of the work request message, and reflected back by the adapter
- * in the verbs reply message.  The function handle_vq() in the interrupt
- * path will use this pointer to:
- * 	1) append a copy of the verbs reply message
- * 	2) mark that the reply is ready
- * 	3) wake up the kernel verbs handler blocked awaiting the reply.
- *
- *
- * The kernel verbs handlers do a "get" to put a 2nd reference on the
- * VQ Request object.  If the kernel verbs handler exits before the adapter
- * can respond, this extra reference will keep the VQ Request object around
- * until the adapter's reply can be processed.  The reason we need this is
- * because a pointer to this object is stuffed into the context field of
- * the verbs work request message, and reflected back in the reply message.
- * It is used in the interrupt handler (handle_vq()) to wake up the appropriate
- * kernel verb handler that is blocked awaiting the verb reply.
- * So handle_vq() will do a "put" on the object when it's done accessing it.
- * NOTE:  If we guarantee that the kernel verb handler will never bail before
- *        getting the reply, then we don't need these refcnts.
- *
- *
- * VQ Request objects are freed by the kernel verbs handlers only
- * after the verb has been processed, or when the adapter fails and
- * does not reply.
- *
- *
- * Verbs Reply Buffers:
- *
- * VQ Reply bufs are local host memory copies of a
- * outstanding Verb Request reply
- * message.  The are always allocated by the kernel verbs handlers, and _may_ be
- * freed by either the kernel verbs handler -or- the interrupt handler.  The
- * kernel verbs handler _must_ free the repbuf, then free the vq request object
- * in that order.
- */
-
-int vq_init(struct c2_dev *c2dev)
-{
-	sprintf(c2dev->vq_cache_name, "c2-vq:dev%c",
-		(char) ('0' + c2dev->devnum));
-	c2dev->host_msg_cache =
-	    kmem_cache_create(c2dev->vq_cache_name, c2dev->rep_vq.msg_size, 0,
-			      SLAB_HWCACHE_ALIGN, NULL);
-	if (c2dev->host_msg_cache == NULL) {
-		return -ENOMEM;
-	}
-	return 0;
-}
-
-void vq_term(struct c2_dev *c2dev)
-{
-	kmem_cache_destroy(c2dev->host_msg_cache);
-}
-
-/* vq_req_alloc - allocate a VQ Request Object and initialize it.
- * The refcnt is set to 1.
- */
-struct c2_vq_req *vq_req_alloc(struct c2_dev *c2dev)
-{
-	struct c2_vq_req *r;
-
-	r = kmalloc(sizeof(struct c2_vq_req), GFP_KERNEL);
-	if (r) {
-		init_waitqueue_head(&r->wait_object);
-		r->reply_msg = 0;
-		r->event = 0;
-		r->cm_id = NULL;
-		r->qp = NULL;
-		atomic_set(&r->refcnt, 1);
-		atomic_set(&r->reply_ready, 0);
-	}
-	return r;
-}
-
-
-/* vq_req_free - free the VQ Request Object.  It is assumed the verbs handler
- * has already free the VQ Reply Buffer if it existed.
- */
-void vq_req_free(struct c2_dev *c2dev, struct c2_vq_req *r)
-{
-	r->reply_msg = 0;
-	if (atomic_dec_and_test(&r->refcnt)) {
-		kfree(r);
-	}
-}
-
-/* vq_req_get - reference a VQ Request Object.  Done
- * only in the kernel verbs handlers.
- */
-void vq_req_get(struct c2_dev *c2dev, struct c2_vq_req *r)
-{
-	atomic_inc(&r->refcnt);
-}
-
-
-/* vq_req_put - dereference and potentially free a VQ Request Object.
- *
- * This is only called by handle_vq() on the
- * interrupt when it is done processing
- * a verb reply message.  If the associated
- * kernel verbs handler has already bailed,
- * then this put will actually free the VQ
- * Request object _and_ the VQ Reply Buffer
- * if it exists.
- */
-void vq_req_put(struct c2_dev *c2dev, struct c2_vq_req *r)
-{
-	if (atomic_dec_and_test(&r->refcnt)) {
-		if (r->reply_msg != 0)
-			vq_repbuf_free(c2dev,
-				       (void *) (unsigned long) r->reply_msg);
-		kfree(r);
-	}
-}
-
-
-/*
- * vq_repbuf_alloc - allocate a VQ Reply Buffer.
- */
-void *vq_repbuf_alloc(struct c2_dev *c2dev)
-{
-	return kmem_cache_alloc(c2dev->host_msg_cache, GFP_ATOMIC);
-}
-
-/*
- * vq_send_wr - post a verbs request message to the Verbs Request Queue.
- * If a message is not available in the MQ, then block until one is available.
- * NOTE: handle_mq() on the interrupt context will wake up threads blocked here.
- * When the adapter drains the Verbs Request Queue,
- * it inserts MQ index 0 in to the
- * adapter->host activity fifo and interrupts the host.
- */
-int vq_send_wr(struct c2_dev *c2dev, union c2wr *wr)
-{
-	void *msg;
-	wait_queue_t __wait;
-
-	/*
-	 * grab adapter vq lock
-	 */
-	spin_lock(&c2dev->vqlock);
-
-	/*
-	 * allocate msg
-	 */
-	msg = c2_mq_alloc(&c2dev->req_vq);
-
-	/*
-	 * If we cannot get a msg, then we'll wait
-	 * When a messages are available, the int handler will wake_up()
-	 * any waiters.
-	 */
-	while (msg == NULL) {
-		pr_debug("%s:%d no available msg in VQ, waiting...\n",
-		       __func__, __LINE__);
-		init_waitqueue_entry(&__wait, current);
-		add_wait_queue(&c2dev->req_vq_wo, &__wait);
-		spin_unlock(&c2dev->vqlock);
-		for (;;) {
-			set_current_state(TASK_INTERRUPTIBLE);
-			if (!c2_mq_full(&c2dev->req_vq)) {
-				break;
-			}
-			if (!signal_pending(current)) {
-				schedule_timeout(1 * HZ);	/* 1 second... */
-				continue;
-			}
-			set_current_state(TASK_RUNNING);
-			remove_wait_queue(&c2dev->req_vq_wo, &__wait);
-			return -EINTR;
-		}
-		set_current_state(TASK_RUNNING);
-		remove_wait_queue(&c2dev->req_vq_wo, &__wait);
-		spin_lock(&c2dev->vqlock);
-		msg = c2_mq_alloc(&c2dev->req_vq);
-	}
-
-	/*
-	 * copy wr into adapter msg
-	 */
-	memcpy(msg, wr, c2dev->req_vq.msg_size);
-
-	/*
-	 * post msg
-	 */
-	c2_mq_produce(&c2dev->req_vq);
-
-	/*
-	 * release adapter vq lock
-	 */
-	spin_unlock(&c2dev->vqlock);
-	return 0;
-}
-
-
-/*
- * vq_wait_for_reply - block until the adapter posts a Verb Reply Message.
- */
-int vq_wait_for_reply(struct c2_dev *c2dev, struct c2_vq_req *req)
-{
-	if (!wait_event_timeout(req->wait_object,
-				atomic_read(&req->reply_ready),
-				60*HZ))
-		return -ETIMEDOUT;
-
-	return 0;
-}
-
-/*
- * vq_repbuf_free - Free a Verbs Reply Buffer.
- */
-void vq_repbuf_free(struct c2_dev *c2dev, void *reply)
-{
-	kmem_cache_free(c2dev->host_msg_cache, reply);
-}
diff --git a/drivers/staging/rdma/amso1100/c2_vq.h b/drivers/staging/rdma/amso1100/c2_vq.h
deleted file mode 100644
index c1f6cef..0000000
--- a/drivers/staging/rdma/amso1100/c2_vq.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef _C2_VQ_H_
-#define _C2_VQ_H_
-#include <linux/sched.h>
-#include "c2.h"
-#include "c2_wr.h"
-#include "c2_provider.h"
-
-struct c2_vq_req {
-	u64 reply_msg;		/* ptr to reply msg */
-	wait_queue_head_t wait_object;	/* wait object for vq reqs */
-	atomic_t reply_ready;	/* set when reply is ready */
-	atomic_t refcnt;	/* used to cancel WRs... */
-	int event;
-	struct iw_cm_id *cm_id;
-	struct c2_qp *qp;
-};
-
-int vq_init(struct c2_dev *c2dev);
-void vq_term(struct c2_dev *c2dev);
-
-struct c2_vq_req *vq_req_alloc(struct c2_dev *c2dev);
-void vq_req_free(struct c2_dev *c2dev, struct c2_vq_req *req);
-void vq_req_get(struct c2_dev *c2dev, struct c2_vq_req *req);
-void vq_req_put(struct c2_dev *c2dev, struct c2_vq_req *req);
-int vq_send_wr(struct c2_dev *c2dev, union c2wr * wr);
-
-void *vq_repbuf_alloc(struct c2_dev *c2dev);
-void vq_repbuf_free(struct c2_dev *c2dev, void *reply);
-
-int vq_wait_for_reply(struct c2_dev *c2dev, struct c2_vq_req *req);
-#endif				/* _C2_VQ_H_ */
diff --git a/drivers/staging/rdma/amso1100/c2_wr.h b/drivers/staging/rdma/amso1100/c2_wr.h
deleted file mode 100644
index 8d4b4ca..0000000
--- a/drivers/staging/rdma/amso1100/c2_wr.h
+++ /dev/null
@@ -1,1520 +0,0 @@
-/*
- * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef _C2_WR_H_
-#define _C2_WR_H_
-
-#ifdef CCDEBUG
-#define CCWR_MAGIC		0xb07700b0
-#endif
-
-#define C2_QP_NO_ATTR_CHANGE 0xFFFFFFFF
-
-/* Maximum allowed size in bytes of private_data exchange
- * on connect.
- */
-#define C2_MAX_PRIVATE_DATA_SIZE 200
-
-/*
- * These types are shared among the adapter, host, and CCIL consumer.
- */
-enum c2_cq_notification_type {
-	C2_CQ_NOTIFICATION_TYPE_NONE = 1,
-	C2_CQ_NOTIFICATION_TYPE_NEXT,
-	C2_CQ_NOTIFICATION_TYPE_NEXT_SE
-};
-
-enum c2_setconfig_cmd {
-	C2_CFG_ADD_ADDR = 1,
-	C2_CFG_DEL_ADDR = 2,
-	C2_CFG_ADD_ROUTE = 3,
-	C2_CFG_DEL_ROUTE = 4
-};
-
-enum c2_getconfig_cmd {
-	C2_GETCONFIG_ROUTES = 1,
-	C2_GETCONFIG_ADDRS
-};
-
-/*
- *  CCIL Work Request Identifiers
- */
-enum c2wr_ids {
-	CCWR_RNIC_OPEN = 1,
-	CCWR_RNIC_QUERY,
-	CCWR_RNIC_SETCONFIG,
-	CCWR_RNIC_GETCONFIG,
-	CCWR_RNIC_CLOSE,
-	CCWR_CQ_CREATE,
-	CCWR_CQ_QUERY,
-	CCWR_CQ_MODIFY,
-	CCWR_CQ_DESTROY,
-	CCWR_QP_CONNECT,
-	CCWR_PD_ALLOC,
-	CCWR_PD_DEALLOC,
-	CCWR_SRQ_CREATE,
-	CCWR_SRQ_QUERY,
-	CCWR_SRQ_MODIFY,
-	CCWR_SRQ_DESTROY,
-	CCWR_QP_CREATE,
-	CCWR_QP_QUERY,
-	CCWR_QP_MODIFY,
-	CCWR_QP_DESTROY,
-	CCWR_NSMR_STAG_ALLOC,
-	CCWR_NSMR_REGISTER,
-	CCWR_NSMR_PBL,
-	CCWR_STAG_DEALLOC,
-	CCWR_NSMR_REREGISTER,
-	CCWR_SMR_REGISTER,
-	CCWR_MR_QUERY,
-	CCWR_MW_ALLOC,
-	CCWR_MW_QUERY,
-	CCWR_EP_CREATE,
-	CCWR_EP_GETOPT,
-	CCWR_EP_SETOPT,
-	CCWR_EP_DESTROY,
-	CCWR_EP_BIND,
-	CCWR_EP_CONNECT,
-	CCWR_EP_LISTEN,
-	CCWR_EP_SHUTDOWN,
-	CCWR_EP_LISTEN_CREATE,
-	CCWR_EP_LISTEN_DESTROY,
-	CCWR_EP_QUERY,
-	CCWR_CR_ACCEPT,
-	CCWR_CR_REJECT,
-	CCWR_CONSOLE,
-	CCWR_TERM,
-	CCWR_FLASH_INIT,
-	CCWR_FLASH,
-	CCWR_BUF_ALLOC,
-	CCWR_BUF_FREE,
-	CCWR_FLASH_WRITE,
-	CCWR_INIT,		/* WARNING: Don't move this ever again! */
-
-
-
-	/* Add new IDs here */
-
-
-
-	/*
-	 * WARNING: CCWR_LAST must always be the last verbs id defined!
-	 *          All the preceding IDs are fixed, and must not change.
-	 *          You can add new IDs, but must not remove or reorder
-	 *          any IDs. If you do, YOU will ruin any hope of
-	 *          compatibility between versions.
-	 */
-	CCWR_LAST,
-
-	/*
-	 * Start over at 1 so that arrays indexed by user wr id's
-	 * begin at 1.  This is OK since the verbs and user wr id's
-	 * are always used on disjoint sets of queues.
-	 */
-	/*
-	 * The order of the CCWR_SEND_XX verbs must
-	 * match the order of the RDMA_OPs
-	 */
-	CCWR_SEND = 1,
-	CCWR_SEND_INV,
-	CCWR_SEND_SE,
-	CCWR_SEND_SE_INV,
-	CCWR_RDMA_WRITE,
-	CCWR_RDMA_READ,
-	CCWR_RDMA_READ_INV,
-	CCWR_MW_BIND,
-	CCWR_NSMR_FASTREG,
-	CCWR_STAG_INVALIDATE,
-	CCWR_RECV,
-	CCWR_NOP,
-	CCWR_UNIMPL,
-/* WARNING: This must always be the last user wr id defined! */
-};
-#define RDMA_SEND_OPCODE_FROM_WR_ID(x)   (x+2)
-
-/*
- * SQ/RQ Work Request Types
- */
-enum c2_wr_type {
-	C2_WR_TYPE_SEND = CCWR_SEND,
-	C2_WR_TYPE_SEND_SE = CCWR_SEND_SE,
-	C2_WR_TYPE_SEND_INV = CCWR_SEND_INV,
-	C2_WR_TYPE_SEND_SE_INV = CCWR_SEND_SE_INV,
-	C2_WR_TYPE_RDMA_WRITE = CCWR_RDMA_WRITE,
-	C2_WR_TYPE_RDMA_READ = CCWR_RDMA_READ,
-	C2_WR_TYPE_RDMA_READ_INV_STAG = CCWR_RDMA_READ_INV,
-	C2_WR_TYPE_BIND_MW = CCWR_MW_BIND,
-	C2_WR_TYPE_FASTREG_NSMR = CCWR_NSMR_FASTREG,
-	C2_WR_TYPE_INV_STAG = CCWR_STAG_INVALIDATE,
-	C2_WR_TYPE_RECV = CCWR_RECV,
-	C2_WR_TYPE_NOP = CCWR_NOP,
-};
-
-struct c2_netaddr {
-	__be32 ip_addr;
-	__be32 netmask;
-	u32 mtu;
-};
-
-struct c2_route {
-	u32 ip_addr;		/* 0 indicates the default route */
-	u32 netmask;		/* netmask associated with dst */
-	u32 flags;
-	union {
-		u32 ipaddr;	/* address of the nexthop interface */
-		u8 enaddr[6];
-	} nexthop;
-};
-
-/*
- * A Scatter Gather Entry.
- */
-struct c2_data_addr {
-	__be32 stag;
-	__be32 length;
-	__be64 to;
-};
-
-/*
- * MR and MW flags used by the consumer, RI, and RNIC.
- */
-enum c2_mm_flags {
-	MEM_REMOTE = 0x0001,	/* allow mw binds with remote access. */
-	MEM_VA_BASED = 0x0002,	/* Not Zero-based */
-	MEM_PBL_COMPLETE = 0x0004,	/* PBL array is complete in this msg */
-	MEM_LOCAL_READ = 0x0008,	/* allow local reads */
-	MEM_LOCAL_WRITE = 0x0010,	/* allow local writes */
-	MEM_REMOTE_READ = 0x0020,	/* allow remote reads */
-	MEM_REMOTE_WRITE = 0x0040,	/* allow remote writes */
-	MEM_WINDOW_BIND = 0x0080,	/* binds allowed */
-	MEM_SHARED = 0x0100,	/* set if MR is shared */
-	MEM_STAG_VALID = 0x0200	/* set if STAG is in valid state */
-};
-
-/*
- * CCIL API ACF flags defined in terms of the low level mem flags.
- * This minimizes translation needed in the user API
- */
-enum c2_acf {
-	C2_ACF_LOCAL_READ = MEM_LOCAL_READ,
-	C2_ACF_LOCAL_WRITE = MEM_LOCAL_WRITE,
-	C2_ACF_REMOTE_READ = MEM_REMOTE_READ,
-	C2_ACF_REMOTE_WRITE = MEM_REMOTE_WRITE,
-	C2_ACF_WINDOW_BIND = MEM_WINDOW_BIND
-};
-
-/*
- * Image types of objects written to flash
- */
-#define C2_FLASH_IMG_BITFILE 1
-#define C2_FLASH_IMG_OPTION_ROM 2
-#define C2_FLASH_IMG_VPD 3
-
-/*
- *  to fix bug 1815 we define the max size allowable of the
- *  terminate message (per the IETF spec).Refer to the IETF
- *  protocol specification, section 12.1.6, page 64)
- *  The message is prefixed by 20 types of DDP info.
- *
- *  Then the message has 6 bytes for the terminate control
- *  and DDP segment length info plus a DDP header (either
- *  14 or 18 byts) plus 28 bytes for the RDMA header.
- *  Thus the max size in:
- *  20 + (6 + 18 + 28) = 72
- */
-#define C2_MAX_TERMINATE_MESSAGE_SIZE (72)
-
-/*
- * Build String Length.  It must be the same as C2_BUILD_STR_LEN in ccil_api.h
- */
-#define WR_BUILD_STR_LEN 64
-
-/*
- * WARNING:  All of these structs need to align any 64bit types on
- * 64 bit boundaries!  64bit types include u64 and u64.
- */
-
-/*
- * Clustercore Work Request Header.  Be sensitive to field layout
- * and alignment.
- */
-struct c2wr_hdr {
-	/* wqe_count is part of the cqe.  It is put here so the
-	 * adapter can write to it while the wr is pending without
-	 * clobbering part of the wr.  This word need not be dma'd
-	 * from the host to adapter by libccil, but we copy it anyway
-	 * to make the memcpy to the adapter better aligned.
-	 */
-	__be32 wqe_count;
-
-	/* Put these fields next so that later 32- and 64-bit
-	 * quantities are naturally aligned.
-	 */
-	u8 id;
-	u8 result;		/* adapter -> host */
-	u8 sge_count;		/* host -> adapter */
-	u8 flags;		/* host -> adapter */
-
-	u64 context;
-#ifdef CCMSGMAGIC
-	u32 magic;
-	u32 pad;
-#endif
-} __attribute__((packed));
-
-/*
- *------------------------ RNIC ------------------------
- */
-
-/*
- * WR_RNIC_OPEN
- */
-
-/*
- * Flags for the RNIC WRs
- */
-enum c2_rnic_flags {
-	RNIC_IRD_STATIC = 0x0001,
-	RNIC_ORD_STATIC = 0x0002,
-	RNIC_QP_STATIC = 0x0004,
-	RNIC_SRQ_SUPPORTED = 0x0008,
-	RNIC_PBL_BLOCK_MODE = 0x0010,
-	RNIC_SRQ_MODEL_ARRIVAL = 0x0020,
-	RNIC_CQ_OVF_DETECTED = 0x0040,
-	RNIC_PRIV_MODE = 0x0080
-};
-
-struct c2wr_rnic_open_req {
-	struct c2wr_hdr hdr;
-	u64 user_context;
-	__be16 flags;		/* See enum c2_rnic_flags */
-	__be16 port_num;
-} __attribute__((packed));
-
-struct c2wr_rnic_open_rep {
-	struct c2wr_hdr hdr;
-	u32 rnic_handle;
-} __attribute__((packed));
-
-union c2wr_rnic_open {
-	struct c2wr_rnic_open_req req;
-	struct c2wr_rnic_open_rep rep;
-} __attribute__((packed));
-
-struct c2wr_rnic_query_req {
-	struct c2wr_hdr hdr;
-	u32 rnic_handle;
-} __attribute__((packed));
-
-/*
- * WR_RNIC_QUERY
- */
-struct c2wr_rnic_query_rep {
-	struct c2wr_hdr hdr;
-	u64 user_context;
-	__be32 vendor_id;
-	__be32 part_number;
-	__be32 hw_version;
-	__be32 fw_ver_major;
-	__be32 fw_ver_minor;
-	__be32 fw_ver_patch;
-	char fw_ver_build_str[WR_BUILD_STR_LEN];
-	__be32 max_qps;
-	__be32 max_qp_depth;
-	u32 max_srq_depth;
-	u32 max_send_sgl_depth;
-	u32 max_rdma_sgl_depth;
-	__be32 max_cqs;
-	__be32 max_cq_depth;
-	u32 max_cq_event_handlers;
-	__be32 max_mrs;
-	u32 max_pbl_depth;
-	__be32 max_pds;
-	__be32 max_global_ird;
-	u32 max_global_ord;
-	__be32 max_qp_ird;
-	__be32 max_qp_ord;
-	u32 flags;
-	__be32 max_mws;
-	u32 pbe_range_low;
-	u32 pbe_range_high;
-	u32 max_srqs;
-	u32 page_size;
-} __attribute__((packed));
-
-union c2wr_rnic_query {
-	struct c2wr_rnic_query_req req;
-	struct c2wr_rnic_query_rep rep;
-} __attribute__((packed));
-
-/*
- * WR_RNIC_GETCONFIG
- */
-
-struct c2wr_rnic_getconfig_req {
-	struct c2wr_hdr hdr;
-	u32 rnic_handle;
-	u32 option;		/* see c2_getconfig_cmd_t */
-	u64 reply_buf;
-	u32 reply_buf_len;
-} __attribute__((packed)) ;
-
-struct c2wr_rnic_getconfig_rep {
-	struct c2wr_hdr hdr;
-	u32 option;		/* see c2_getconfig_cmd_t */
-	u32 count_len;		/* length of the number of addresses configured */
-} __attribute__((packed)) ;
-
-union c2wr_rnic_getconfig {
-	struct c2wr_rnic_getconfig_req req;
-	struct c2wr_rnic_getconfig_rep rep;
-} __attribute__((packed)) ;
-
-/*
- * WR_RNIC_SETCONFIG
- */
-struct c2wr_rnic_setconfig_req {
-	struct c2wr_hdr hdr;
-	u32 rnic_handle;
-	__be32 option;		/* See c2_setconfig_cmd_t */
-	/* variable data and pad. See c2_netaddr and c2_route */
-	u8 data[0];
-} __attribute__((packed)) ;
-
-struct c2wr_rnic_setconfig_rep {
-	struct c2wr_hdr hdr;
-} __attribute__((packed)) ;
-
-union c2wr_rnic_setconfig {
-	struct c2wr_rnic_setconfig_req req;
-	struct c2wr_rnic_setconfig_rep rep;
-} __attribute__((packed)) ;
-
-/*
- * WR_RNIC_CLOSE
- */
-struct c2wr_rnic_close_req {
-	struct c2wr_hdr hdr;
-	u32 rnic_handle;
-} __attribute__((packed)) ;
-
-struct c2wr_rnic_close_rep {
-	struct c2wr_hdr hdr;
-} __attribute__((packed)) ;
-
-union c2wr_rnic_close {
-	struct c2wr_rnic_close_req req;
-	struct c2wr_rnic_close_rep rep;
-} __attribute__((packed)) ;
-
-/*
- *------------------------ CQ ------------------------
- */
-struct c2wr_cq_create_req {
-	struct c2wr_hdr hdr;
-	__be64 shared_ht;
-	u64 user_context;
-	__be64 msg_pool;
-	u32 rnic_handle;
-	__be32 msg_size;
-	__be32 depth;
-} __attribute__((packed)) ;
-
-struct c2wr_cq_create_rep {
-	struct c2wr_hdr hdr;
-	__be32 mq_index;
-	__be32 adapter_shared;
-	u32 cq_handle;
-} __attribute__((packed)) ;
-
-union c2wr_cq_create {
-	struct c2wr_cq_create_req req;
-	struct c2wr_cq_create_rep rep;
-} __attribute__((packed)) ;
-
-struct c2wr_cq_modify_req {
-	struct c2wr_hdr hdr;
-	u32 rnic_handle;
-	u32 cq_handle;
-	u32 new_depth;
-	u64 new_msg_pool;
-} __attribute__((packed)) ;
-
-struct c2wr_cq_modify_rep {
-	struct c2wr_hdr hdr;
-} __attribute__((packed)) ;
-
-union c2wr_cq_modify {
-	struct c2wr_cq_modify_req req;
-	struct c2wr_cq_modify_rep rep;
-} __attribute__((packed)) ;
-
-struct c2wr_cq_destroy_req {
-	struct c2wr_hdr hdr;
-	u32 rnic_handle;
-	u32 cq_handle;
-} __attribute__((packed)) ;
-
-struct c2wr_cq_destroy_rep {
-	struct c2wr_hdr hdr;
-} __attribute__((packed)) ;
-
-union c2wr_cq_destroy {
-	struct c2wr_cq_destroy_req req;
-	struct c2wr_cq_destroy_rep rep;
-} __attribute__((packed)) ;
-
-/*
- *------------------------ PD ------------------------
- */
-struct c2wr_pd_alloc_req {
-	struct c2wr_hdr hdr;
-	u32 rnic_handle;
-	u32 pd_id;
-} __attribute__((packed)) ;
-
-struct c2wr_pd_alloc_rep {
-	struct c2wr_hdr hdr;
-} __attribute__((packed)) ;
-
-union c2wr_pd_alloc {
-	struct c2wr_pd_alloc_req req;
-	struct c2wr_pd_alloc_rep rep;
-} __attribute__((packed)) ;
-
-struct c2wr_pd_dealloc_req {
-	struct c2wr_hdr hdr;
-	u32 rnic_handle;
-	u32 pd_id;
-} __attribute__((packed)) ;
-
-struct c2wr_pd_dealloc_rep {
-	struct c2wr_hdr hdr;
-} __attribute__((packed)) ;
-
-union c2wr_pd_dealloc {
-	struct c2wr_pd_dealloc_req req;
-	struct c2wr_pd_dealloc_rep rep;
-} __attribute__((packed)) ;
-
-/*
- *------------------------ SRQ ------------------------
- */
-struct c2wr_srq_create_req {
-	struct c2wr_hdr hdr;
-	u64 shared_ht;
-	u64 user_context;
-	u32 rnic_handle;
-	u32 srq_depth;
-	u32 srq_limit;
-	u32 sgl_depth;
-	u32 pd_id;
-} __attribute__((packed)) ;
-
-struct c2wr_srq_create_rep {
-	struct c2wr_hdr hdr;
-	u32 srq_depth;
-	u32 sgl_depth;
-	u32 msg_size;
-	u32 mq_index;
-	u32 mq_start;
-	u32 srq_handle;
-} __attribute__((packed)) ;
-
-union c2wr_srq_create {
-	struct c2wr_srq_create_req req;
-	struct c2wr_srq_create_rep rep;
-} __attribute__((packed)) ;
-
-struct c2wr_srq_destroy_req {
-	struct c2wr_hdr hdr;
-	u32 rnic_handle;
-	u32 srq_handle;
-} __attribute__((packed)) ;
-
-struct c2wr_srq_destroy_rep {
-	struct c2wr_hdr hdr;
-} __attribute__((packed)) ;
-
-union c2wr_srq_destroy {
-	struct c2wr_srq_destroy_req req;
-	struct c2wr_srq_destroy_rep rep;
-} __attribute__((packed)) ;
-
-/*
- *------------------------ QP ------------------------
- */
-enum c2wr_qp_flags {
-	QP_RDMA_READ = 0x00000001,	/* RDMA read enabled? */
-	QP_RDMA_WRITE = 0x00000002,	/* RDMA write enabled? */
-	QP_MW_BIND = 0x00000004,	/* MWs enabled */
-	QP_ZERO_STAG = 0x00000008,	/* enabled? */
-	QP_REMOTE_TERMINATION = 0x00000010,	/* remote end terminated */
-	QP_RDMA_READ_RESPONSE = 0x00000020	/* Remote RDMA read  */
-	    /* enabled? */
-};
-
-struct c2wr_qp_create_req {
-	struct c2wr_hdr hdr;
-	__be64 shared_sq_ht;
-	__be64 shared_rq_ht;
-	u64 user_context;
-	u32 rnic_handle;
-	u32 sq_cq_handle;
-	u32 rq_cq_handle;
-	__be32 sq_depth;
-	__be32 rq_depth;
-	u32 srq_handle;
-	u32 srq_limit;
-	__be32 flags;		/* see enum c2wr_qp_flags */
-	__be32 send_sgl_depth;
-	__be32 recv_sgl_depth;
-	__be32 rdma_write_sgl_depth;
-	__be32 ord;
-	__be32 ird;
-	u32 pd_id;
-} __attribute__((packed)) ;
-
-struct c2wr_qp_create_rep {
-	struct c2wr_hdr hdr;
-	__be32 sq_depth;
-	__be32 rq_depth;
-	u32 send_sgl_depth;
-	u32 recv_sgl_depth;
-	u32 rdma_write_sgl_depth;
-	u32 ord;
-	u32 ird;
-	__be32 sq_msg_size;
-	__be32 sq_mq_index;
-	__be32 sq_mq_start;
-	__be32 rq_msg_size;
-	__be32 rq_mq_index;
-	__be32 rq_mq_start;
-	u32 qp_handle;
-} __attribute__((packed)) ;
-
-union c2wr_qp_create {
-	struct c2wr_qp_create_req req;
-	struct c2wr_qp_create_rep rep;
-} __attribute__((packed)) ;
-
-struct c2wr_qp_query_req {
-	struct c2wr_hdr hdr;
-	u32 rnic_handle;
-	u32 qp_handle;
-} __attribute__((packed)) ;
-
-struct c2wr_qp_query_rep {
-	struct c2wr_hdr hdr;
-	u64 user_context;
-	u32 rnic_handle;
-	u32 sq_depth;
-	u32 rq_depth;
-	u32 send_sgl_depth;
-	u32 rdma_write_sgl_depth;
-	u32 recv_sgl_depth;
-	u32 ord;
-	u32 ird;
-	u16 qp_state;
-	u16 flags;		/* see c2wr_qp_flags_t */
-	u32 qp_id;
-	u32 local_addr;
-	u32 remote_addr;
-	u16 local_port;
-	u16 remote_port;
-	u32 terminate_msg_length;	/* 0 if not present */
-	u8 data[0];
-	/* Terminate Message in-line here. */
-} __attribute__((packed)) ;
-
-union c2wr_qp_query {
-	struct c2wr_qp_query_req req;
-	struct c2wr_qp_query_rep rep;
-} __attribute__((packed)) ;
-
-struct c2wr_qp_modify_req {
-	struct c2wr_hdr hdr;
-	u64 stream_msg;
-	u32 stream_msg_length;
-	u32 rnic_handle;
-	u32 qp_handle;
-	__be32 next_qp_state;
-	__be32 ord;
-	__be32 ird;
-	__be32 sq_depth;
-	__be32 rq_depth;
-	u32 llp_ep_handle;
-} __attribute__((packed)) ;
-
-struct c2wr_qp_modify_rep {
-	struct c2wr_hdr hdr;
-	u32 ord;
-	u32 ird;
-	u32 sq_depth;
-	u32 rq_depth;
-	u32 sq_msg_size;
-	u32 sq_mq_index;
-	u32 sq_mq_start;
-	u32 rq_msg_size;
-	u32 rq_mq_index;
-	u32 rq_mq_start;
-} __attribute__((packed)) ;
-
-union c2wr_qp_modify {
-	struct c2wr_qp_modify_req req;
-	struct c2wr_qp_modify_rep rep;
-} __attribute__((packed)) ;
-
-struct c2wr_qp_destroy_req {
-	struct c2wr_hdr hdr;
-	u32 rnic_handle;
-	u32 qp_handle;
-} __attribute__((packed)) ;
-
-struct c2wr_qp_destroy_rep {
-	struct c2wr_hdr hdr;
-} __attribute__((packed)) ;
-
-union c2wr_qp_destroy {
-	struct c2wr_qp_destroy_req req;
-	struct c2wr_qp_destroy_rep rep;
-} __attribute__((packed)) ;
-
-/*
- * The CCWR_QP_CONNECT msg is posted on the verbs request queue.  It can
- * only be posted when a QP is in IDLE state.  After the connect request is
- * submitted to the LLP, the adapter moves the QP to CONNECT_PENDING state.
- * No synchronous reply from adapter to this WR.  The results of
- * connection are passed back in an async event CCAE_ACTIVE_CONNECT_RESULTS
- * See c2wr_ae_active_connect_results_t
- */
-struct c2wr_qp_connect_req {
-	struct c2wr_hdr hdr;
-	u32 rnic_handle;
-	u32 qp_handle;
-	__be32 remote_addr;
-	__be16 remote_port;
-	u16 pad;
-	__be32 private_data_length;
-	u8 private_data[0];	/* Private data in-line. */
-} __attribute__((packed)) ;
-
-struct c2wr_qp_connect {
-	struct c2wr_qp_connect_req req;
-	/* no synchronous reply.         */
-} __attribute__((packed)) ;
-
-
-/*
- *------------------------ MM ------------------------
- */
-
-struct c2wr_nsmr_stag_alloc_req {
-	struct c2wr_hdr hdr;
-	u32 rnic_handle;
-	u32 pbl_depth;
-	u32 pd_id;
-	u32 flags;
-} __attribute__((packed)) ;
-
-struct c2wr_nsmr_stag_alloc_rep {
-	struct c2wr_hdr hdr;
-	u32 pbl_depth;
-	u32 stag_index;
-} __attribute__((packed)) ;
-
-union c2wr_nsmr_stag_alloc {
-	struct c2wr_nsmr_stag_alloc_req req;
-	struct c2wr_nsmr_stag_alloc_rep rep;
-} __attribute__((packed)) ;
-
-struct c2wr_nsmr_register_req {
-	struct c2wr_hdr hdr;
-	__be64 va;
-	u32 rnic_handle;
-	__be16 flags;
-	u8 stag_key;
-	u8 pad;
-	u32 pd_id;
-	__be32 pbl_depth;
-	__be32 pbe_size;
-	__be32 fbo;
-	__be32 length;
-	__be32 addrs_length;
-	/* array of paddrs (must be aligned on a 64bit boundary) */
-	__be64 paddrs[0];
-} __attribute__((packed)) ;
-
-struct c2wr_nsmr_register_rep {
-	struct c2wr_hdr hdr;
-	u32 pbl_depth;
-	__be32 stag_index;
-} __attribute__((packed)) ;
-
-union c2wr_nsmr_register {
-	struct c2wr_nsmr_register_req req;
-	struct c2wr_nsmr_register_rep rep;
-} __attribute__((packed)) ;
-
-struct c2wr_nsmr_pbl_req {
-	struct c2wr_hdr hdr;
-	u32 rnic_handle;
-	__be32 flags;
-	__be32 stag_index;
-	__be32 addrs_length;
-	/* array of paddrs (must be aligned on a 64bit boundary) */
-	__be64 paddrs[0];
-} __attribute__((packed)) ;
-
-struct c2wr_nsmr_pbl_rep {
-	struct c2wr_hdr hdr;
-} __attribute__((packed)) ;
-
-union c2wr_nsmr_pbl {
-	struct c2wr_nsmr_pbl_req req;
-	struct c2wr_nsmr_pbl_rep rep;
-} __attribute__((packed)) ;
-
-struct c2wr_mr_query_req {
-	struct c2wr_hdr hdr;
-	u32 rnic_handle;
-	u32 stag_index;
-} __attribute__((packed)) ;
-
-struct c2wr_mr_query_rep {
-	struct c2wr_hdr hdr;
-	u8 stag_key;
-	u8 pad[3];
-	u32 pd_id;
-	u32 flags;
-	u32 pbl_depth;
-} __attribute__((packed)) ;
-
-union c2wr_mr_query {
-	struct c2wr_mr_query_req req;
-	struct c2wr_mr_query_rep rep;
-} __attribute__((packed)) ;
-
-struct c2wr_mw_query_req {
-	struct c2wr_hdr hdr;
-	u32 rnic_handle;
-	u32 stag_index;
-} __attribute__((packed)) ;
-
-struct c2wr_mw_query_rep {
-	struct c2wr_hdr hdr;
-	u8 stag_key;
-	u8 pad[3];
-	u32 pd_id;
-	u32 flags;
-} __attribute__((packed)) ;
-
-union c2wr_mw_query {
-	struct c2wr_mw_query_req req;
-	struct c2wr_mw_query_rep rep;
-} __attribute__((packed)) ;
-
-
-struct c2wr_stag_dealloc_req {
-	struct c2wr_hdr hdr;
-	u32 rnic_handle;
-	__be32 stag_index;
-} __attribute__((packed)) ;
-
-struct c2wr_stag_dealloc_rep {
-	struct c2wr_hdr hdr;
-} __attribute__((packed)) ;
-
-union c2wr_stag_dealloc {
-	struct c2wr_stag_dealloc_req req;
-	struct c2wr_stag_dealloc_rep rep;
-} __attribute__((packed)) ;
-
-struct c2wr_nsmr_reregister_req {
-	struct c2wr_hdr hdr;
-	u64 va;
-	u32 rnic_handle;
-	u16 flags;
-	u8 stag_key;
-	u8 pad;
-	u32 stag_index;
-	u32 pd_id;
-	u32 pbl_depth;
-	u32 pbe_size;
-	u32 fbo;
-	u32 length;
-	u32 addrs_length;
-	u32 pad1;
-	/* array of paddrs (must be aligned on a 64bit boundary) */
-	u64 paddrs[0];
-} __attribute__((packed)) ;
-
-struct c2wr_nsmr_reregister_rep {
-	struct c2wr_hdr hdr;
-	u32 pbl_depth;
-	u32 stag_index;
-} __attribute__((packed)) ;
-
-union c2wr_nsmr_reregister {
-	struct c2wr_nsmr_reregister_req req;
-	struct c2wr_nsmr_reregister_rep rep;
-} __attribute__((packed)) ;
-
-struct c2wr_smr_register_req {
-	struct c2wr_hdr hdr;
-	u64 va;
-	u32 rnic_handle;
-	u16 flags;
-	u8 stag_key;
-	u8 pad;
-	u32 stag_index;
-	u32 pd_id;
-} __attribute__((packed)) ;
-
-struct c2wr_smr_register_rep {
-	struct c2wr_hdr hdr;
-	u32 stag_index;
-} __attribute__((packed)) ;
-
-union c2wr_smr_register {
-	struct c2wr_smr_register_req req;
-	struct c2wr_smr_register_rep rep;
-} __attribute__((packed)) ;
-
-struct c2wr_mw_alloc_req {
-	struct c2wr_hdr hdr;
-	u32 rnic_handle;
-	u32 pd_id;
-} __attribute__((packed)) ;
-
-struct c2wr_mw_alloc_rep {
-	struct c2wr_hdr hdr;
-	u32 stag_index;
-} __attribute__((packed)) ;
-
-union c2wr_mw_alloc {
-	struct c2wr_mw_alloc_req req;
-	struct c2wr_mw_alloc_rep rep;
-} __attribute__((packed)) ;
-
-/*
- *------------------------ WRs -----------------------
- */
-
-struct c2wr_user_hdr {
-	struct c2wr_hdr hdr;		/* Has status and WR Type */
-} __attribute__((packed)) ;
-
-enum c2_qp_state {
-	C2_QP_STATE_IDLE = 0x01,
-	C2_QP_STATE_CONNECTING = 0x02,
-	C2_QP_STATE_RTS = 0x04,
-	C2_QP_STATE_CLOSING = 0x08,
-	C2_QP_STATE_TERMINATE = 0x10,
-	C2_QP_STATE_ERROR = 0x20,
-};
-
-/* Completion queue entry. */
-struct c2wr_ce {
-	struct c2wr_hdr hdr;		/* Has status and WR Type */
-	u64 qp_user_context;	/* c2_user_qp_t * */
-	u32 qp_state;		/* Current QP State */
-	u32 handle;		/* QPID or EP Handle */
-	__be32 bytes_rcvd;		/* valid for RECV WCs */
-	u32 stag;
-} __attribute__((packed)) ;
-
-
-/*
- * Flags used for all post-sq WRs.  These must fit in the flags
- * field of the struct c2wr_hdr (eight bits).
- */
-enum {
-	SQ_SIGNALED = 0x01,
-	SQ_READ_FENCE = 0x02,
-	SQ_FENCE = 0x04,
-};
-
-/*
- * Common fields for all post-sq WRs.  Namely the standard header and a
- * secondary header with fields common to all post-sq WRs.
- */
-struct c2_sq_hdr {
-	struct c2wr_user_hdr user_hdr;
-} __attribute__((packed));
-
-/*
- * Same as above but for post-rq WRs.
- */
-struct c2_rq_hdr {
-	struct c2wr_user_hdr user_hdr;
-} __attribute__((packed));
-
-/*
- * use the same struct for all sends.
- */
-struct c2wr_send_req {
-	struct c2_sq_hdr sq_hdr;
-	__be32 sge_len;
-	__be32 remote_stag;
-	u8 data[0];		/* SGE array */
-} __attribute__((packed));
-
-union c2wr_send {
-	struct c2wr_send_req req;
-	struct c2wr_ce rep;
-} __attribute__((packed));
-
-struct c2wr_rdma_write_req {
-	struct c2_sq_hdr sq_hdr;
-	__be64 remote_to;
-	__be32 remote_stag;
-	__be32 sge_len;
-	u8 data[0];		/* SGE array */
-} __attribute__((packed));
-
-union c2wr_rdma_write {
-	struct c2wr_rdma_write_req req;
-	struct c2wr_ce rep;
-} __attribute__((packed));
-
-struct c2wr_rdma_read_req {
-	struct c2_sq_hdr sq_hdr;
-	__be64 local_to;
-	__be64 remote_to;
-	__be32 local_stag;
-	__be32 remote_stag;
-	__be32 length;
-} __attribute__((packed));
-
-union c2wr_rdma_read {
-	struct c2wr_rdma_read_req req;
-	struct c2wr_ce rep;
-} __attribute__((packed));
-
-struct c2wr_mw_bind_req {
-	struct c2_sq_hdr sq_hdr;
-	u64 va;
-	u8 stag_key;
-	u8 pad[3];
-	u32 mw_stag_index;
-	u32 mr_stag_index;
-	u32 length;
-	u32 flags;
-} __attribute__((packed));
-
-union c2wr_mw_bind {
-	struct c2wr_mw_bind_req req;
-	struct c2wr_ce rep;
-} __attribute__((packed));
-
-struct c2wr_nsmr_fastreg_req {
-	struct c2_sq_hdr sq_hdr;
-	u64 va;
-	u8 stag_key;
-	u8 pad[3];
-	u32 stag_index;
-	u32 pbe_size;
-	u32 fbo;
-	u32 length;
-	u32 addrs_length;
-	/* array of paddrs (must be aligned on a 64bit boundary) */
-	u64 paddrs[0];
-} __attribute__((packed));
-
-union c2wr_nsmr_fastreg {
-	struct c2wr_nsmr_fastreg_req req;
-	struct c2wr_ce rep;
-} __attribute__((packed));
-
-struct c2wr_stag_invalidate_req {
-	struct c2_sq_hdr sq_hdr;
-	u8 stag_key;
-	u8 pad[3];
-	u32 stag_index;
-} __attribute__((packed));
-
-union c2wr_stag_invalidate {
-	struct c2wr_stag_invalidate_req req;
-	struct c2wr_ce rep;
-} __attribute__((packed));
-
-union c2wr_sqwr {
-	struct c2_sq_hdr sq_hdr;
-	struct c2wr_send_req send;
-	struct c2wr_send_req send_se;
-	struct c2wr_send_req send_inv;
-	struct c2wr_send_req send_se_inv;
-	struct c2wr_rdma_write_req rdma_write;
-	struct c2wr_rdma_read_req rdma_read;
-	struct c2wr_mw_bind_req mw_bind;
-	struct c2wr_nsmr_fastreg_req nsmr_fastreg;
-	struct c2wr_stag_invalidate_req stag_inv;
-} __attribute__((packed));
-
-
-/*
- * RQ WRs
- */
-struct c2wr_rqwr {
-	struct c2_rq_hdr rq_hdr;
-	u8 data[0];		/* array of SGEs */
-} __attribute__((packed));
-
-union c2wr_recv {
-	struct c2wr_rqwr req;
-	struct c2wr_ce rep;
-} __attribute__((packed));
-
-/*
- * All AEs start with this header.  Most AEs only need to convey the
- * information in the header.  Some, like LLP connection events, need
- * more info.  The union typdef c2wr_ae_t has all the possible AEs.
- *
- * hdr.context is the user_context from the rnic_open WR.  NULL If this
- * is not affiliated with an rnic
- *
- * hdr.id is the AE identifier (eg;  CCAE_REMOTE_SHUTDOWN,
- * CCAE_LLP_CLOSE_COMPLETE)
- *
- * resource_type is one of:  C2_RES_IND_QP, C2_RES_IND_CQ, C2_RES_IND_SRQ
- *
- * user_context is the context passed down when the host created the resource.
- */
-struct c2wr_ae_hdr {
-	struct c2wr_hdr hdr;
-	u64 user_context;	/* user context for this res. */
-	__be32 resource_type;	/* see enum c2_resource_indicator */
-	__be32 resource;	/* handle for resource */
-	__be32 qp_state;	/* current QP State */
-} __attribute__((packed));
-
-/*
- * After submitting the CCAE_ACTIVE_CONNECT_RESULTS message on the AEQ,
- * the adapter moves the QP into RTS state
- */
-struct c2wr_ae_active_connect_results {
-	struct c2wr_ae_hdr ae_hdr;
-	__be32 laddr;
-	__be32 raddr;
-	__be16 lport;
-	__be16 rport;
-	__be32 private_data_length;
-	u8 private_data[0];	/* data is in-line in the msg. */
-} __attribute__((packed));
-
-/*
- * When connections are established by the stack (and the private data
- * MPA frame is received), the adapter will generate an event to the host.
- * The details of the connection, any private data, and the new connection
- * request handle is passed up via the CCAE_CONNECTION_REQUEST msg on the
- * AE queue:
- */
-struct c2wr_ae_connection_request {
-	struct c2wr_ae_hdr ae_hdr;
-	u32 cr_handle;		/* connreq handle (sock ptr) */
-	__be32 laddr;
-	__be32 raddr;
-	__be16 lport;
-	__be16 rport;
-	__be32 private_data_length;
-	u8 private_data[0];	/* data is in-line in the msg. */
-} __attribute__((packed));
-
-union c2wr_ae {
-	struct c2wr_ae_hdr ae_generic;
-	struct c2wr_ae_active_connect_results ae_active_connect_results;
-	struct c2wr_ae_connection_request ae_connection_request;
-} __attribute__((packed));
-
-struct c2wr_init_req {
-	struct c2wr_hdr hdr;
-	__be64 hint_count;
-	__be64 q0_host_shared;
-	__be64 q1_host_shared;
-	__be64 q1_host_msg_pool;
-	__be64 q2_host_shared;
-	__be64 q2_host_msg_pool;
-} __attribute__((packed));
-
-struct c2wr_init_rep {
-	struct c2wr_hdr hdr;
-} __attribute__((packed));
-
-union c2wr_init {
-	struct c2wr_init_req req;
-	struct c2wr_init_rep rep;
-} __attribute__((packed));
-
-/*
- * For upgrading flash.
- */
-
-struct c2wr_flash_init_req {
-	struct c2wr_hdr hdr;
-	u32 rnic_handle;
-} __attribute__((packed));
-
-struct c2wr_flash_init_rep {
-	struct c2wr_hdr hdr;
-	u32 adapter_flash_buf_offset;
-	u32 adapter_flash_len;
-} __attribute__((packed));
-
-union c2wr_flash_init {
-	struct c2wr_flash_init_req req;
-	struct c2wr_flash_init_rep rep;
-} __attribute__((packed));
-
-struct c2wr_flash_req {
-	struct c2wr_hdr hdr;
-	u32 rnic_handle;
-	u32 len;
-} __attribute__((packed));
-
-struct c2wr_flash_rep {
-	struct c2wr_hdr hdr;
-	u32 status;
-} __attribute__((packed));
-
-union c2wr_flash {
-	struct c2wr_flash_req req;
-	struct c2wr_flash_rep rep;
-} __attribute__((packed));
-
-struct c2wr_buf_alloc_req {
-	struct c2wr_hdr hdr;
-	u32 rnic_handle;
-	u32 size;
-} __attribute__((packed));
-
-struct c2wr_buf_alloc_rep {
-	struct c2wr_hdr hdr;
-	u32 offset;		/* 0 if mem not available */
-	u32 size;		/* 0 if mem not available */
-} __attribute__((packed));
-
-union c2wr_buf_alloc {
-	struct c2wr_buf_alloc_req req;
-	struct c2wr_buf_alloc_rep rep;
-} __attribute__((packed));
-
-struct c2wr_buf_free_req {
-	struct c2wr_hdr hdr;
-	u32 rnic_handle;
-	u32 offset;		/* Must match value from alloc */
-	u32 size;		/* Must match value from alloc */
-} __attribute__((packed));
-
-struct c2wr_buf_free_rep {
-	struct c2wr_hdr hdr;
-} __attribute__((packed));
-
-union c2wr_buf_free {
-	struct c2wr_buf_free_req req;
-	struct c2wr_ce rep;
-} __attribute__((packed));
-
-struct c2wr_flash_write_req {
-	struct c2wr_hdr hdr;
-	u32 rnic_handle;
-	u32 offset;
-	u32 size;
-	u32 type;
-	u32 flags;
-} __attribute__((packed));
-
-struct c2wr_flash_write_rep {
-	struct c2wr_hdr hdr;
-	u32 status;
-} __attribute__((packed));
-
-union c2wr_flash_write {
-	struct c2wr_flash_write_req req;
-	struct c2wr_flash_write_rep rep;
-} __attribute__((packed));
-
-/*
- * Messages for LLP connection setup.
- */
-
-/*
- * Listen Request.  This allocates a listening endpoint to allow passive
- * connection setup.  Newly established LLP connections are passed up
- * via an AE.  See c2wr_ae_connection_request_t
- */
-struct c2wr_ep_listen_create_req {
-	struct c2wr_hdr hdr;
-	u64 user_context;	/* returned in AEs. */
-	u32 rnic_handle;
-	__be32 local_addr;		/* local addr, or 0  */
-	__be16 local_port;		/* 0 means "pick one" */
-	u16 pad;
-	__be32 backlog;		/* tradional tcp listen bl */
-} __attribute__((packed));
-
-struct c2wr_ep_listen_create_rep {
-	struct c2wr_hdr hdr;
-	u32 ep_handle;		/* handle to new listening ep */
-	u16 local_port;		/* resulting port... */
-	u16 pad;
-} __attribute__((packed));
-
-union c2wr_ep_listen_create {
-	struct c2wr_ep_listen_create_req req;
-	struct c2wr_ep_listen_create_rep rep;
-} __attribute__((packed));
-
-struct c2wr_ep_listen_destroy_req {
-	struct c2wr_hdr hdr;
-	u32 rnic_handle;
-	u32 ep_handle;
-} __attribute__((packed));
-
-struct c2wr_ep_listen_destroy_rep {
-	struct c2wr_hdr hdr;
-} __attribute__((packed));
-
-union c2wr_ep_listen_destroy {
-	struct c2wr_ep_listen_destroy_req req;
-	struct c2wr_ep_listen_destroy_rep rep;
-} __attribute__((packed));
-
-struct c2wr_ep_query_req {
-	struct c2wr_hdr hdr;
-	u32 rnic_handle;
-	u32 ep_handle;
-} __attribute__((packed));
-
-struct c2wr_ep_query_rep {
-	struct c2wr_hdr hdr;
-	u32 rnic_handle;
-	u32 local_addr;
-	u32 remote_addr;
-	u16 local_port;
-	u16 remote_port;
-} __attribute__((packed));
-
-union c2wr_ep_query {
-	struct c2wr_ep_query_req req;
-	struct c2wr_ep_query_rep rep;
-} __attribute__((packed));
-
-
-/*
- * The host passes this down to indicate acceptance of a pending iWARP
- * connection.  The cr_handle was obtained from the CONNECTION_REQUEST
- * AE passed up by the adapter.  See c2wr_ae_connection_request_t.
- */
-struct c2wr_cr_accept_req {
-	struct c2wr_hdr hdr;
-	u32 rnic_handle;
-	u32 qp_handle;		/* QP to bind to this LLP conn */
-	u32 ep_handle;		/* LLP  handle to accept */
-	__be32 private_data_length;
-	u8 private_data[0];	/* data in-line in msg. */
-} __attribute__((packed));
-
-/*
- * adapter sends reply when private data is successfully submitted to
- * the LLP.
- */
-struct c2wr_cr_accept_rep {
-	struct c2wr_hdr hdr;
-} __attribute__((packed));
-
-union c2wr_cr_accept {
-	struct c2wr_cr_accept_req req;
-	struct c2wr_cr_accept_rep rep;
-} __attribute__((packed));
-
-/*
- * The host sends this down if a given iWARP connection request was
- * rejected by the consumer.  The cr_handle was obtained from a
- * previous c2wr_ae_connection_request_t AE sent by the adapter.
- */
-struct  c2wr_cr_reject_req {
-	struct c2wr_hdr hdr;
-	u32 rnic_handle;
-	u32 ep_handle;		/* LLP handle to reject */
-} __attribute__((packed));
-
-/*
- * Dunno if this is needed, but we'll add it for now.  The adapter will
- * send the reject_reply after the LLP endpoint has been destroyed.
- */
-struct  c2wr_cr_reject_rep {
-	struct c2wr_hdr hdr;
-} __attribute__((packed));
-
-union c2wr_cr_reject {
-	struct c2wr_cr_reject_req req;
-	struct c2wr_cr_reject_rep rep;
-} __attribute__((packed));
-
-/*
- * console command.  Used to implement a debug console over the verbs
- * request and reply queues.
- */
-
-/*
- * Console request message.  It contains:
- *	- message hdr with id = CCWR_CONSOLE
- *	- the physaddr/len of host memory to be used for the reply.
- *	- the command string.  eg:  "netstat -s" or "zoneinfo"
- */
-struct c2wr_console_req {
-	struct c2wr_hdr hdr;		/* id = CCWR_CONSOLE */
-	u64 reply_buf;		/* pinned host buf for reply */
-	u32 reply_buf_len;	/* length of reply buffer */
-	u8 command[0];		/* NUL terminated ascii string */
-	/* containing the command req */
-} __attribute__((packed));
-
-/*
- * flags used in the console reply.
- */
-enum c2_console_flags {
-	CONS_REPLY_TRUNCATED = 0x00000001	/* reply was truncated */
-} __attribute__((packed));
-
-/*
- * Console reply message.
- * hdr.result contains the c2_status_t error if the reply was _not_ generated,
- * or C2_OK if the reply was generated.
- */
-struct c2wr_console_rep {
-	struct c2wr_hdr hdr;		/* id = CCWR_CONSOLE */
-	u32 flags;
-} __attribute__((packed));
-
-union c2wr_console {
-	struct c2wr_console_req req;
-	struct c2wr_console_rep rep;
-} __attribute__((packed));
-
-
-/*
- * Giant union with all WRs.  Makes life easier...
- */
-union c2wr {
-	struct c2wr_hdr hdr;
-	struct c2wr_user_hdr user_hdr;
-	union c2wr_rnic_open rnic_open;
-	union c2wr_rnic_query rnic_query;
-	union c2wr_rnic_getconfig rnic_getconfig;
-	union c2wr_rnic_setconfig rnic_setconfig;
-	union c2wr_rnic_close rnic_close;
-	union c2wr_cq_create cq_create;
-	union c2wr_cq_modify cq_modify;
-	union c2wr_cq_destroy cq_destroy;
-	union c2wr_pd_alloc pd_alloc;
-	union c2wr_pd_dealloc pd_dealloc;
-	union c2wr_srq_create srq_create;
-	union c2wr_srq_destroy srq_destroy;
-	union c2wr_qp_create qp_create;
-	union c2wr_qp_query qp_query;
-	union c2wr_qp_modify qp_modify;
-	union c2wr_qp_destroy qp_destroy;
-	struct c2wr_qp_connect qp_connect;
-	union c2wr_nsmr_stag_alloc nsmr_stag_alloc;
-	union c2wr_nsmr_register nsmr_register;
-	union c2wr_nsmr_pbl nsmr_pbl;
-	union c2wr_mr_query mr_query;
-	union c2wr_mw_query mw_query;
-	union c2wr_stag_dealloc stag_dealloc;
-	union c2wr_sqwr sqwr;
-	struct c2wr_rqwr rqwr;
-	struct c2wr_ce ce;
-	union c2wr_ae ae;
-	union c2wr_init init;
-	union c2wr_ep_listen_create ep_listen_create;
-	union c2wr_ep_listen_destroy ep_listen_destroy;
-	union c2wr_cr_accept cr_accept;
-	union c2wr_cr_reject cr_reject;
-	union c2wr_console console;
-	union c2wr_flash_init flash_init;
-	union c2wr_flash flash;
-	union c2wr_buf_alloc buf_alloc;
-	union c2wr_buf_free buf_free;
-	union c2wr_flash_write flash_write;
-} __attribute__((packed));
-
-
-/*
- * Accessors for the wr fields that are packed together tightly to
- * reduce the wr message size.  The wr arguments are void* so that
- * either a struct c2wr*, a struct c2wr_hdr*, or a pointer to any of the types
- * in the struct c2wr union can be passed in.
- */
-static __inline__ u8 c2_wr_get_id(void *wr)
-{
-	return ((struct c2wr_hdr *) wr)->id;
-}
-static __inline__ void c2_wr_set_id(void *wr, u8 id)
-{
-	((struct c2wr_hdr *) wr)->id = id;
-}
-static __inline__ u8 c2_wr_get_result(void *wr)
-{
-	return ((struct c2wr_hdr *) wr)->result;
-}
-static __inline__ void c2_wr_set_result(void *wr, u8 result)
-{
-	((struct c2wr_hdr *) wr)->result = result;
-}
-static __inline__ u8 c2_wr_get_flags(void *wr)
-{
-	return ((struct c2wr_hdr *) wr)->flags;
-}
-static __inline__ void c2_wr_set_flags(void *wr, u8 flags)
-{
-	((struct c2wr_hdr *) wr)->flags = flags;
-}
-static __inline__ u8 c2_wr_get_sge_count(void *wr)
-{
-	return ((struct c2wr_hdr *) wr)->sge_count;
-}
-static __inline__ void c2_wr_set_sge_count(void *wr, u8 sge_count)
-{
-	((struct c2wr_hdr *) wr)->sge_count = sge_count;
-}
-static __inline__ __be32 c2_wr_get_wqe_count(void *wr)
-{
-	return ((struct c2wr_hdr *) wr)->wqe_count;
-}
-static __inline__ void c2_wr_set_wqe_count(void *wr, u32 wqe_count)
-{
-	((struct c2wr_hdr *) wr)->wqe_count = wqe_count;
-}
-
-#endif				/* _C2_WR_H_ */
diff --git a/drivers/staging/rdma/ehca/Kconfig b/drivers/staging/rdma/ehca/Kconfig
deleted file mode 100644
index 3fadd2a..0000000
--- a/drivers/staging/rdma/ehca/Kconfig
+++ /dev/null
@@ -1,10 +0,0 @@
-config INFINIBAND_EHCA
-	tristate "eHCA support"
-	depends on IBMEBUS
-	---help---
-	This driver supports the deprecated IBM pSeries eHCA InfiniBand
-	adapter.
-
-	To compile the driver as a module, choose M here. The module
-	will be called ib_ehca.
-
diff --git a/drivers/staging/rdma/ehca/Makefile b/drivers/staging/rdma/ehca/Makefile
deleted file mode 100644
index 74d284e..0000000
--- a/drivers/staging/rdma/ehca/Makefile
+++ /dev/null
@@ -1,16 +0,0 @@
-#  Authors: Heiko J Schick <schickhj@de.ibm.com>
-#           Christoph Raisch <raisch@de.ibm.com>
-#           Joachim Fenkes <fenkes@de.ibm.com>
-#
-#  Copyright (c) 2005 IBM Corporation
-#
-#  All rights reserved.
-#
-#  This source code is distributed under a dual license of GPL v2.0 and OpenIB BSD.
-
-obj-$(CONFIG_INFINIBAND_EHCA) += ib_ehca.o
-
-ib_ehca-objs  = ehca_main.o ehca_hca.o ehca_mcast.o ehca_pd.o ehca_av.o ehca_eq.o \
-		ehca_cq.o ehca_qp.o ehca_sqp.o ehca_mrmw.o ehca_reqs.o ehca_irq.o \
-		ehca_uverbs.o ipz_pt_fn.o hcp_if.o hcp_phyp.o
-
diff --git a/drivers/staging/rdma/ehca/TODO b/drivers/staging/rdma/ehca/TODO
deleted file mode 100644
index 199a4a6..0000000
--- a/drivers/staging/rdma/ehca/TODO
+++ /dev/null
@@ -1,4 +0,0 @@
-9/2015
-
-The ehca driver has been deprecated and moved to drivers/staging/rdma.
-It will be removed in the 4.6 merge window.
diff --git a/drivers/staging/rdma/ehca/ehca_av.c b/drivers/staging/rdma/ehca/ehca_av.c
deleted file mode 100644
index 94e088c..0000000
--- a/drivers/staging/rdma/ehca/ehca_av.c
+++ /dev/null
@@ -1,279 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  address vector functions
- *
- *  Authors: Hoang-Nam Nguyen <hnguyen@de.ibm.com>
- *           Khadija Souissi <souissik@de.ibm.com>
- *           Reinhard Ernst <rernst@de.ibm.com>
- *           Christoph Raisch <raisch@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/slab.h>
-
-#include "ehca_tools.h"
-#include "ehca_iverbs.h"
-#include "hcp_if.h"
-
-static struct kmem_cache *av_cache;
-
-int ehca_calc_ipd(struct ehca_shca *shca, int port,
-		  enum ib_rate path_rate, u32 *ipd)
-{
-	int path = ib_rate_to_mult(path_rate);
-	int link, ret;
-	struct ib_port_attr pa;
-
-	if (path_rate == IB_RATE_PORT_CURRENT) {
-		*ipd = 0;
-		return 0;
-	}
-
-	if (unlikely(path < 0)) {
-		ehca_err(&shca->ib_device, "Invalid static rate! path_rate=%x",
-			 path_rate);
-		return -EINVAL;
-	}
-
-	ret = ehca_query_port(&shca->ib_device, port, &pa);
-	if (unlikely(ret < 0)) {
-		ehca_err(&shca->ib_device, "Failed to query port  ret=%i", ret);
-		return ret;
-	}
-
-	link = ib_width_enum_to_int(pa.active_width) * pa.active_speed;
-
-	if (path >= link)
-		/* no need to throttle if path faster than link */
-		*ipd = 0;
-	else
-		/* IPD = round((link / path) - 1) */
-		*ipd = ((link + (path >> 1)) / path) - 1;
-
-	return 0;
-}
-
-struct ib_ah *ehca_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
-{
-	int ret;
-	struct ehca_av *av;
-	struct ehca_shca *shca = container_of(pd->device, struct ehca_shca,
-					      ib_device);
-
-	av = kmem_cache_alloc(av_cache, GFP_KERNEL);
-	if (!av) {
-		ehca_err(pd->device, "Out of memory pd=%p ah_attr=%p",
-			 pd, ah_attr);
-		return ERR_PTR(-ENOMEM);
-	}
-
-	av->av.sl = ah_attr->sl;
-	av->av.dlid = ah_attr->dlid;
-	av->av.slid_path_bits = ah_attr->src_path_bits;
-
-	if (ehca_static_rate < 0) {
-		u32 ipd;
-
-		if (ehca_calc_ipd(shca, ah_attr->port_num,
-				  ah_attr->static_rate, &ipd)) {
-			ret = -EINVAL;
-			goto create_ah_exit1;
-		}
-		av->av.ipd = ipd;
-	} else
-		av->av.ipd = ehca_static_rate;
-
-	av->av.lnh = ah_attr->ah_flags;
-	av->av.grh.word_0 = EHCA_BMASK_SET(GRH_IPVERSION_MASK, 6);
-	av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_TCLASS_MASK,
-					    ah_attr->grh.traffic_class);
-	av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_FLOWLABEL_MASK,
-					    ah_attr->grh.flow_label);
-	av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_HOPLIMIT_MASK,
-					    ah_attr->grh.hop_limit);
-	av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_NEXTHEADER_MASK, 0x1B);
-	/* set sgid in grh.word_1 */
-	if (ah_attr->ah_flags & IB_AH_GRH) {
-		int rc;
-		struct ib_port_attr port_attr;
-		union ib_gid gid;
-
-		memset(&port_attr, 0, sizeof(port_attr));
-		rc = ehca_query_port(pd->device, ah_attr->port_num,
-				     &port_attr);
-		if (rc) { /* invalid port number */
-			ret = -EINVAL;
-			ehca_err(pd->device, "Invalid port number "
-				 "ehca_query_port() returned %x "
-				 "pd=%p ah_attr=%p", rc, pd, ah_attr);
-			goto create_ah_exit1;
-		}
-		memset(&gid, 0, sizeof(gid));
-		rc = ehca_query_gid(pd->device,
-				    ah_attr->port_num,
-				    ah_attr->grh.sgid_index, &gid);
-		if (rc) {
-			ret = -EINVAL;
-			ehca_err(pd->device, "Failed to retrieve sgid "
-				 "ehca_query_gid() returned %x "
-				 "pd=%p ah_attr=%p", rc, pd, ah_attr);
-			goto create_ah_exit1;
-		}
-		memcpy(&av->av.grh.word_1, &gid, sizeof(gid));
-	}
-	av->av.pmtu = shca->max_mtu;
-
-	/* dgid comes in grh.word_3 */
-	memcpy(&av->av.grh.word_3, &ah_attr->grh.dgid,
-	       sizeof(ah_attr->grh.dgid));
-
-	return &av->ib_ah;
-
-create_ah_exit1:
-	kmem_cache_free(av_cache, av);
-
-	return ERR_PTR(ret);
-}
-
-int ehca_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr)
-{
-	struct ehca_av *av;
-	struct ehca_ud_av new_ehca_av;
-	struct ehca_shca *shca = container_of(ah->pd->device, struct ehca_shca,
-					      ib_device);
-
-	memset(&new_ehca_av, 0, sizeof(new_ehca_av));
-	new_ehca_av.sl = ah_attr->sl;
-	new_ehca_av.dlid = ah_attr->dlid;
-	new_ehca_av.slid_path_bits = ah_attr->src_path_bits;
-	new_ehca_av.ipd = ah_attr->static_rate;
-	new_ehca_av.lnh = EHCA_BMASK_SET(GRH_FLAG_MASK,
-					 (ah_attr->ah_flags & IB_AH_GRH) > 0);
-	new_ehca_av.grh.word_0 = EHCA_BMASK_SET(GRH_TCLASS_MASK,
-						ah_attr->grh.traffic_class);
-	new_ehca_av.grh.word_0 |= EHCA_BMASK_SET(GRH_FLOWLABEL_MASK,
-						 ah_attr->grh.flow_label);
-	new_ehca_av.grh.word_0 |= EHCA_BMASK_SET(GRH_HOPLIMIT_MASK,
-						 ah_attr->grh.hop_limit);
-	new_ehca_av.grh.word_0 |= EHCA_BMASK_SET(GRH_NEXTHEADER_MASK, 0x1b);
-
-	/* set sgid in grh.word_1 */
-	if (ah_attr->ah_flags & IB_AH_GRH) {
-		int rc;
-		struct ib_port_attr port_attr;
-		union ib_gid gid;
-
-		memset(&port_attr, 0, sizeof(port_attr));
-		rc = ehca_query_port(ah->device, ah_attr->port_num,
-				     &port_attr);
-		if (rc) { /* invalid port number */
-			ehca_err(ah->device, "Invalid port number "
-				 "ehca_query_port() returned %x "
-				 "ah=%p ah_attr=%p port_num=%x",
-				 rc, ah, ah_attr, ah_attr->port_num);
-			return -EINVAL;
-		}
-		memset(&gid, 0, sizeof(gid));
-		rc = ehca_query_gid(ah->device,
-				    ah_attr->port_num,
-				    ah_attr->grh.sgid_index, &gid);
-		if (rc) {
-			ehca_err(ah->device, "Failed to retrieve sgid "
-				 "ehca_query_gid() returned %x "
-				 "ah=%p ah_attr=%p port_num=%x "
-				 "sgid_index=%x",
-				 rc, ah, ah_attr, ah_attr->port_num,
-				 ah_attr->grh.sgid_index);
-			return -EINVAL;
-		}
-		memcpy(&new_ehca_av.grh.word_1, &gid, sizeof(gid));
-	}
-
-	new_ehca_av.pmtu = shca->max_mtu;
-
-	memcpy(&new_ehca_av.grh.word_3, &ah_attr->grh.dgid,
-	       sizeof(ah_attr->grh.dgid));
-
-	av = container_of(ah, struct ehca_av, ib_ah);
-	av->av = new_ehca_av;
-
-	return 0;
-}
-
-int ehca_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr)
-{
-	struct ehca_av *av = container_of(ah, struct ehca_av, ib_ah);
-
-	memcpy(&ah_attr->grh.dgid, &av->av.grh.word_3,
-	       sizeof(ah_attr->grh.dgid));
-	ah_attr->sl = av->av.sl;
-
-	ah_attr->dlid = av->av.dlid;
-
-	ah_attr->src_path_bits = av->av.slid_path_bits;
-	ah_attr->static_rate = av->av.ipd;
-	ah_attr->ah_flags = EHCA_BMASK_GET(GRH_FLAG_MASK, av->av.lnh);
-	ah_attr->grh.traffic_class = EHCA_BMASK_GET(GRH_TCLASS_MASK,
-						    av->av.grh.word_0);
-	ah_attr->grh.hop_limit = EHCA_BMASK_GET(GRH_HOPLIMIT_MASK,
-						av->av.grh.word_0);
-	ah_attr->grh.flow_label = EHCA_BMASK_GET(GRH_FLOWLABEL_MASK,
-						 av->av.grh.word_0);
-
-	return 0;
-}
-
-int ehca_destroy_ah(struct ib_ah *ah)
-{
-	kmem_cache_free(av_cache, container_of(ah, struct ehca_av, ib_ah));
-
-	return 0;
-}
-
-int ehca_init_av_cache(void)
-{
-	av_cache = kmem_cache_create("ehca_cache_av",
-				   sizeof(struct ehca_av), 0,
-				   SLAB_HWCACHE_ALIGN,
-				   NULL);
-	if (!av_cache)
-		return -ENOMEM;
-	return 0;
-}
-
-void ehca_cleanup_av_cache(void)
-{
-	kmem_cache_destroy(av_cache);
-}
diff --git a/drivers/staging/rdma/ehca/ehca_classes.h b/drivers/staging/rdma/ehca/ehca_classes.h
deleted file mode 100644
index e8c3387..0000000
--- a/drivers/staging/rdma/ehca/ehca_classes.h
+++ /dev/null
@@ -1,481 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  Struct definition for eHCA internal structures
- *
- *  Authors: Heiko J Schick <schickhj@de.ibm.com>
- *           Christoph Raisch <raisch@de.ibm.com>
- *           Joachim Fenkes <fenkes@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __EHCA_CLASSES_H__
-#define __EHCA_CLASSES_H__
-
-struct ehca_module;
-struct ehca_qp;
-struct ehca_cq;
-struct ehca_eq;
-struct ehca_mr;
-struct ehca_mw;
-struct ehca_pd;
-struct ehca_av;
-
-#include <linux/wait.h>
-#include <linux/mutex.h>
-
-#include <rdma/ib_verbs.h>
-#include <rdma/ib_user_verbs.h>
-
-#ifdef CONFIG_PPC64
-#include "ehca_classes_pSeries.h"
-#endif
-#include "ipz_pt_fn.h"
-#include "ehca_qes.h"
-#include "ehca_irq.h"
-
-#define EHCA_EQE_CACHE_SIZE 20
-#define EHCA_MAX_NUM_QUEUES 0xffff
-
-struct ehca_eqe_cache_entry {
-	struct ehca_eqe *eqe;
-	struct ehca_cq *cq;
-};
-
-struct ehca_eq {
-	u32 length;
-	struct ipz_queue ipz_queue;
-	struct ipz_eq_handle ipz_eq_handle;
-	struct work_struct work;
-	struct h_galpas galpas;
-	int is_initialized;
-	struct ehca_pfeq pf;
-	spinlock_t spinlock;
-	struct tasklet_struct interrupt_task;
-	u32 ist;
-	spinlock_t irq_spinlock;
-	struct ehca_eqe_cache_entry eqe_cache[EHCA_EQE_CACHE_SIZE];
-};
-
-struct ehca_sma_attr {
-	u16 lid, lmc, sm_sl, sm_lid;
-	u16 pkey_tbl_len, pkeys[16];
-};
-
-struct ehca_sport {
-	struct ib_cq *ibcq_aqp1;
-	struct ib_qp *ibqp_sqp[2];
-	/* lock to serialze modify_qp() calls for sqp in normal
-	 * and irq path (when event PORT_ACTIVE is received first time)
-	 */
-	spinlock_t mod_sqp_lock;
-	enum ib_port_state port_state;
-	struct ehca_sma_attr saved_attr;
-	u32 pma_qp_nr;
-};
-
-#define HCA_CAP_MR_PGSIZE_4K  0x80000000
-#define HCA_CAP_MR_PGSIZE_64K 0x40000000
-#define HCA_CAP_MR_PGSIZE_1M  0x20000000
-#define HCA_CAP_MR_PGSIZE_16M 0x10000000
-
-struct ehca_shca {
-	struct ib_device ib_device;
-	struct platform_device *ofdev;
-	u8 num_ports;
-	int hw_level;
-	struct list_head shca_list;
-	struct ipz_adapter_handle ipz_hca_handle;
-	struct ehca_sport sport[2];
-	struct ehca_eq eq;
-	struct ehca_eq neq;
-	struct ehca_mr *maxmr;
-	struct ehca_pd *pd;
-	struct h_galpas galpas;
-	struct mutex modify_mutex;
-	u64 hca_cap;
-	/* MR pgsize: bit 0-3 means 4K, 64K, 1M, 16M respectively */
-	u32 hca_cap_mr_pgsize;
-	int max_mtu;
-	int max_num_qps;
-	int max_num_cqs;
-	atomic_t num_cqs;
-	atomic_t num_qps;
-};
-
-struct ehca_pd {
-	struct ib_pd ib_pd;
-	struct ipz_pd fw_pd;
-	/* small queue mgmt */
-	struct mutex lock;
-	struct list_head free[2];
-	struct list_head full[2];
-};
-
-enum ehca_ext_qp_type {
-	EQPT_NORMAL    = 0,
-	EQPT_LLQP      = 1,
-	EQPT_SRQBASE   = 2,
-	EQPT_SRQ       = 3,
-};
-
-/* struct to cache modify_qp()'s parms for GSI/SMI qp */
-struct ehca_mod_qp_parm {
-	int mask;
-	struct ib_qp_attr attr;
-};
-
-#define EHCA_MOD_QP_PARM_MAX 4
-
-#define QMAP_IDX_MASK 0xFFFFULL
-
-/* struct for tracking if cqes have been reported to the application */
-struct ehca_qmap_entry {
-	u16 app_wr_id;
-	u8 reported;
-	u8 cqe_req;
-};
-
-struct ehca_queue_map {
-	struct ehca_qmap_entry *map;
-	unsigned int entries;
-	unsigned int tail;
-	unsigned int left_to_poll;
-	unsigned int next_wqe_idx;   /* Idx to first wqe to be flushed */
-};
-
-/* function to calculate the next index for the qmap */
-static inline unsigned int next_index(unsigned int cur_index, unsigned int limit)
-{
-	unsigned int temp = cur_index + 1;
-	return (temp == limit) ? 0 : temp;
-}
-
-struct ehca_qp {
-	union {
-		struct ib_qp ib_qp;
-		struct ib_srq ib_srq;
-	};
-	u32 qp_type;
-	enum ehca_ext_qp_type ext_type;
-	enum ib_qp_state state;
-	struct ipz_queue ipz_squeue;
-	struct ehca_queue_map sq_map;
-	struct ipz_queue ipz_rqueue;
-	struct ehca_queue_map rq_map;
-	struct h_galpas galpas;
-	u32 qkey;
-	u32 real_qp_num;
-	u32 token;
-	spinlock_t spinlock_s;
-	spinlock_t spinlock_r;
-	u32 sq_max_inline_data_size;
-	struct ipz_qp_handle ipz_qp_handle;
-	struct ehca_pfqp pf;
-	struct ib_qp_init_attr init_attr;
-	struct ehca_cq *send_cq;
-	struct ehca_cq *recv_cq;
-	unsigned int sqerr_purgeflag;
-	struct hlist_node list_entries;
-	/* array to cache modify_qp()'s parms for GSI/SMI qp */
-	struct ehca_mod_qp_parm *mod_qp_parm;
-	int mod_qp_parm_idx;
-	/* mmap counter for resources mapped into user space */
-	u32 mm_count_squeue;
-	u32 mm_count_rqueue;
-	u32 mm_count_galpa;
-	/* unsolicited ack circumvention */
-	int unsol_ack_circ;
-	int mtu_shift;
-	u32 message_count;
-	u32 packet_count;
-	atomic_t nr_events; /* events seen */
-	wait_queue_head_t wait_completion;
-	int mig_armed;
-	struct list_head sq_err_node;
-	struct list_head rq_err_node;
-};
-
-#define IS_SRQ(qp) (qp->ext_type == EQPT_SRQ)
-#define HAS_SQ(qp) (qp->ext_type != EQPT_SRQ)
-#define HAS_RQ(qp) (qp->ext_type != EQPT_SRQBASE)
-
-/* must be power of 2 */
-#define QP_HASHTAB_LEN 8
-
-struct ehca_cq {
-	struct ib_cq ib_cq;
-	struct ipz_queue ipz_queue;
-	struct h_galpas galpas;
-	spinlock_t spinlock;
-	u32 cq_number;
-	u32 token;
-	u32 nr_of_entries;
-	struct ipz_cq_handle ipz_cq_handle;
-	struct ehca_pfcq pf;
-	spinlock_t cb_lock;
-	struct hlist_head qp_hashtab[QP_HASHTAB_LEN];
-	struct list_head entry;
-	u32 nr_callbacks;   /* #events assigned to cpu by scaling code */
-	atomic_t nr_events; /* #events seen */
-	wait_queue_head_t wait_completion;
-	spinlock_t task_lock;
-	/* mmap counter for resources mapped into user space */
-	u32 mm_count_queue;
-	u32 mm_count_galpa;
-	struct list_head sqp_err_list;
-	struct list_head rqp_err_list;
-};
-
-enum ehca_mr_flag {
-	EHCA_MR_FLAG_FMR = 0x80000000,	 /* FMR, created with ehca_alloc_fmr */
-	EHCA_MR_FLAG_MAXMR = 0x40000000, /* max-MR                           */
-};
-
-struct ehca_mr {
-	union {
-		struct ib_mr ib_mr;	/* must always be first in ehca_mr */
-		struct ib_fmr ib_fmr;	/* must always be first in ehca_mr */
-	} ib;
-	struct ib_umem *umem;
-	spinlock_t mrlock;
-
-	enum ehca_mr_flag flags;
-	u32 num_kpages;		/* number of kernel pages */
-	u32 num_hwpages;	/* number of hw pages to form MR */
-	u64 hwpage_size;	/* hw page size used for this MR */
-	int acl;		/* ACL (stored here for usage in reregister) */
-	u64 *start;		/* virtual start address (stored here for */
-				/* usage in reregister) */
-	u64 size;		/* size (stored here for usage in reregister) */
-	u32 fmr_page_size;	/* page size for FMR */
-	u32 fmr_max_pages;	/* max pages for FMR */
-	u32 fmr_max_maps;	/* max outstanding maps for FMR */
-	u32 fmr_map_cnt;	/* map counter for FMR */
-	/* fw specific data */
-	struct ipz_mrmw_handle ipz_mr_handle;	/* MR handle for h-calls */
-	struct h_galpas galpas;
-};
-
-struct ehca_mw {
-	struct ib_mw ib_mw;	/* gen2 mw, must always be first in ehca_mw */
-	spinlock_t mwlock;
-
-	u8 never_bound;		/* indication MW was never bound */
-	struct ipz_mrmw_handle ipz_mw_handle;	/* MW handle for h-calls */
-	struct h_galpas galpas;
-};
-
-enum ehca_mr_pgi_type {
-	EHCA_MR_PGI_PHYS   = 1,  /* type of ehca_reg_phys_mr,
-				  * ehca_rereg_phys_mr,
-				  * ehca_reg_internal_maxmr */
-	EHCA_MR_PGI_USER   = 2,  /* type of ehca_reg_user_mr */
-	EHCA_MR_PGI_FMR    = 3   /* type of ehca_map_phys_fmr */
-};
-
-struct ehca_mr_pginfo {
-	enum ehca_mr_pgi_type type;
-	u64 num_kpages;
-	u64 kpage_cnt;
-	u64 hwpage_size;     /* hw page size used for this MR */
-	u64 num_hwpages;     /* number of hw pages */
-	u64 hwpage_cnt;      /* counter for hw pages */
-	u64 next_hwpage;     /* next hw page in buffer/chunk/listelem */
-
-	union {
-		struct { /* type EHCA_MR_PGI_PHYS section */
-			u64 addr;
-			u16 size;
-		} phy;
-		struct { /* type EHCA_MR_PGI_USER section */
-			struct ib_umem *region;
-			struct scatterlist *next_sg;
-			u64 next_nmap;
-		} usr;
-		struct { /* type EHCA_MR_PGI_FMR section */
-			u64 fmr_pgsize;
-			u64 *page_list;
-			u64 next_listelem;
-		} fmr;
-	} u;
-};
-
-/* output parameters for MR/FMR hipz calls */
-struct ehca_mr_hipzout_parms {
-	struct ipz_mrmw_handle handle;
-	u32 lkey;
-	u32 rkey;
-	u64 len;
-	u64 vaddr;
-	u32 acl;
-};
-
-/* output parameters for MW hipz calls */
-struct ehca_mw_hipzout_parms {
-	struct ipz_mrmw_handle handle;
-	u32 rkey;
-};
-
-struct ehca_av {
-	struct ib_ah ib_ah;
-	struct ehca_ud_av av;
-};
-
-struct ehca_ucontext {
-	struct ib_ucontext ib_ucontext;
-};
-
-int ehca_init_pd_cache(void);
-void ehca_cleanup_pd_cache(void);
-int ehca_init_cq_cache(void);
-void ehca_cleanup_cq_cache(void);
-int ehca_init_qp_cache(void);
-void ehca_cleanup_qp_cache(void);
-int ehca_init_av_cache(void);
-void ehca_cleanup_av_cache(void);
-int ehca_init_mrmw_cache(void);
-void ehca_cleanup_mrmw_cache(void);
-int ehca_init_small_qp_cache(void);
-void ehca_cleanup_small_qp_cache(void);
-
-extern rwlock_t ehca_qp_idr_lock;
-extern rwlock_t ehca_cq_idr_lock;
-extern struct idr ehca_qp_idr;
-extern struct idr ehca_cq_idr;
-extern spinlock_t shca_list_lock;
-
-extern int ehca_static_rate;
-extern int ehca_port_act_time;
-extern bool ehca_use_hp_mr;
-extern bool ehca_scaling_code;
-extern int ehca_lock_hcalls;
-extern int ehca_nr_ports;
-extern int ehca_max_cq;
-extern int ehca_max_qp;
-
-struct ipzu_queue_resp {
-	u32 qe_size;      /* queue entry size */
-	u32 act_nr_of_sg;
-	u32 queue_length; /* queue length allocated in bytes */
-	u32 pagesize;
-	u32 toggle_state;
-	u32 offset; /* save offset within a page for small_qp */
-};
-
-struct ehca_create_cq_resp {
-	u32 cq_number;
-	u32 token;
-	struct ipzu_queue_resp ipz_queue;
-	u32 fw_handle_ofs;
-	u32 dummy;
-};
-
-struct ehca_create_qp_resp {
-	u32 qp_num;
-	u32 token;
-	u32 qp_type;
-	u32 ext_type;
-	u32 qkey;
-	/* qp_num assigned by ehca: sqp0/1 may have got different numbers */
-	u32 real_qp_num;
-	u32 fw_handle_ofs;
-	u32 dummy;
-	struct ipzu_queue_resp ipz_squeue;
-	struct ipzu_queue_resp ipz_rqueue;
-};
-
-struct ehca_alloc_cq_parms {
-	u32 nr_cqe;
-	u32 act_nr_of_entries;
-	u32 act_pages;
-	struct ipz_eq_handle eq_handle;
-};
-
-enum ehca_service_type {
-	ST_RC  = 0,
-	ST_UC  = 1,
-	ST_RD  = 2,
-	ST_UD  = 3,
-};
-
-enum ehca_ll_comp_flags {
-	LLQP_SEND_COMP = 0x20,
-	LLQP_RECV_COMP = 0x40,
-	LLQP_COMP_MASK = 0x60,
-};
-
-struct ehca_alloc_queue_parms {
-	/* input parameters */
-	int max_wr;
-	int max_sge;
-	int page_size;
-	int is_small;
-
-	/* output parameters */
-	u16 act_nr_wqes;
-	u8  act_nr_sges;
-	u32 queue_size; /* bytes for small queues, pages otherwise */
-};
-
-struct ehca_alloc_qp_parms {
-	struct ehca_alloc_queue_parms squeue;
-	struct ehca_alloc_queue_parms rqueue;
-
-	/* input parameters */
-	enum ehca_service_type servicetype;
-	int qp_storage;
-	int sigtype;
-	enum ehca_ext_qp_type ext_type;
-	enum ehca_ll_comp_flags ll_comp_flags;
-	int ud_av_l_key_ctl;
-
-	u32 token;
-	struct ipz_eq_handle eq_handle;
-	struct ipz_pd pd;
-	struct ipz_cq_handle send_cq_handle, recv_cq_handle;
-
-	u32 srq_qpn, srq_token, srq_limit;
-
-	/* output parameters */
-	u32 real_qp_num;
-	struct ipz_qp_handle qp_handle;
-	struct h_galpas galpas;
-};
-
-int ehca_cq_assign_qp(struct ehca_cq *cq, struct ehca_qp *qp);
-int ehca_cq_unassign_qp(struct ehca_cq *cq, unsigned int qp_num);
-struct ehca_qp *ehca_cq_get_qp(struct ehca_cq *cq, int qp_num);
-
-#endif
diff --git a/drivers/staging/rdma/ehca/ehca_classes_pSeries.h b/drivers/staging/rdma/ehca/ehca_classes_pSeries.h
deleted file mode 100644
index 689c357..0000000
--- a/drivers/staging/rdma/ehca/ehca_classes_pSeries.h
+++ /dev/null
@@ -1,208 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  pSeries interface definitions
- *
- *  Authors: Waleri Fomin <fomin@de.ibm.com>
- *           Christoph Raisch <raisch@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __EHCA_CLASSES_PSERIES_H__
-#define __EHCA_CLASSES_PSERIES_H__
-
-#include "hcp_phyp.h"
-#include "ipz_pt_fn.h"
-
-
-struct ehca_pfqp {
-	struct ipz_qpt sqpt;
-	struct ipz_qpt rqpt;
-};
-
-struct ehca_pfcq {
-	struct ipz_qpt qpt;
-	u32 cqnr;
-};
-
-struct ehca_pfeq {
-	struct ipz_qpt qpt;
-	struct h_galpa galpa;
-	u32 eqnr;
-};
-
-struct ipz_adapter_handle {
-	u64 handle;
-};
-
-struct ipz_cq_handle {
-	u64 handle;
-};
-
-struct ipz_eq_handle {
-	u64 handle;
-};
-
-struct ipz_qp_handle {
-	u64 handle;
-};
-struct ipz_mrmw_handle {
-	u64 handle;
-};
-
-struct ipz_pd {
-	u32 value;
-};
-
-struct hcp_modify_qp_control_block {
-	u32 qkey;                      /* 00 */
-	u32 rdd;                       /* reliable datagram domain */
-	u32 send_psn;                  /* 02 */
-	u32 receive_psn;               /* 03 */
-	u32 prim_phys_port;            /* 04 */
-	u32 alt_phys_port;             /* 05 */
-	u32 prim_p_key_idx;            /* 06 */
-	u32 alt_p_key_idx;             /* 07 */
-	u32 rdma_atomic_ctrl;          /* 08 */
-	u32 qp_state;                  /* 09 */
-	u32 reserved_10;               /* 10 */
-	u32 rdma_nr_atomic_resp_res;   /* 11 */
-	u32 path_migration_state;      /* 12 */
-	u32 rdma_atomic_outst_dest_qp; /* 13 */
-	u32 dest_qp_nr;                /* 14 */
-	u32 min_rnr_nak_timer_field;   /* 15 */
-	u32 service_level;             /* 16 */
-	u32 send_grh_flag;             /* 17 */
-	u32 retry_count;               /* 18 */
-	u32 timeout;                   /* 19 */
-	u32 path_mtu;                  /* 20 */
-	u32 max_static_rate;           /* 21 */
-	u32 dlid;                      /* 22 */
-	u32 rnr_retry_count;           /* 23 */
-	u32 source_path_bits;          /* 24 */
-	u32 traffic_class;             /* 25 */
-	u32 hop_limit;                 /* 26 */
-	u32 source_gid_idx;            /* 27 */
-	u32 flow_label;                /* 28 */
-	u32 reserved_29;               /* 29 */
-	union {                        /* 30 */
-		u64 dw[2];
-		u8 byte[16];
-	} dest_gid;
-	u32 service_level_al;          /* 34 */
-	u32 send_grh_flag_al;          /* 35 */
-	u32 retry_count_al;            /* 36 */
-	u32 timeout_al;                /* 37 */
-	u32 max_static_rate_al;        /* 38 */
-	u32 dlid_al;                   /* 39 */
-	u32 rnr_retry_count_al;        /* 40 */
-	u32 source_path_bits_al;       /* 41 */
-	u32 traffic_class_al;          /* 42 */
-	u32 hop_limit_al;              /* 43 */
-	u32 source_gid_idx_al;         /* 44 */
-	u32 flow_label_al;             /* 45 */
-	u32 reserved_46;               /* 46 */
-	u32 reserved_47;               /* 47 */
-	union {                        /* 48 */
-		u64 dw[2];
-		u8 byte[16];
-	} dest_gid_al;
-	u32 max_nr_outst_send_wr;      /* 52 */
-	u32 max_nr_outst_recv_wr;      /* 53 */
-	u32 disable_ete_credit_check;  /* 54 */
-	u32 qp_number;                 /* 55 */
-	u64 send_queue_handle;         /* 56 */
-	u64 recv_queue_handle;         /* 58 */
-	u32 actual_nr_sges_in_sq_wqe;  /* 60 */
-	u32 actual_nr_sges_in_rq_wqe;  /* 61 */
-	u32 qp_enable;                 /* 62 */
-	u32 curr_srq_limit;            /* 63 */
-	u64 qp_aff_asyn_ev_log_reg;    /* 64 */
-	u64 shared_rq_hndl;            /* 66 */
-	u64 trigg_doorbell_qp_hndl;    /* 68 */
-	u32 reserved_70_127[58];       /* 70 */
-};
-
-#define MQPCB_MASK_QKEY                         EHCA_BMASK_IBM( 0,  0)
-#define MQPCB_MASK_SEND_PSN                     EHCA_BMASK_IBM( 2,  2)
-#define MQPCB_MASK_RECEIVE_PSN                  EHCA_BMASK_IBM( 3,  3)
-#define MQPCB_MASK_PRIM_PHYS_PORT               EHCA_BMASK_IBM( 4,  4)
-#define MQPCB_PRIM_PHYS_PORT                    EHCA_BMASK_IBM(24, 31)
-#define MQPCB_MASK_ALT_PHYS_PORT                EHCA_BMASK_IBM( 5,  5)
-#define MQPCB_MASK_PRIM_P_KEY_IDX               EHCA_BMASK_IBM( 6,  6)
-#define MQPCB_PRIM_P_KEY_IDX                    EHCA_BMASK_IBM(24, 31)
-#define MQPCB_MASK_ALT_P_KEY_IDX                EHCA_BMASK_IBM( 7,  7)
-#define MQPCB_MASK_RDMA_ATOMIC_CTRL             EHCA_BMASK_IBM( 8,  8)
-#define MQPCB_MASK_QP_STATE                     EHCA_BMASK_IBM( 9,  9)
-#define MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES      EHCA_BMASK_IBM(11, 11)
-#define MQPCB_MASK_PATH_MIGRATION_STATE         EHCA_BMASK_IBM(12, 12)
-#define MQPCB_MASK_RDMA_ATOMIC_OUTST_DEST_QP    EHCA_BMASK_IBM(13, 13)
-#define MQPCB_MASK_DEST_QP_NR                   EHCA_BMASK_IBM(14, 14)
-#define MQPCB_MASK_MIN_RNR_NAK_TIMER_FIELD      EHCA_BMASK_IBM(15, 15)
-#define MQPCB_MASK_SERVICE_LEVEL                EHCA_BMASK_IBM(16, 16)
-#define MQPCB_MASK_SEND_GRH_FLAG                EHCA_BMASK_IBM(17, 17)
-#define MQPCB_MASK_RETRY_COUNT                  EHCA_BMASK_IBM(18, 18)
-#define MQPCB_MASK_TIMEOUT                      EHCA_BMASK_IBM(19, 19)
-#define MQPCB_MASK_PATH_MTU                     EHCA_BMASK_IBM(20, 20)
-#define MQPCB_MASK_MAX_STATIC_RATE              EHCA_BMASK_IBM(21, 21)
-#define MQPCB_MASK_DLID                         EHCA_BMASK_IBM(22, 22)
-#define MQPCB_MASK_RNR_RETRY_COUNT              EHCA_BMASK_IBM(23, 23)
-#define MQPCB_MASK_SOURCE_PATH_BITS             EHCA_BMASK_IBM(24, 24)
-#define MQPCB_MASK_TRAFFIC_CLASS                EHCA_BMASK_IBM(25, 25)
-#define MQPCB_MASK_HOP_LIMIT                    EHCA_BMASK_IBM(26, 26)
-#define MQPCB_MASK_SOURCE_GID_IDX               EHCA_BMASK_IBM(27, 27)
-#define MQPCB_MASK_FLOW_LABEL                   EHCA_BMASK_IBM(28, 28)
-#define MQPCB_MASK_DEST_GID                     EHCA_BMASK_IBM(30, 30)
-#define MQPCB_MASK_SERVICE_LEVEL_AL             EHCA_BMASK_IBM(31, 31)
-#define MQPCB_MASK_SEND_GRH_FLAG_AL             EHCA_BMASK_IBM(32, 32)
-#define MQPCB_MASK_RETRY_COUNT_AL               EHCA_BMASK_IBM(33, 33)
-#define MQPCB_MASK_TIMEOUT_AL                   EHCA_BMASK_IBM(34, 34)
-#define MQPCB_MASK_MAX_STATIC_RATE_AL           EHCA_BMASK_IBM(35, 35)
-#define MQPCB_MASK_DLID_AL                      EHCA_BMASK_IBM(36, 36)
-#define MQPCB_MASK_RNR_RETRY_COUNT_AL           EHCA_BMASK_IBM(37, 37)
-#define MQPCB_MASK_SOURCE_PATH_BITS_AL          EHCA_BMASK_IBM(38, 38)
-#define MQPCB_MASK_TRAFFIC_CLASS_AL             EHCA_BMASK_IBM(39, 39)
-#define MQPCB_MASK_HOP_LIMIT_AL                 EHCA_BMASK_IBM(40, 40)
-#define MQPCB_MASK_SOURCE_GID_IDX_AL            EHCA_BMASK_IBM(41, 41)
-#define MQPCB_MASK_FLOW_LABEL_AL                EHCA_BMASK_IBM(42, 42)
-#define MQPCB_MASK_DEST_GID_AL                  EHCA_BMASK_IBM(44, 44)
-#define MQPCB_MASK_MAX_NR_OUTST_SEND_WR         EHCA_BMASK_IBM(45, 45)
-#define MQPCB_MASK_MAX_NR_OUTST_RECV_WR         EHCA_BMASK_IBM(46, 46)
-#define MQPCB_MASK_DISABLE_ETE_CREDIT_CHECK     EHCA_BMASK_IBM(47, 47)
-#define MQPCB_MASK_QP_ENABLE                    EHCA_BMASK_IBM(48, 48)
-#define MQPCB_MASK_CURR_SRQ_LIMIT               EHCA_BMASK_IBM(49, 49)
-#define MQPCB_MASK_QP_AFF_ASYN_EV_LOG_REG       EHCA_BMASK_IBM(50, 50)
-#define MQPCB_MASK_SHARED_RQ_HNDL               EHCA_BMASK_IBM(51, 51)
-
-#endif /* __EHCA_CLASSES_PSERIES_H__ */
diff --git a/drivers/staging/rdma/ehca/ehca_cq.c b/drivers/staging/rdma/ehca/ehca_cq.c
deleted file mode 100644
index 1aa7931..0000000
--- a/drivers/staging/rdma/ehca/ehca_cq.c
+++ /dev/null
@@ -1,397 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  Completion queue handling
- *
- *  Authors: Waleri Fomin <fomin@de.ibm.com>
- *           Khadija Souissi <souissi@de.ibm.com>
- *           Reinhard Ernst <rernst@de.ibm.com>
- *           Heiko J Schick <schickhj@de.ibm.com>
- *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
- *
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/slab.h>
-
-#include "ehca_iverbs.h"
-#include "ehca_classes.h"
-#include "ehca_irq.h"
-#include "hcp_if.h"
-
-static struct kmem_cache *cq_cache;
-
-int ehca_cq_assign_qp(struct ehca_cq *cq, struct ehca_qp *qp)
-{
-	unsigned int qp_num = qp->real_qp_num;
-	unsigned int key = qp_num & (QP_HASHTAB_LEN-1);
-	unsigned long flags;
-
-	spin_lock_irqsave(&cq->spinlock, flags);
-	hlist_add_head(&qp->list_entries, &cq->qp_hashtab[key]);
-	spin_unlock_irqrestore(&cq->spinlock, flags);
-
-	ehca_dbg(cq->ib_cq.device, "cq_num=%x real_qp_num=%x",
-		 cq->cq_number, qp_num);
-
-	return 0;
-}
-
-int ehca_cq_unassign_qp(struct ehca_cq *cq, unsigned int real_qp_num)
-{
-	int ret = -EINVAL;
-	unsigned int key = real_qp_num & (QP_HASHTAB_LEN-1);
-	struct hlist_node *iter;
-	struct ehca_qp *qp;
-	unsigned long flags;
-
-	spin_lock_irqsave(&cq->spinlock, flags);
-	hlist_for_each(iter, &cq->qp_hashtab[key]) {
-		qp = hlist_entry(iter, struct ehca_qp, list_entries);
-		if (qp->real_qp_num == real_qp_num) {
-			hlist_del(iter);
-			ehca_dbg(cq->ib_cq.device,
-				 "removed qp from cq .cq_num=%x real_qp_num=%x",
-				 cq->cq_number, real_qp_num);
-			ret = 0;
-			break;
-		}
-	}
-	spin_unlock_irqrestore(&cq->spinlock, flags);
-	if (ret)
-		ehca_err(cq->ib_cq.device,
-			 "qp not found cq_num=%x real_qp_num=%x",
-			 cq->cq_number, real_qp_num);
-
-	return ret;
-}
-
-struct ehca_qp *ehca_cq_get_qp(struct ehca_cq *cq, int real_qp_num)
-{
-	struct ehca_qp *ret = NULL;
-	unsigned int key = real_qp_num & (QP_HASHTAB_LEN-1);
-	struct hlist_node *iter;
-	struct ehca_qp *qp;
-	hlist_for_each(iter, &cq->qp_hashtab[key]) {
-		qp = hlist_entry(iter, struct ehca_qp, list_entries);
-		if (qp->real_qp_num == real_qp_num) {
-			ret = qp;
-			break;
-		}
-	}
-	return ret;
-}
-
-struct ib_cq *ehca_create_cq(struct ib_device *device,
-			     const struct ib_cq_init_attr *attr,
-			     struct ib_ucontext *context,
-			     struct ib_udata *udata)
-{
-	int cqe = attr->cqe;
-	static const u32 additional_cqe = 20;
-	struct ib_cq *cq;
-	struct ehca_cq *my_cq;
-	struct ehca_shca *shca =
-		container_of(device, struct ehca_shca, ib_device);
-	struct ipz_adapter_handle adapter_handle;
-	struct ehca_alloc_cq_parms param; /* h_call's out parameters */
-	struct h_galpa gal;
-	void *vpage;
-	u32 counter;
-	u64 rpage, cqx_fec, h_ret;
-	int rc, i;
-	unsigned long flags;
-
-	if (attr->flags)
-		return ERR_PTR(-EINVAL);
-
-	if (cqe >= 0xFFFFFFFF - 64 - additional_cqe)
-		return ERR_PTR(-EINVAL);
-
-	if (!atomic_add_unless(&shca->num_cqs, 1, shca->max_num_cqs)) {
-		ehca_err(device, "Unable to create CQ, max number of %i "
-			"CQs reached.", shca->max_num_cqs);
-		ehca_err(device, "To increase the maximum number of CQs "
-			"use the number_of_cqs module parameter.\n");
-		return ERR_PTR(-ENOSPC);
-	}
-
-	my_cq = kmem_cache_zalloc(cq_cache, GFP_KERNEL);
-	if (!my_cq) {
-		ehca_err(device, "Out of memory for ehca_cq struct device=%p",
-			 device);
-		atomic_dec(&shca->num_cqs);
-		return ERR_PTR(-ENOMEM);
-	}
-
-	memset(&param, 0, sizeof(struct ehca_alloc_cq_parms));
-
-	spin_lock_init(&my_cq->spinlock);
-	spin_lock_init(&my_cq->cb_lock);
-	spin_lock_init(&my_cq->task_lock);
-	atomic_set(&my_cq->nr_events, 0);
-	init_waitqueue_head(&my_cq->wait_completion);
-
-	cq = &my_cq->ib_cq;
-
-	adapter_handle = shca->ipz_hca_handle;
-	param.eq_handle = shca->eq.ipz_eq_handle;
-
-	idr_preload(GFP_KERNEL);
-	write_lock_irqsave(&ehca_cq_idr_lock, flags);
-	rc = idr_alloc(&ehca_cq_idr, my_cq, 0, 0x2000000, GFP_NOWAIT);
-	write_unlock_irqrestore(&ehca_cq_idr_lock, flags);
-	idr_preload_end();
-
-	if (rc < 0) {
-		cq = ERR_PTR(-ENOMEM);
-		ehca_err(device, "Can't allocate new idr entry. device=%p",
-			 device);
-		goto create_cq_exit1;
-	}
-	my_cq->token = rc;
-
-	/*
-	 * CQs maximum depth is 4GB-64, but we need additional 20 as buffer
-	 * for receiving errors CQEs.
-	 */
-	param.nr_cqe = cqe + additional_cqe;
-	h_ret = hipz_h_alloc_resource_cq(adapter_handle, my_cq, &param);
-
-	if (h_ret != H_SUCCESS) {
-		ehca_err(device, "hipz_h_alloc_resource_cq() failed "
-			 "h_ret=%lli device=%p", h_ret, device);
-		cq = ERR_PTR(ehca2ib_return_code(h_ret));
-		goto create_cq_exit2;
-	}
-
-	rc = ipz_queue_ctor(NULL, &my_cq->ipz_queue, param.act_pages,
-				EHCA_PAGESIZE, sizeof(struct ehca_cqe), 0, 0);
-	if (!rc) {
-		ehca_err(device, "ipz_queue_ctor() failed ipz_rc=%i device=%p",
-			 rc, device);
-		cq = ERR_PTR(-EINVAL);
-		goto create_cq_exit3;
-	}
-
-	for (counter = 0; counter < param.act_pages; counter++) {
-		vpage = ipz_qpageit_get_inc(&my_cq->ipz_queue);
-		if (!vpage) {
-			ehca_err(device, "ipz_qpageit_get_inc() "
-				 "returns NULL device=%p", device);
-			cq = ERR_PTR(-EAGAIN);
-			goto create_cq_exit4;
-		}
-		rpage = __pa(vpage);
-
-		h_ret = hipz_h_register_rpage_cq(adapter_handle,
-						 my_cq->ipz_cq_handle,
-						 &my_cq->pf,
-						 0,
-						 0,
-						 rpage,
-						 1,
-						 my_cq->galpas.
-						 kernel);
-
-		if (h_ret < H_SUCCESS) {
-			ehca_err(device, "hipz_h_register_rpage_cq() failed "
-				 "ehca_cq=%p cq_num=%x h_ret=%lli counter=%i "
-				 "act_pages=%i", my_cq, my_cq->cq_number,
-				 h_ret, counter, param.act_pages);
-			cq = ERR_PTR(-EINVAL);
-			goto create_cq_exit4;
-		}
-
-		if (counter == (param.act_pages - 1)) {
-			vpage = ipz_qpageit_get_inc(&my_cq->ipz_queue);
-			if ((h_ret != H_SUCCESS) || vpage) {
-				ehca_err(device, "Registration of pages not "
-					 "complete ehca_cq=%p cq_num=%x "
-					 "h_ret=%lli", my_cq, my_cq->cq_number,
-					 h_ret);
-				cq = ERR_PTR(-EAGAIN);
-				goto create_cq_exit4;
-			}
-		} else {
-			if (h_ret != H_PAGE_REGISTERED) {
-				ehca_err(device, "Registration of page failed "
-					 "ehca_cq=%p cq_num=%x h_ret=%lli "
-					 "counter=%i act_pages=%i",
-					 my_cq, my_cq->cq_number,
-					 h_ret, counter, param.act_pages);
-				cq = ERR_PTR(-ENOMEM);
-				goto create_cq_exit4;
-			}
-		}
-	}
-
-	ipz_qeit_reset(&my_cq->ipz_queue);
-
-	gal = my_cq->galpas.kernel;
-	cqx_fec = hipz_galpa_load(gal, CQTEMM_OFFSET(cqx_fec));
-	ehca_dbg(device, "ehca_cq=%p cq_num=%x CQX_FEC=%llx",
-		 my_cq, my_cq->cq_number, cqx_fec);
-
-	my_cq->ib_cq.cqe = my_cq->nr_of_entries =
-		param.act_nr_of_entries - additional_cqe;
-	my_cq->cq_number = (my_cq->ipz_cq_handle.handle) & 0xffff;
-
-	for (i = 0; i < QP_HASHTAB_LEN; i++)
-		INIT_HLIST_HEAD(&my_cq->qp_hashtab[i]);
-
-	INIT_LIST_HEAD(&my_cq->sqp_err_list);
-	INIT_LIST_HEAD(&my_cq->rqp_err_list);
-
-	if (context) {
-		struct ipz_queue *ipz_queue = &my_cq->ipz_queue;
-		struct ehca_create_cq_resp resp;
-		memset(&resp, 0, sizeof(resp));
-		resp.cq_number = my_cq->cq_number;
-		resp.token = my_cq->token;
-		resp.ipz_queue.qe_size = ipz_queue->qe_size;
-		resp.ipz_queue.act_nr_of_sg = ipz_queue->act_nr_of_sg;
-		resp.ipz_queue.queue_length = ipz_queue->queue_length;
-		resp.ipz_queue.pagesize = ipz_queue->pagesize;
-		resp.ipz_queue.toggle_state = ipz_queue->toggle_state;
-		resp.fw_handle_ofs = (u32)
-			(my_cq->galpas.user.fw_handle & (PAGE_SIZE - 1));
-		if (ib_copy_to_udata(udata, &resp, sizeof(resp))) {
-			ehca_err(device, "Copy to udata failed.");
-			cq = ERR_PTR(-EFAULT);
-			goto create_cq_exit4;
-		}
-	}
-
-	return cq;
-
-create_cq_exit4:
-	ipz_queue_dtor(NULL, &my_cq->ipz_queue);
-
-create_cq_exit3:
-	h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 1);
-	if (h_ret != H_SUCCESS)
-		ehca_err(device, "hipz_h_destroy_cq() failed ehca_cq=%p "
-			 "cq_num=%x h_ret=%lli", my_cq, my_cq->cq_number, h_ret);
-
-create_cq_exit2:
-	write_lock_irqsave(&ehca_cq_idr_lock, flags);
-	idr_remove(&ehca_cq_idr, my_cq->token);
-	write_unlock_irqrestore(&ehca_cq_idr_lock, flags);
-
-create_cq_exit1:
-	kmem_cache_free(cq_cache, my_cq);
-
-	atomic_dec(&shca->num_cqs);
-	return cq;
-}
-
-int ehca_destroy_cq(struct ib_cq *cq)
-{
-	u64 h_ret;
-	struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
-	int cq_num = my_cq->cq_number;
-	struct ib_device *device = cq->device;
-	struct ehca_shca *shca = container_of(device, struct ehca_shca,
-					      ib_device);
-	struct ipz_adapter_handle adapter_handle = shca->ipz_hca_handle;
-	unsigned long flags;
-
-	if (cq->uobject) {
-		if (my_cq->mm_count_galpa || my_cq->mm_count_queue) {
-			ehca_err(device, "Resources still referenced in "
-				 "user space cq_num=%x", my_cq->cq_number);
-			return -EINVAL;
-		}
-	}
-
-	/*
-	 * remove the CQ from the idr first to make sure
-	 * no more interrupt tasklets will touch this CQ
-	 */
-	write_lock_irqsave(&ehca_cq_idr_lock, flags);
-	idr_remove(&ehca_cq_idr, my_cq->token);
-	write_unlock_irqrestore(&ehca_cq_idr_lock, flags);
-
-	/* now wait until all pending events have completed */
-	wait_event(my_cq->wait_completion, !atomic_read(&my_cq->nr_events));
-
-	/* nobody's using our CQ any longer -- we can destroy it */
-	h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 0);
-	if (h_ret == H_R_STATE) {
-		/* cq in err: read err data and destroy it forcibly */
-		ehca_dbg(device, "ehca_cq=%p cq_num=%x resource=%llx in err "
-			 "state. Try to delete it forcibly.",
-			 my_cq, cq_num, my_cq->ipz_cq_handle.handle);
-		ehca_error_data(shca, my_cq, my_cq->ipz_cq_handle.handle);
-		h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 1);
-		if (h_ret == H_SUCCESS)
-			ehca_dbg(device, "cq_num=%x deleted successfully.",
-				 cq_num);
-	}
-	if (h_ret != H_SUCCESS) {
-		ehca_err(device, "hipz_h_destroy_cq() failed h_ret=%lli "
-			 "ehca_cq=%p cq_num=%x", h_ret, my_cq, cq_num);
-		return ehca2ib_return_code(h_ret);
-	}
-	ipz_queue_dtor(NULL, &my_cq->ipz_queue);
-	kmem_cache_free(cq_cache, my_cq);
-
-	atomic_dec(&shca->num_cqs);
-	return 0;
-}
-
-int ehca_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata)
-{
-	/* TODO: proper resize needs to be done */
-	ehca_err(cq->device, "not implemented yet");
-
-	return -EFAULT;
-}
-
-int ehca_init_cq_cache(void)
-{
-	cq_cache = kmem_cache_create("ehca_cache_cq",
-				     sizeof(struct ehca_cq), 0,
-				     SLAB_HWCACHE_ALIGN,
-				     NULL);
-	if (!cq_cache)
-		return -ENOMEM;
-	return 0;
-}
-
-void ehca_cleanup_cq_cache(void)
-{
-	kmem_cache_destroy(cq_cache);
-}
diff --git a/drivers/staging/rdma/ehca/ehca_eq.c b/drivers/staging/rdma/ehca/ehca_eq.c
deleted file mode 100644
index 90da674..0000000
--- a/drivers/staging/rdma/ehca/ehca_eq.c
+++ /dev/null
@@ -1,189 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  Event queue handling
- *
- *  Authors: Waleri Fomin <fomin@de.ibm.com>
- *           Khadija Souissi <souissi@de.ibm.com>
- *           Reinhard Ernst <rernst@de.ibm.com>
- *           Heiko J Schick <schickhj@de.ibm.com>
- *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
- *
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "ehca_classes.h"
-#include "ehca_irq.h"
-#include "ehca_iverbs.h"
-#include "ehca_qes.h"
-#include "hcp_if.h"
-#include "ipz_pt_fn.h"
-
-int ehca_create_eq(struct ehca_shca *shca,
-		   struct ehca_eq *eq,
-		   const enum ehca_eq_type type, const u32 length)
-{
-	int ret;
-	u64 h_ret;
-	u32 nr_pages;
-	u32 i;
-	void *vpage;
-	struct ib_device *ib_dev = &shca->ib_device;
-
-	spin_lock_init(&eq->spinlock);
-	spin_lock_init(&eq->irq_spinlock);
-	eq->is_initialized = 0;
-
-	if (type != EHCA_EQ && type != EHCA_NEQ) {
-		ehca_err(ib_dev, "Invalid EQ type %x. eq=%p", type, eq);
-		return -EINVAL;
-	}
-	if (!length) {
-		ehca_err(ib_dev, "EQ length must not be zero. eq=%p", eq);
-		return -EINVAL;
-	}
-
-	h_ret = hipz_h_alloc_resource_eq(shca->ipz_hca_handle,
-					 &eq->pf,
-					 type,
-					 length,
-					 &eq->ipz_eq_handle,
-					 &eq->length,
-					 &nr_pages, &eq->ist);
-
-	if (h_ret != H_SUCCESS) {
-		ehca_err(ib_dev, "Can't allocate EQ/NEQ. eq=%p", eq);
-		return -EINVAL;
-	}
-
-	ret = ipz_queue_ctor(NULL, &eq->ipz_queue, nr_pages,
-			     EHCA_PAGESIZE, sizeof(struct ehca_eqe), 0, 0);
-	if (!ret) {
-		ehca_err(ib_dev, "Can't allocate EQ pages eq=%p", eq);
-		goto create_eq_exit1;
-	}
-
-	for (i = 0; i < nr_pages; i++) {
-		u64 rpage;
-
-		vpage = ipz_qpageit_get_inc(&eq->ipz_queue);
-		if (!vpage)
-			goto create_eq_exit2;
-
-		rpage = __pa(vpage);
-		h_ret = hipz_h_register_rpage_eq(shca->ipz_hca_handle,
-						 eq->ipz_eq_handle,
-						 &eq->pf,
-						 0, 0, rpage, 1);
-
-		if (i == (nr_pages - 1)) {
-			/* last page */
-			vpage = ipz_qpageit_get_inc(&eq->ipz_queue);
-			if (h_ret != H_SUCCESS || vpage)
-				goto create_eq_exit2;
-		} else {
-			if (h_ret != H_PAGE_REGISTERED)
-				goto create_eq_exit2;
-		}
-	}
-
-	ipz_qeit_reset(&eq->ipz_queue);
-
-	/* register interrupt handlers and initialize work queues */
-	if (type == EHCA_EQ) {
-		tasklet_init(&eq->interrupt_task, ehca_tasklet_eq, (long)shca);
-
-		ret = ibmebus_request_irq(eq->ist, ehca_interrupt_eq,
-					  0, "ehca_eq",
-					  (void *)shca);
-		if (ret < 0)
-			ehca_err(ib_dev, "Can't map interrupt handler.");
-	} else if (type == EHCA_NEQ) {
-		tasklet_init(&eq->interrupt_task, ehca_tasklet_neq, (long)shca);
-
-		ret = ibmebus_request_irq(eq->ist, ehca_interrupt_neq,
-					  0, "ehca_neq",
-					  (void *)shca);
-		if (ret < 0)
-			ehca_err(ib_dev, "Can't map interrupt handler.");
-	}
-
-	eq->is_initialized = 1;
-
-	return 0;
-
-create_eq_exit2:
-	ipz_queue_dtor(NULL, &eq->ipz_queue);
-
-create_eq_exit1:
-	hipz_h_destroy_eq(shca->ipz_hca_handle, eq);
-
-	return -EINVAL;
-}
-
-void *ehca_poll_eq(struct ehca_shca *shca, struct ehca_eq *eq)
-{
-	unsigned long flags;
-	void *eqe;
-
-	spin_lock_irqsave(&eq->spinlock, flags);
-	eqe = ipz_eqit_eq_get_inc_valid(&eq->ipz_queue);
-	spin_unlock_irqrestore(&eq->spinlock, flags);
-
-	return eqe;
-}
-
-int ehca_destroy_eq(struct ehca_shca *shca, struct ehca_eq *eq)
-{
-	unsigned long flags;
-	u64 h_ret;
-
-	ibmebus_free_irq(eq->ist, (void *)shca);
-
-	spin_lock_irqsave(&shca_list_lock, flags);
-	eq->is_initialized = 0;
-	spin_unlock_irqrestore(&shca_list_lock, flags);
-
-	tasklet_kill(&eq->interrupt_task);
-
-	h_ret = hipz_h_destroy_eq(shca->ipz_hca_handle, eq);
-
-	if (h_ret != H_SUCCESS) {
-		ehca_err(&shca->ib_device, "Can't free EQ resources.");
-		return -EINVAL;
-	}
-	ipz_queue_dtor(NULL, &eq->ipz_queue);
-
-	return 0;
-}
diff --git a/drivers/staging/rdma/ehca/ehca_hca.c b/drivers/staging/rdma/ehca/ehca_hca.c
deleted file mode 100644
index e8b1bb6..0000000
--- a/drivers/staging/rdma/ehca/ehca_hca.c
+++ /dev/null
@@ -1,414 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  HCA query functions
- *
- *  Authors: Heiko J Schick <schickhj@de.ibm.com>
- *           Christoph Raisch <raisch@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/gfp.h>
-
-#include "ehca_tools.h"
-#include "ehca_iverbs.h"
-#include "hcp_if.h"
-
-static unsigned int limit_uint(unsigned int value)
-{
-	return min_t(unsigned int, value, INT_MAX);
-}
-
-int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props,
-		      struct ib_udata *uhw)
-{
-	int i, ret = 0;
-	struct ehca_shca *shca = container_of(ibdev, struct ehca_shca,
-					      ib_device);
-	struct hipz_query_hca *rblock;
-
-	static const u32 cap_mapping[] = {
-		IB_DEVICE_RESIZE_MAX_WR,      HCA_CAP_WQE_RESIZE,
-		IB_DEVICE_BAD_PKEY_CNTR,      HCA_CAP_BAD_P_KEY_CTR,
-		IB_DEVICE_BAD_QKEY_CNTR,      HCA_CAP_Q_KEY_VIOL_CTR,
-		IB_DEVICE_RAW_MULTI,          HCA_CAP_RAW_PACKET_MCAST,
-		IB_DEVICE_AUTO_PATH_MIG,      HCA_CAP_AUTO_PATH_MIG,
-		IB_DEVICE_CHANGE_PHY_PORT,    HCA_CAP_SQD_RTS_PORT_CHANGE,
-		IB_DEVICE_UD_AV_PORT_ENFORCE, HCA_CAP_AH_PORT_NR_CHECK,
-		IB_DEVICE_CURR_QP_STATE_MOD,  HCA_CAP_CUR_QP_STATE_MOD,
-		IB_DEVICE_SHUTDOWN_PORT,      HCA_CAP_SHUTDOWN_PORT,
-		IB_DEVICE_INIT_TYPE,          HCA_CAP_INIT_TYPE,
-		IB_DEVICE_PORT_ACTIVE_EVENT,  HCA_CAP_PORT_ACTIVE_EVENT,
-	};
-
-	if (uhw->inlen || uhw->outlen)
-		return -EINVAL;
-
-	rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
-	if (!rblock) {
-		ehca_err(&shca->ib_device, "Can't allocate rblock memory.");
-		return -ENOMEM;
-	}
-
-	if (hipz_h_query_hca(shca->ipz_hca_handle, rblock) != H_SUCCESS) {
-		ehca_err(&shca->ib_device, "Can't query device properties");
-		ret = -EINVAL;
-		goto query_device1;
-	}
-
-	memset(props, 0, sizeof(struct ib_device_attr));
-	props->page_size_cap   = shca->hca_cap_mr_pgsize;
-	props->fw_ver          = rblock->hw_ver;
-	props->max_mr_size     = rblock->max_mr_size;
-	props->vendor_id       = rblock->vendor_id >> 8;
-	props->vendor_part_id  = rblock->vendor_part_id >> 16;
-	props->hw_ver          = rblock->hw_ver;
-	props->max_qp          = limit_uint(rblock->max_qp);
-	props->max_qp_wr       = limit_uint(rblock->max_wqes_wq);
-	props->max_sge         = limit_uint(rblock->max_sge);
-	props->max_sge_rd      = limit_uint(rblock->max_sge_rd);
-	props->max_cq          = limit_uint(rblock->max_cq);
-	props->max_cqe         = limit_uint(rblock->max_cqe);
-	props->max_mr          = limit_uint(rblock->max_mr);
-	props->max_mw          = limit_uint(rblock->max_mw);
-	props->max_pd          = limit_uint(rblock->max_pd);
-	props->max_ah          = limit_uint(rblock->max_ah);
-	props->max_ee          = limit_uint(rblock->max_rd_ee_context);
-	props->max_rdd         = limit_uint(rblock->max_rd_domain);
-	props->max_fmr         = limit_uint(rblock->max_mr);
-	props->max_qp_rd_atom  = limit_uint(rblock->max_rr_qp);
-	props->max_ee_rd_atom  = limit_uint(rblock->max_rr_ee_context);
-	props->max_res_rd_atom = limit_uint(rblock->max_rr_hca);
-	props->max_qp_init_rd_atom = limit_uint(rblock->max_act_wqs_qp);
-	props->max_ee_init_rd_atom = limit_uint(rblock->max_act_wqs_ee_context);
-
-	if (EHCA_BMASK_GET(HCA_CAP_SRQ, shca->hca_cap)) {
-		props->max_srq         = limit_uint(props->max_qp);
-		props->max_srq_wr      = limit_uint(props->max_qp_wr);
-		props->max_srq_sge     = 3;
-	}
-
-	props->max_pkeys           = 16;
-	/* Some FW versions say 0 here; insert sensible value in that case */
-	props->local_ca_ack_delay  = rblock->local_ca_ack_delay ?
-		min_t(u8, rblock->local_ca_ack_delay, 255) : 12;
-	props->max_raw_ipv6_qp     = limit_uint(rblock->max_raw_ipv6_qp);
-	props->max_raw_ethy_qp     = limit_uint(rblock->max_raw_ethy_qp);
-	props->max_mcast_grp       = limit_uint(rblock->max_mcast_grp);
-	props->max_mcast_qp_attach = limit_uint(rblock->max_mcast_qp_attach);
-	props->max_total_mcast_qp_attach
-		= limit_uint(rblock->max_total_mcast_qp_attach);
-
-	/* translate device capabilities */
-	props->device_cap_flags = IB_DEVICE_SYS_IMAGE_GUID |
-		IB_DEVICE_RC_RNR_NAK_GEN | IB_DEVICE_N_NOTIFY_CQ;
-	for (i = 0; i < ARRAY_SIZE(cap_mapping); i += 2)
-		if (rblock->hca_cap_indicators & cap_mapping[i + 1])
-			props->device_cap_flags |= cap_mapping[i];
-
-query_device1:
-	ehca_free_fw_ctrlblock(rblock);
-
-	return ret;
-}
-
-static enum ib_mtu map_mtu(struct ehca_shca *shca, u32 fw_mtu)
-{
-	switch (fw_mtu) {
-	case 0x1:
-		return IB_MTU_256;
-	case 0x2:
-		return IB_MTU_512;
-	case 0x3:
-		return IB_MTU_1024;
-	case 0x4:
-		return IB_MTU_2048;
-	case 0x5:
-		return IB_MTU_4096;
-	default:
-		ehca_err(&shca->ib_device, "Unknown MTU size: %x.",
-			 fw_mtu);
-		return 0;
-	}
-}
-
-static u8 map_number_of_vls(struct ehca_shca *shca, u32 vl_cap)
-{
-	switch (vl_cap) {
-	case 0x1:
-		return 1;
-	case 0x2:
-		return 2;
-	case 0x3:
-		return 4;
-	case 0x4:
-		return 8;
-	case 0x5:
-		return 15;
-	default:
-		ehca_err(&shca->ib_device, "invalid Vl Capability: %x.",
-			 vl_cap);
-		return 0;
-	}
-}
-
-int ehca_query_port(struct ib_device *ibdev,
-		    u8 port, struct ib_port_attr *props)
-{
-	int ret = 0;
-	u64 h_ret;
-	struct ehca_shca *shca = container_of(ibdev, struct ehca_shca,
-					      ib_device);
-	struct hipz_query_port *rblock;
-
-	rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
-	if (!rblock) {
-		ehca_err(&shca->ib_device, "Can't allocate rblock memory.");
-		return -ENOMEM;
-	}
-
-	h_ret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock);
-	if (h_ret != H_SUCCESS) {
-		ehca_err(&shca->ib_device, "Can't query port properties");
-		ret = -EINVAL;
-		goto query_port1;
-	}
-
-	memset(props, 0, sizeof(struct ib_port_attr));
-
-	props->active_mtu = props->max_mtu = map_mtu(shca, rblock->max_mtu);
-	props->port_cap_flags  = rblock->capability_mask;
-	props->gid_tbl_len     = rblock->gid_tbl_len;
-	if (rblock->max_msg_sz)
-		props->max_msg_sz      = rblock->max_msg_sz;
-	else
-		props->max_msg_sz      = 0x1 << 31;
-	props->bad_pkey_cntr   = rblock->bad_pkey_cntr;
-	props->qkey_viol_cntr  = rblock->qkey_viol_cntr;
-	props->pkey_tbl_len    = rblock->pkey_tbl_len;
-	props->lid             = rblock->lid;
-	props->sm_lid          = rblock->sm_lid;
-	props->lmc             = rblock->lmc;
-	props->sm_sl           = rblock->sm_sl;
-	props->subnet_timeout  = rblock->subnet_timeout;
-	props->init_type_reply = rblock->init_type_reply;
-	props->max_vl_num      = map_number_of_vls(shca, rblock->vl_cap);
-
-	if (rblock->state && rblock->phys_width) {
-		props->phys_state      = rblock->phys_pstate;
-		props->state           = rblock->phys_state;
-		props->active_width    = rblock->phys_width;
-		props->active_speed    = rblock->phys_speed;
-	} else {
-		/* old firmware releases don't report physical
-		 * port info, so use default values
-		 */
-		props->phys_state      = 5;
-		props->state           = rblock->state;
-		props->active_width    = IB_WIDTH_12X;
-		props->active_speed    = IB_SPEED_SDR;
-	}
-
-query_port1:
-	ehca_free_fw_ctrlblock(rblock);
-
-	return ret;
-}
-
-int ehca_query_sma_attr(struct ehca_shca *shca,
-			u8 port, struct ehca_sma_attr *attr)
-{
-	int ret = 0;
-	u64 h_ret;
-	struct hipz_query_port *rblock;
-
-	rblock = ehca_alloc_fw_ctrlblock(GFP_ATOMIC);
-	if (!rblock) {
-		ehca_err(&shca->ib_device, "Can't allocate rblock memory.");
-		return -ENOMEM;
-	}
-
-	h_ret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock);
-	if (h_ret != H_SUCCESS) {
-		ehca_err(&shca->ib_device, "Can't query port properties");
-		ret = -EINVAL;
-		goto query_sma_attr1;
-	}
-
-	memset(attr, 0, sizeof(struct ehca_sma_attr));
-
-	attr->lid    = rblock->lid;
-	attr->lmc    = rblock->lmc;
-	attr->sm_sl  = rblock->sm_sl;
-	attr->sm_lid = rblock->sm_lid;
-
-	attr->pkey_tbl_len = rblock->pkey_tbl_len;
-	memcpy(attr->pkeys, rblock->pkey_entries, sizeof(attr->pkeys));
-
-query_sma_attr1:
-	ehca_free_fw_ctrlblock(rblock);
-
-	return ret;
-}
-
-int ehca_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
-{
-	int ret = 0;
-	u64 h_ret;
-	struct ehca_shca *shca;
-	struct hipz_query_port *rblock;
-
-	shca = container_of(ibdev, struct ehca_shca, ib_device);
-	if (index > 16) {
-		ehca_err(&shca->ib_device, "Invalid index: %x.", index);
-		return -EINVAL;
-	}
-
-	rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
-	if (!rblock) {
-		ehca_err(&shca->ib_device,  "Can't allocate rblock memory.");
-		return -ENOMEM;
-	}
-
-	h_ret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock);
-	if (h_ret != H_SUCCESS) {
-		ehca_err(&shca->ib_device, "Can't query port properties");
-		ret = -EINVAL;
-		goto query_pkey1;
-	}
-
-	memcpy(pkey, &rblock->pkey_entries + index, sizeof(u16));
-
-query_pkey1:
-	ehca_free_fw_ctrlblock(rblock);
-
-	return ret;
-}
-
-int ehca_query_gid(struct ib_device *ibdev, u8 port,
-		   int index, union ib_gid *gid)
-{
-	int ret = 0;
-	u64 h_ret;
-	struct ehca_shca *shca = container_of(ibdev, struct ehca_shca,
-					      ib_device);
-	struct hipz_query_port *rblock;
-
-	if (index < 0 || index > 255) {
-		ehca_err(&shca->ib_device, "Invalid index: %x.", index);
-		return -EINVAL;
-	}
-
-	rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
-	if (!rblock) {
-		ehca_err(&shca->ib_device, "Can't allocate rblock memory.");
-		return -ENOMEM;
-	}
-
-	h_ret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock);
-	if (h_ret != H_SUCCESS) {
-		ehca_err(&shca->ib_device, "Can't query port properties");
-		ret = -EINVAL;
-		goto query_gid1;
-	}
-
-	memcpy(&gid->raw[0], &rblock->gid_prefix, sizeof(u64));
-	memcpy(&gid->raw[8], &rblock->guid_entries[index], sizeof(u64));
-
-query_gid1:
-	ehca_free_fw_ctrlblock(rblock);
-
-	return ret;
-}
-
-static const u32 allowed_port_caps = (
-	IB_PORT_SM | IB_PORT_LED_INFO_SUP | IB_PORT_CM_SUP |
-	IB_PORT_SNMP_TUNNEL_SUP | IB_PORT_DEVICE_MGMT_SUP |
-	IB_PORT_VENDOR_CLASS_SUP);
-
-int ehca_modify_port(struct ib_device *ibdev,
-		     u8 port, int port_modify_mask,
-		     struct ib_port_modify *props)
-{
-	int ret = 0;
-	struct ehca_shca *shca;
-	struct hipz_query_port *rblock;
-	u32 cap;
-	u64 hret;
-
-	shca = container_of(ibdev, struct ehca_shca, ib_device);
-	if ((props->set_port_cap_mask | props->clr_port_cap_mask)
-	    & ~allowed_port_caps) {
-		ehca_err(&shca->ib_device, "Non-changeable bits set in masks  "
-			 "set=%x  clr=%x  allowed=%x", props->set_port_cap_mask,
-			 props->clr_port_cap_mask, allowed_port_caps);
-		return -EINVAL;
-	}
-
-	if (mutex_lock_interruptible(&shca->modify_mutex))
-		return -ERESTARTSYS;
-
-	rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
-	if (!rblock) {
-		ehca_err(&shca->ib_device,  "Can't allocate rblock memory.");
-		ret = -ENOMEM;
-		goto modify_port1;
-	}
-
-	hret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock);
-	if (hret != H_SUCCESS) {
-		ehca_err(&shca->ib_device, "Can't query port properties");
-		ret = -EINVAL;
-		goto modify_port2;
-	}
-
-	cap = (rblock->capability_mask | props->set_port_cap_mask)
-		& ~props->clr_port_cap_mask;
-
-	hret = hipz_h_modify_port(shca->ipz_hca_handle, port,
-				  cap, props->init_type, port_modify_mask);
-	if (hret != H_SUCCESS) {
-		ehca_err(&shca->ib_device, "Modify port failed  h_ret=%lli",
-			 hret);
-		ret = -EINVAL;
-	}
-
-modify_port2:
-	ehca_free_fw_ctrlblock(rblock);
-
-modify_port1:
-	mutex_unlock(&shca->modify_mutex);
-
-	return ret;
-}
diff --git a/drivers/staging/rdma/ehca/ehca_irq.c b/drivers/staging/rdma/ehca/ehca_irq.c
deleted file mode 100644
index 8615d7c..0000000
--- a/drivers/staging/rdma/ehca/ehca_irq.c
+++ /dev/null
@@ -1,870 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  Functions for EQs, NEQs and interrupts
- *
- *  Authors: Heiko J Schick <schickhj@de.ibm.com>
- *           Khadija Souissi <souissi@de.ibm.com>
- *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
- *           Joachim Fenkes <fenkes@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/slab.h>
-#include <linux/smpboot.h>
-
-#include "ehca_classes.h"
-#include "ehca_irq.h"
-#include "ehca_iverbs.h"
-#include "ehca_tools.h"
-#include "hcp_if.h"
-#include "hipz_fns.h"
-#include "ipz_pt_fn.h"
-
-#define EQE_COMPLETION_EVENT   EHCA_BMASK_IBM( 1,  1)
-#define EQE_CQ_QP_NUMBER       EHCA_BMASK_IBM( 8, 31)
-#define EQE_EE_IDENTIFIER      EHCA_BMASK_IBM( 2,  7)
-#define EQE_CQ_NUMBER          EHCA_BMASK_IBM( 8, 31)
-#define EQE_QP_NUMBER          EHCA_BMASK_IBM( 8, 31)
-#define EQE_QP_TOKEN           EHCA_BMASK_IBM(32, 63)
-#define EQE_CQ_TOKEN           EHCA_BMASK_IBM(32, 63)
-
-#define NEQE_COMPLETION_EVENT  EHCA_BMASK_IBM( 1,  1)
-#define NEQE_EVENT_CODE        EHCA_BMASK_IBM( 2,  7)
-#define NEQE_PORT_NUMBER       EHCA_BMASK_IBM( 8, 15)
-#define NEQE_PORT_AVAILABILITY EHCA_BMASK_IBM(16, 16)
-#define NEQE_DISRUPTIVE        EHCA_BMASK_IBM(16, 16)
-#define NEQE_SPECIFIC_EVENT    EHCA_BMASK_IBM(16, 23)
-
-#define ERROR_DATA_LENGTH      EHCA_BMASK_IBM(52, 63)
-#define ERROR_DATA_TYPE        EHCA_BMASK_IBM( 0,  7)
-
-static void queue_comp_task(struct ehca_cq *__cq);
-
-static struct ehca_comp_pool *pool;
-
-static inline void comp_event_callback(struct ehca_cq *cq)
-{
-	if (!cq->ib_cq.comp_handler)
-		return;
-
-	spin_lock(&cq->cb_lock);
-	cq->ib_cq.comp_handler(&cq->ib_cq, cq->ib_cq.cq_context);
-	spin_unlock(&cq->cb_lock);
-
-	return;
-}
-
-static void print_error_data(struct ehca_shca *shca, void *data,
-			     u64 *rblock, int length)
-{
-	u64 type = EHCA_BMASK_GET(ERROR_DATA_TYPE, rblock[2]);
-	u64 resource = rblock[1];
-
-	switch (type) {
-	case 0x1: /* Queue Pair */
-	{
-		struct ehca_qp *qp = (struct ehca_qp *)data;
-
-		/* only print error data if AER is set */
-		if (rblock[6] == 0)
-			return;
-
-		ehca_err(&shca->ib_device,
-			 "QP 0x%x (resource=%llx) has errors.",
-			 qp->ib_qp.qp_num, resource);
-		break;
-	}
-	case 0x4: /* Completion Queue */
-	{
-		struct ehca_cq *cq = (struct ehca_cq *)data;
-
-		ehca_err(&shca->ib_device,
-			 "CQ 0x%x (resource=%llx) has errors.",
-			 cq->cq_number, resource);
-		break;
-	}
-	default:
-		ehca_err(&shca->ib_device,
-			 "Unknown error type: %llx on %s.",
-			 type, shca->ib_device.name);
-		break;
-	}
-
-	ehca_err(&shca->ib_device, "Error data is available: %llx.", resource);
-	ehca_err(&shca->ib_device, "EHCA ----- error data begin "
-		 "---------------------------------------------------");
-	ehca_dmp(rblock, length, "resource=%llx", resource);
-	ehca_err(&shca->ib_device, "EHCA ----- error data end "
-		 "----------------------------------------------------");
-
-	return;
-}
-
-int ehca_error_data(struct ehca_shca *shca, void *data,
-		    u64 resource)
-{
-
-	unsigned long ret;
-	u64 *rblock;
-	unsigned long block_count;
-
-	rblock = ehca_alloc_fw_ctrlblock(GFP_ATOMIC);
-	if (!rblock) {
-		ehca_err(&shca->ib_device, "Cannot allocate rblock memory.");
-		ret = -ENOMEM;
-		goto error_data1;
-	}
-
-	/* rblock must be 4K aligned and should be 4K large */
-	ret = hipz_h_error_data(shca->ipz_hca_handle,
-				resource,
-				rblock,
-				&block_count);
-
-	if (ret == H_R_STATE)
-		ehca_err(&shca->ib_device,
-			 "No error data is available: %llx.", resource);
-	else if (ret == H_SUCCESS) {
-		int length;
-
-		length = EHCA_BMASK_GET(ERROR_DATA_LENGTH, rblock[0]);
-
-		if (length > EHCA_PAGESIZE)
-			length = EHCA_PAGESIZE;
-
-		print_error_data(shca, data, rblock, length);
-	} else
-		ehca_err(&shca->ib_device,
-			 "Error data could not be fetched: %llx", resource);
-
-	ehca_free_fw_ctrlblock(rblock);
-
-error_data1:
-	return ret;
-
-}
-
-static void dispatch_qp_event(struct ehca_shca *shca, struct ehca_qp *qp,
-			      enum ib_event_type event_type)
-{
-	struct ib_event event;
-
-	/* PATH_MIG without the QP ever having been armed is false alarm */
-	if (event_type == IB_EVENT_PATH_MIG && !qp->mig_armed)
-		return;
-
-	event.device = &shca->ib_device;
-	event.event = event_type;
-
-	if (qp->ext_type == EQPT_SRQ) {
-		if (!qp->ib_srq.event_handler)
-			return;
-
-		event.element.srq = &qp->ib_srq;
-		qp->ib_srq.event_handler(&event, qp->ib_srq.srq_context);
-	} else {
-		if (!qp->ib_qp.event_handler)
-			return;
-
-		event.element.qp = &qp->ib_qp;
-		qp->ib_qp.event_handler(&event, qp->ib_qp.qp_context);
-	}
-}
-
-static void qp_event_callback(struct ehca_shca *shca, u64 eqe,
-			      enum ib_event_type event_type, int fatal)
-{
-	struct ehca_qp *qp;
-	u32 token = EHCA_BMASK_GET(EQE_QP_TOKEN, eqe);
-
-	read_lock(&ehca_qp_idr_lock);
-	qp = idr_find(&ehca_qp_idr, token);
-	if (qp)
-		atomic_inc(&qp->nr_events);
-	read_unlock(&ehca_qp_idr_lock);
-
-	if (!qp)
-		return;
-
-	if (fatal)
-		ehca_error_data(shca, qp, qp->ipz_qp_handle.handle);
-
-	dispatch_qp_event(shca, qp, fatal && qp->ext_type == EQPT_SRQ ?
-			  IB_EVENT_SRQ_ERR : event_type);
-
-	/*
-	 * eHCA only processes one WQE at a time for SRQ base QPs,
-	 * so the last WQE has been processed as soon as the QP enters
-	 * error state.
-	 */
-	if (fatal && qp->ext_type == EQPT_SRQBASE)
-		dispatch_qp_event(shca, qp, IB_EVENT_QP_LAST_WQE_REACHED);
-
-	if (atomic_dec_and_test(&qp->nr_events))
-		wake_up(&qp->wait_completion);
-	return;
-}
-
-static void cq_event_callback(struct ehca_shca *shca,
-			      u64 eqe)
-{
-	struct ehca_cq *cq;
-	u32 token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe);
-
-	read_lock(&ehca_cq_idr_lock);
-	cq = idr_find(&ehca_cq_idr, token);
-	if (cq)
-		atomic_inc(&cq->nr_events);
-	read_unlock(&ehca_cq_idr_lock);
-
-	if (!cq)
-		return;
-
-	ehca_error_data(shca, cq, cq->ipz_cq_handle.handle);
-
-	if (atomic_dec_and_test(&cq->nr_events))
-		wake_up(&cq->wait_completion);
-
-	return;
-}
-
-static void parse_identifier(struct ehca_shca *shca, u64 eqe)
-{
-	u8 identifier = EHCA_BMASK_GET(EQE_EE_IDENTIFIER, eqe);
-
-	switch (identifier) {
-	case 0x02: /* path migrated */
-		qp_event_callback(shca, eqe, IB_EVENT_PATH_MIG, 0);
-		break;
-	case 0x03: /* communication established */
-		qp_event_callback(shca, eqe, IB_EVENT_COMM_EST, 0);
-		break;
-	case 0x04: /* send queue drained */
-		qp_event_callback(shca, eqe, IB_EVENT_SQ_DRAINED, 0);
-		break;
-	case 0x05: /* QP error */
-	case 0x06: /* QP error */
-		qp_event_callback(shca, eqe, IB_EVENT_QP_FATAL, 1);
-		break;
-	case 0x07: /* CQ error */
-	case 0x08: /* CQ error */
-		cq_event_callback(shca, eqe);
-		break;
-	case 0x09: /* MRMWPTE error */
-		ehca_err(&shca->ib_device, "MRMWPTE error.");
-		break;
-	case 0x0A: /* port event */
-		ehca_err(&shca->ib_device, "Port event.");
-		break;
-	case 0x0B: /* MR access error */
-		ehca_err(&shca->ib_device, "MR access error.");
-		break;
-	case 0x0C: /* EQ error */
-		ehca_err(&shca->ib_device, "EQ error.");
-		break;
-	case 0x0D: /* P/Q_Key mismatch */
-		ehca_err(&shca->ib_device, "P/Q_Key mismatch.");
-		break;
-	case 0x10: /* sampling complete */
-		ehca_err(&shca->ib_device, "Sampling complete.");
-		break;
-	case 0x11: /* unaffiliated access error */
-		ehca_err(&shca->ib_device, "Unaffiliated access error.");
-		break;
-	case 0x12: /* path migrating */
-		ehca_err(&shca->ib_device, "Path migrating.");
-		break;
-	case 0x13: /* interface trace stopped */
-		ehca_err(&shca->ib_device, "Interface trace stopped.");
-		break;
-	case 0x14: /* first error capture info available */
-		ehca_info(&shca->ib_device, "First error capture available");
-		break;
-	case 0x15: /* SRQ limit reached */
-		qp_event_callback(shca, eqe, IB_EVENT_SRQ_LIMIT_REACHED, 0);
-		break;
-	default:
-		ehca_err(&shca->ib_device, "Unknown identifier: %x on %s.",
-			 identifier, shca->ib_device.name);
-		break;
-	}
-
-	return;
-}
-
-static void dispatch_port_event(struct ehca_shca *shca, int port_num,
-				enum ib_event_type type, const char *msg)
-{
-	struct ib_event event;
-
-	ehca_info(&shca->ib_device, "port %d %s.", port_num, msg);
-	event.device = &shca->ib_device;
-	event.event = type;
-	event.element.port_num = port_num;
-	ib_dispatch_event(&event);
-}
-
-static void notify_port_conf_change(struct ehca_shca *shca, int port_num)
-{
-	struct ehca_sma_attr  new_attr;
-	struct ehca_sma_attr *old_attr = &shca->sport[port_num - 1].saved_attr;
-
-	ehca_query_sma_attr(shca, port_num, &new_attr);
-
-	if (new_attr.sm_sl  != old_attr->sm_sl ||
-	    new_attr.sm_lid != old_attr->sm_lid)
-		dispatch_port_event(shca, port_num, IB_EVENT_SM_CHANGE,
-				    "SM changed");
-
-	if (new_attr.lid != old_attr->lid ||
-	    new_attr.lmc != old_attr->lmc)
-		dispatch_port_event(shca, port_num, IB_EVENT_LID_CHANGE,
-				    "LID changed");
-
-	if (new_attr.pkey_tbl_len != old_attr->pkey_tbl_len ||
-	    memcmp(new_attr.pkeys, old_attr->pkeys,
-		   sizeof(u16) * new_attr.pkey_tbl_len))
-		dispatch_port_event(shca, port_num, IB_EVENT_PKEY_CHANGE,
-				    "P_Key changed");
-
-	*old_attr = new_attr;
-}
-
-/* replay modify_qp for sqps -- return 0 if all is well, 1 if AQP1 destroyed */
-static int replay_modify_qp(struct ehca_sport *sport)
-{
-	int aqp1_destroyed;
-	unsigned long flags;
-
-	spin_lock_irqsave(&sport->mod_sqp_lock, flags);
-
-	aqp1_destroyed = !sport->ibqp_sqp[IB_QPT_GSI];
-
-	if (sport->ibqp_sqp[IB_QPT_SMI])
-		ehca_recover_sqp(sport->ibqp_sqp[IB_QPT_SMI]);
-	if (!aqp1_destroyed)
-		ehca_recover_sqp(sport->ibqp_sqp[IB_QPT_GSI]);
-
-	spin_unlock_irqrestore(&sport->mod_sqp_lock, flags);
-
-	return aqp1_destroyed;
-}
-
-static void parse_ec(struct ehca_shca *shca, u64 eqe)
-{
-	u8 ec   = EHCA_BMASK_GET(NEQE_EVENT_CODE, eqe);
-	u8 port = EHCA_BMASK_GET(NEQE_PORT_NUMBER, eqe);
-	u8 spec_event;
-	struct ehca_sport *sport = &shca->sport[port - 1];
-
-	switch (ec) {
-	case 0x30: /* port availability change */
-		if (EHCA_BMASK_GET(NEQE_PORT_AVAILABILITY, eqe)) {
-			/* only replay modify_qp calls in autodetect mode;
-			 * if AQP1 was destroyed, the port is already down
-			 * again and we can drop the event.
-			 */
-			if (ehca_nr_ports < 0)
-				if (replay_modify_qp(sport))
-					break;
-
-			sport->port_state = IB_PORT_ACTIVE;
-			dispatch_port_event(shca, port, IB_EVENT_PORT_ACTIVE,
-					    "is active");
-			ehca_query_sma_attr(shca, port, &sport->saved_attr);
-		} else {
-			sport->port_state = IB_PORT_DOWN;
-			dispatch_port_event(shca, port, IB_EVENT_PORT_ERR,
-					    "is inactive");
-		}
-		break;
-	case 0x31:
-		/* port configuration change
-		 * disruptive change is caused by
-		 * LID, PKEY or SM change
-		 */
-		if (EHCA_BMASK_GET(NEQE_DISRUPTIVE, eqe)) {
-			ehca_warn(&shca->ib_device, "disruptive port "
-				  "%d configuration change", port);
-
-			sport->port_state = IB_PORT_DOWN;
-			dispatch_port_event(shca, port, IB_EVENT_PORT_ERR,
-					    "is inactive");
-
-			sport->port_state = IB_PORT_ACTIVE;
-			dispatch_port_event(shca, port, IB_EVENT_PORT_ACTIVE,
-					    "is active");
-			ehca_query_sma_attr(shca, port,
-					    &sport->saved_attr);
-		} else
-			notify_port_conf_change(shca, port);
-		break;
-	case 0x32: /* adapter malfunction */
-		ehca_err(&shca->ib_device, "Adapter malfunction.");
-		break;
-	case 0x33:  /* trace stopped */
-		ehca_err(&shca->ib_device, "Traced stopped.");
-		break;
-	case 0x34: /* util async event */
-		spec_event = EHCA_BMASK_GET(NEQE_SPECIFIC_EVENT, eqe);
-		if (spec_event == 0x80) /* client reregister required */
-			dispatch_port_event(shca, port,
-					    IB_EVENT_CLIENT_REREGISTER,
-					    "client reregister req.");
-		else
-			ehca_warn(&shca->ib_device, "Unknown util async "
-				  "event %x on port %x", spec_event, port);
-		break;
-	default:
-		ehca_err(&shca->ib_device, "Unknown event code: %x on %s.",
-			 ec, shca->ib_device.name);
-		break;
-	}
-
-	return;
-}
-
-static inline void reset_eq_pending(struct ehca_cq *cq)
-{
-	u64 CQx_EP;
-	struct h_galpa gal = cq->galpas.kernel;
-
-	hipz_galpa_store_cq(gal, cqx_ep, 0x0);
-	CQx_EP = hipz_galpa_load(gal, CQTEMM_OFFSET(cqx_ep));
-
-	return;
-}
-
-irqreturn_t ehca_interrupt_neq(int irq, void *dev_id)
-{
-	struct ehca_shca *shca = (struct ehca_shca*)dev_id;
-
-	tasklet_hi_schedule(&shca->neq.interrupt_task);
-
-	return IRQ_HANDLED;
-}
-
-void ehca_tasklet_neq(unsigned long data)
-{
-	struct ehca_shca *shca = (struct ehca_shca*)data;
-	struct ehca_eqe *eqe;
-	u64 ret;
-
-	eqe = ehca_poll_eq(shca, &shca->neq);
-
-	while (eqe) {
-		if (!EHCA_BMASK_GET(NEQE_COMPLETION_EVENT, eqe->entry))
-			parse_ec(shca, eqe->entry);
-
-		eqe = ehca_poll_eq(shca, &shca->neq);
-	}
-
-	ret = hipz_h_reset_event(shca->ipz_hca_handle,
-				 shca->neq.ipz_eq_handle, 0xFFFFFFFFFFFFFFFFL);
-
-	if (ret != H_SUCCESS)
-		ehca_err(&shca->ib_device, "Can't clear notification events.");
-
-	return;
-}
-
-irqreturn_t ehca_interrupt_eq(int irq, void *dev_id)
-{
-	struct ehca_shca *shca = (struct ehca_shca*)dev_id;
-
-	tasklet_hi_schedule(&shca->eq.interrupt_task);
-
-	return IRQ_HANDLED;
-}
-
-
-static inline void process_eqe(struct ehca_shca *shca, struct ehca_eqe *eqe)
-{
-	u64 eqe_value;
-	u32 token;
-	struct ehca_cq *cq;
-
-	eqe_value = eqe->entry;
-	ehca_dbg(&shca->ib_device, "eqe_value=%llx", eqe_value);
-	if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) {
-		ehca_dbg(&shca->ib_device, "Got completion event");
-		token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value);
-		read_lock(&ehca_cq_idr_lock);
-		cq = idr_find(&ehca_cq_idr, token);
-		if (cq)
-			atomic_inc(&cq->nr_events);
-		read_unlock(&ehca_cq_idr_lock);
-		if (cq == NULL) {
-			ehca_err(&shca->ib_device,
-				 "Invalid eqe for non-existing cq token=%x",
-				 token);
-			return;
-		}
-		reset_eq_pending(cq);
-		if (ehca_scaling_code)
-			queue_comp_task(cq);
-		else {
-			comp_event_callback(cq);
-			if (atomic_dec_and_test(&cq->nr_events))
-				wake_up(&cq->wait_completion);
-		}
-	} else {
-		ehca_dbg(&shca->ib_device, "Got non completion event");
-		parse_identifier(shca, eqe_value);
-	}
-}
-
-void ehca_process_eq(struct ehca_shca *shca, int is_irq)
-{
-	struct ehca_eq *eq = &shca->eq;
-	struct ehca_eqe_cache_entry *eqe_cache = eq->eqe_cache;
-	u64 eqe_value, ret;
-	int eqe_cnt, i;
-	int eq_empty = 0;
-
-	spin_lock(&eq->irq_spinlock);
-	if (is_irq) {
-		const int max_query_cnt = 100;
-		int query_cnt = 0;
-		int int_state = 1;
-		do {
-			int_state = hipz_h_query_int_state(
-				shca->ipz_hca_handle, eq->ist);
-			query_cnt++;
-			iosync();
-		} while (int_state && query_cnt < max_query_cnt);
-		if (unlikely((query_cnt == max_query_cnt)))
-			ehca_dbg(&shca->ib_device, "int_state=%x query_cnt=%x",
-				 int_state, query_cnt);
-	}
-
-	/* read out all eqes */
-	eqe_cnt = 0;
-	do {
-		u32 token;
-		eqe_cache[eqe_cnt].eqe = ehca_poll_eq(shca, eq);
-		if (!eqe_cache[eqe_cnt].eqe)
-			break;
-		eqe_value = eqe_cache[eqe_cnt].eqe->entry;
-		if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) {
-			token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value);
-			read_lock(&ehca_cq_idr_lock);
-			eqe_cache[eqe_cnt].cq = idr_find(&ehca_cq_idr, token);
-			if (eqe_cache[eqe_cnt].cq)
-				atomic_inc(&eqe_cache[eqe_cnt].cq->nr_events);
-			read_unlock(&ehca_cq_idr_lock);
-			if (!eqe_cache[eqe_cnt].cq) {
-				ehca_err(&shca->ib_device,
-					 "Invalid eqe for non-existing cq "
-					 "token=%x", token);
-				continue;
-			}
-		} else
-			eqe_cache[eqe_cnt].cq = NULL;
-		eqe_cnt++;
-	} while (eqe_cnt < EHCA_EQE_CACHE_SIZE);
-	if (!eqe_cnt) {
-		if (is_irq)
-			ehca_dbg(&shca->ib_device,
-				 "No eqe found for irq event");
-		goto unlock_irq_spinlock;
-	} else if (!is_irq) {
-		ret = hipz_h_eoi(eq->ist);
-		if (ret != H_SUCCESS)
-			ehca_err(&shca->ib_device,
-				 "bad return code EOI -rc = %lld\n", ret);
-		ehca_dbg(&shca->ib_device, "deadman found %x eqe", eqe_cnt);
-	}
-	if (unlikely(eqe_cnt == EHCA_EQE_CACHE_SIZE))
-		ehca_dbg(&shca->ib_device, "too many eqes for one irq event");
-	/* enable irq for new packets */
-	for (i = 0; i < eqe_cnt; i++) {
-		if (eq->eqe_cache[i].cq)
-			reset_eq_pending(eq->eqe_cache[i].cq);
-	}
-	/* check eq */
-	spin_lock(&eq->spinlock);
-	eq_empty = (!ipz_eqit_eq_peek_valid(&shca->eq.ipz_queue));
-	spin_unlock(&eq->spinlock);
-	/* call completion handler for cached eqes */
-	for (i = 0; i < eqe_cnt; i++)
-		if (eq->eqe_cache[i].cq) {
-			if (ehca_scaling_code)
-				queue_comp_task(eq->eqe_cache[i].cq);
-			else {
-				struct ehca_cq *cq = eq->eqe_cache[i].cq;
-				comp_event_callback(cq);
-				if (atomic_dec_and_test(&cq->nr_events))
-					wake_up(&cq->wait_completion);
-			}
-		} else {
-			ehca_dbg(&shca->ib_device, "Got non completion event");
-			parse_identifier(shca, eq->eqe_cache[i].eqe->entry);
-		}
-	/* poll eq if not empty */
-	if (eq_empty)
-		goto unlock_irq_spinlock;
-	do {
-		struct ehca_eqe *eqe;
-		eqe = ehca_poll_eq(shca, &shca->eq);
-		if (!eqe)
-			break;
-		process_eqe(shca, eqe);
-	} while (1);
-
-unlock_irq_spinlock:
-	spin_unlock(&eq->irq_spinlock);
-}
-
-void ehca_tasklet_eq(unsigned long data)
-{
-	ehca_process_eq((struct ehca_shca*)data, 1);
-}
-
-static int find_next_online_cpu(struct ehca_comp_pool *pool)
-{
-	int cpu;
-	unsigned long flags;
-
-	WARN_ON_ONCE(!in_interrupt());
-	if (ehca_debug_level >= 3)
-		ehca_dmp(cpu_online_mask, cpumask_size(), "");
-
-	spin_lock_irqsave(&pool->last_cpu_lock, flags);
-	do {
-		cpu = cpumask_next(pool->last_cpu, cpu_online_mask);
-		if (cpu >= nr_cpu_ids)
-			cpu = cpumask_first(cpu_online_mask);
-		pool->last_cpu = cpu;
-	} while (!per_cpu_ptr(pool->cpu_comp_tasks, cpu)->active);
-	spin_unlock_irqrestore(&pool->last_cpu_lock, flags);
-
-	return cpu;
-}
-
-static void __queue_comp_task(struct ehca_cq *__cq,
-			      struct ehca_cpu_comp_task *cct,
-			      struct task_struct *thread)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&cct->task_lock, flags);
-	spin_lock(&__cq->task_lock);
-
-	if (__cq->nr_callbacks == 0) {
-		__cq->nr_callbacks++;
-		list_add_tail(&__cq->entry, &cct->cq_list);
-		cct->cq_jobs++;
-		wake_up_process(thread);
-	} else
-		__cq->nr_callbacks++;
-
-	spin_unlock(&__cq->task_lock);
-	spin_unlock_irqrestore(&cct->task_lock, flags);
-}
-
-static void queue_comp_task(struct ehca_cq *__cq)
-{
-	int cpu_id;
-	struct ehca_cpu_comp_task *cct;
-	struct task_struct *thread;
-	int cq_jobs;
-	unsigned long flags;
-
-	cpu_id = find_next_online_cpu(pool);
-	BUG_ON(!cpu_online(cpu_id));
-
-	cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id);
-	thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu_id);
-	BUG_ON(!cct || !thread);
-
-	spin_lock_irqsave(&cct->task_lock, flags);
-	cq_jobs = cct->cq_jobs;
-	spin_unlock_irqrestore(&cct->task_lock, flags);
-	if (cq_jobs > 0) {
-		cpu_id = find_next_online_cpu(pool);
-		cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id);
-		thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu_id);
-		BUG_ON(!cct || !thread);
-	}
-	__queue_comp_task(__cq, cct, thread);
-}
-
-static void run_comp_task(struct ehca_cpu_comp_task *cct)
-{
-	struct ehca_cq *cq;
-
-	while (!list_empty(&cct->cq_list)) {
-		cq = list_entry(cct->cq_list.next, struct ehca_cq, entry);
-		spin_unlock_irq(&cct->task_lock);
-
-		comp_event_callback(cq);
-		if (atomic_dec_and_test(&cq->nr_events))
-			wake_up(&cq->wait_completion);
-
-		spin_lock_irq(&cct->task_lock);
-		spin_lock(&cq->task_lock);
-		cq->nr_callbacks--;
-		if (!cq->nr_callbacks) {
-			list_del_init(cct->cq_list.next);
-			cct->cq_jobs--;
-		}
-		spin_unlock(&cq->task_lock);
-	}
-}
-
-static void comp_task_park(unsigned int cpu)
-{
-	struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
-	struct ehca_cpu_comp_task *target;
-	struct task_struct *thread;
-	struct ehca_cq *cq, *tmp;
-	LIST_HEAD(list);
-
-	spin_lock_irq(&cct->task_lock);
-	cct->cq_jobs = 0;
-	cct->active = 0;
-	list_splice_init(&cct->cq_list, &list);
-	spin_unlock_irq(&cct->task_lock);
-
-	cpu = find_next_online_cpu(pool);
-	target = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
-	thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu);
-	spin_lock_irq(&target->task_lock);
-	list_for_each_entry_safe(cq, tmp, &list, entry) {
-		list_del(&cq->entry);
-		__queue_comp_task(cq, target, thread);
-	}
-	spin_unlock_irq(&target->task_lock);
-}
-
-static void comp_task_stop(unsigned int cpu, bool online)
-{
-	struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
-
-	spin_lock_irq(&cct->task_lock);
-	cct->cq_jobs = 0;
-	cct->active = 0;
-	WARN_ON(!list_empty(&cct->cq_list));
-	spin_unlock_irq(&cct->task_lock);
-}
-
-static int comp_task_should_run(unsigned int cpu)
-{
-	struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
-
-	return cct->cq_jobs;
-}
-
-static void comp_task(unsigned int cpu)
-{
-	struct ehca_cpu_comp_task *cct = this_cpu_ptr(pool->cpu_comp_tasks);
-	int cql_empty;
-
-	spin_lock_irq(&cct->task_lock);
-	cql_empty = list_empty(&cct->cq_list);
-	if (!cql_empty) {
-		__set_current_state(TASK_RUNNING);
-		run_comp_task(cct);
-	}
-	spin_unlock_irq(&cct->task_lock);
-}
-
-static struct smp_hotplug_thread comp_pool_threads = {
-	.thread_should_run	= comp_task_should_run,
-	.thread_fn		= comp_task,
-	.thread_comm		= "ehca_comp/%u",
-	.cleanup		= comp_task_stop,
-	.park			= comp_task_park,
-};
-
-int ehca_create_comp_pool(void)
-{
-	int cpu, ret = -ENOMEM;
-
-	if (!ehca_scaling_code)
-		return 0;
-
-	pool = kzalloc(sizeof(struct ehca_comp_pool), GFP_KERNEL);
-	if (pool == NULL)
-		return -ENOMEM;
-
-	spin_lock_init(&pool->last_cpu_lock);
-	pool->last_cpu = cpumask_any(cpu_online_mask);
-
-	pool->cpu_comp_tasks = alloc_percpu(struct ehca_cpu_comp_task);
-	if (!pool->cpu_comp_tasks)
-		goto out_pool;
-
-	pool->cpu_comp_threads = alloc_percpu(struct task_struct *);
-	if (!pool->cpu_comp_threads)
-		goto out_tasks;
-
-	for_each_present_cpu(cpu) {
-		struct ehca_cpu_comp_task *cct;
-
-		cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
-		spin_lock_init(&cct->task_lock);
-		INIT_LIST_HEAD(&cct->cq_list);
-	}
-
-	comp_pool_threads.store = pool->cpu_comp_threads;
-	ret = smpboot_register_percpu_thread(&comp_pool_threads);
-	if (ret)
-		goto out_threads;
-
-	pr_info("eHCA scaling code enabled\n");
-	return ret;
-
-out_threads:
-	free_percpu(pool->cpu_comp_threads);
-out_tasks:
-	free_percpu(pool->cpu_comp_tasks);
-out_pool:
-	kfree(pool);
-	return ret;
-}
-
-void ehca_destroy_comp_pool(void)
-{
-	if (!ehca_scaling_code)
-		return;
-
-	smpboot_unregister_percpu_thread(&comp_pool_threads);
-
-	free_percpu(pool->cpu_comp_threads);
-	free_percpu(pool->cpu_comp_tasks);
-	kfree(pool);
-}
diff --git a/drivers/staging/rdma/ehca/ehca_irq.h b/drivers/staging/rdma/ehca/ehca_irq.h
deleted file mode 100644
index 5370199..0000000
--- a/drivers/staging/rdma/ehca/ehca_irq.h
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  Function definitions and structs for EQs, NEQs and interrupts
- *
- *  Authors: Heiko J Schick <schickhj@de.ibm.com>
- *           Khadija Souissi <souissi@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __EHCA_IRQ_H
-#define __EHCA_IRQ_H
-
-
-struct ehca_shca;
-
-#include <linux/interrupt.h>
-#include <linux/types.h>
-
-int ehca_error_data(struct ehca_shca *shca, void *data, u64 resource);
-
-irqreturn_t ehca_interrupt_neq(int irq, void *dev_id);
-void ehca_tasklet_neq(unsigned long data);
-
-irqreturn_t ehca_interrupt_eq(int irq, void *dev_id);
-void ehca_tasklet_eq(unsigned long data);
-void ehca_process_eq(struct ehca_shca *shca, int is_irq);
-
-struct ehca_cpu_comp_task {
-	struct list_head cq_list;
-	spinlock_t task_lock;
-	int cq_jobs;
-	int active;
-};
-
-struct ehca_comp_pool {
-	struct ehca_cpu_comp_task __percpu *cpu_comp_tasks;
-	struct task_struct * __percpu *cpu_comp_threads;
-	int last_cpu;
-	spinlock_t last_cpu_lock;
-};
-
-int ehca_create_comp_pool(void);
-void ehca_destroy_comp_pool(void);
-
-#endif
diff --git a/drivers/staging/rdma/ehca/ehca_iverbs.h b/drivers/staging/rdma/ehca/ehca_iverbs.h
deleted file mode 100644
index cca5933..0000000
--- a/drivers/staging/rdma/ehca/ehca_iverbs.h
+++ /dev/null
@@ -1,202 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  Function definitions for internal functions
- *
- *  Authors: Heiko J Schick <schickhj@de.ibm.com>
- *           Dietmar Decker <ddecker@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __EHCA_IVERBS_H__
-#define __EHCA_IVERBS_H__
-
-#include "ehca_classes.h"
-
-int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props,
-		      struct ib_udata *uhw);
-
-int ehca_query_port(struct ib_device *ibdev, u8 port,
-		    struct ib_port_attr *props);
-
-enum rdma_protocol_type
-ehca_query_protocol(struct ib_device *device, u8 port_num);
-
-int ehca_query_sma_attr(struct ehca_shca *shca, u8 port,
-			struct ehca_sma_attr *attr);
-
-int ehca_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 * pkey);
-
-int ehca_query_gid(struct ib_device *ibdev, u8 port, int index,
-		   union ib_gid *gid);
-
-int ehca_modify_port(struct ib_device *ibdev, u8 port, int port_modify_mask,
-		     struct ib_port_modify *props);
-
-struct ib_pd *ehca_alloc_pd(struct ib_device *device,
-			    struct ib_ucontext *context,
-			    struct ib_udata *udata);
-
-int ehca_dealloc_pd(struct ib_pd *pd);
-
-struct ib_ah *ehca_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr);
-
-int ehca_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr);
-
-int ehca_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr);
-
-int ehca_destroy_ah(struct ib_ah *ah);
-
-struct ib_mr *ehca_get_dma_mr(struct ib_pd *pd, int mr_access_flags);
-
-struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
-			       u64 virt, int mr_access_flags,
-			       struct ib_udata *udata);
-
-int ehca_dereg_mr(struct ib_mr *mr);
-
-struct ib_mw *ehca_alloc_mw(struct ib_pd *pd, enum ib_mw_type type);
-
-int ehca_dealloc_mw(struct ib_mw *mw);
-
-struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd,
-			      int mr_access_flags,
-			      struct ib_fmr_attr *fmr_attr);
-
-int ehca_map_phys_fmr(struct ib_fmr *fmr,
-		      u64 *page_list, int list_len, u64 iova);
-
-int ehca_unmap_fmr(struct list_head *fmr_list);
-
-int ehca_dealloc_fmr(struct ib_fmr *fmr);
-
-enum ehca_eq_type {
-	EHCA_EQ = 0, /* Event Queue              */
-	EHCA_NEQ     /* Notification Event Queue */
-};
-
-int ehca_create_eq(struct ehca_shca *shca, struct ehca_eq *eq,
-		   enum ehca_eq_type type, const u32 length);
-
-int ehca_destroy_eq(struct ehca_shca *shca, struct ehca_eq *eq);
-
-void *ehca_poll_eq(struct ehca_shca *shca, struct ehca_eq *eq);
-
-
-struct ib_cq *ehca_create_cq(struct ib_device *device,
-			     const struct ib_cq_init_attr *attr,
-			     struct ib_ucontext *context,
-			     struct ib_udata *udata);
-
-int ehca_destroy_cq(struct ib_cq *cq);
-
-int ehca_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata);
-
-int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc);
-
-int ehca_peek_cq(struct ib_cq *cq, int wc_cnt);
-
-int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags notify_flags);
-
-struct ib_qp *ehca_create_qp(struct ib_pd *pd,
-			     struct ib_qp_init_attr *init_attr,
-			     struct ib_udata *udata);
-
-int ehca_destroy_qp(struct ib_qp *qp);
-
-int ehca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
-		   struct ib_udata *udata);
-
-int ehca_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
-		  int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr);
-
-int ehca_post_send(struct ib_qp *qp, struct ib_send_wr *send_wr,
-		   struct ib_send_wr **bad_send_wr);
-
-int ehca_post_recv(struct ib_qp *qp, struct ib_recv_wr *recv_wr,
-		   struct ib_recv_wr **bad_recv_wr);
-
-int ehca_post_srq_recv(struct ib_srq *srq,
-		       struct ib_recv_wr *recv_wr,
-		       struct ib_recv_wr **bad_recv_wr);
-
-struct ib_srq *ehca_create_srq(struct ib_pd *pd,
-			       struct ib_srq_init_attr *init_attr,
-			       struct ib_udata *udata);
-
-int ehca_modify_srq(struct ib_srq *srq, struct ib_srq_attr *attr,
-		    enum ib_srq_attr_mask attr_mask, struct ib_udata *udata);
-
-int ehca_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr);
-
-int ehca_destroy_srq(struct ib_srq *srq);
-
-u64 ehca_define_sqp(struct ehca_shca *shca, struct ehca_qp *ibqp,
-		    struct ib_qp_init_attr *qp_init_attr);
-
-int ehca_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid);
-
-int ehca_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid);
-
-struct ib_ucontext *ehca_alloc_ucontext(struct ib_device *device,
-					struct ib_udata *udata);
-
-int ehca_dealloc_ucontext(struct ib_ucontext *context);
-
-int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
-
-int ehca_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
-		     const struct ib_wc *in_wc, const struct ib_grh *in_grh,
-		     const struct ib_mad_hdr *in, size_t in_mad_size,
-		     struct ib_mad_hdr *out, size_t *out_mad_size,
-		     u16 *out_mad_pkey_index);
-
-void ehca_poll_eqs(unsigned long data);
-
-int ehca_calc_ipd(struct ehca_shca *shca, int port,
-		  enum ib_rate path_rate, u32 *ipd);
-
-void ehca_add_to_err_list(struct ehca_qp *qp, int on_sq);
-
-#ifdef CONFIG_PPC_64K_PAGES
-void *ehca_alloc_fw_ctrlblock(gfp_t flags);
-void ehca_free_fw_ctrlblock(void *ptr);
-#else
-#define ehca_alloc_fw_ctrlblock(flags) ((void *)get_zeroed_page(flags))
-#define ehca_free_fw_ctrlblock(ptr) free_page((unsigned long)(ptr))
-#endif
-
-void ehca_recover_sqp(struct ib_qp *sqp);
-
-#endif
diff --git a/drivers/staging/rdma/ehca/ehca_main.c b/drivers/staging/rdma/ehca/ehca_main.c
deleted file mode 100644
index 832f22f..0000000
--- a/drivers/staging/rdma/ehca/ehca_main.c
+++ /dev/null
@@ -1,1118 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  module start stop, hca detection
- *
- *  Authors: Heiko J Schick <schickhj@de.ibm.com>
- *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
- *           Joachim Fenkes <fenkes@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifdef CONFIG_PPC_64K_PAGES
-#include <linux/slab.h>
-#endif
-
-#include <linux/notifier.h>
-#include <linux/memory.h>
-#include <rdma/ib_mad.h>
-#include "ehca_classes.h"
-#include "ehca_iverbs.h"
-#include "ehca_mrmw.h"
-#include "ehca_tools.h"
-#include "hcp_if.h"
-
-#define HCAD_VERSION "0029"
-
-MODULE_LICENSE("Dual BSD/GPL");
-MODULE_AUTHOR("Christoph Raisch <raisch@de.ibm.com>");
-MODULE_DESCRIPTION("IBM eServer HCA InfiniBand Device Driver");
-MODULE_VERSION(HCAD_VERSION);
-
-static bool ehca_open_aqp1    = 0;
-static int ehca_hw_level      = 0;
-static bool ehca_poll_all_eqs = 1;
-
-int ehca_debug_level   = 0;
-int ehca_nr_ports      = -1;
-bool ehca_use_hp_mr    = 0;
-int ehca_port_act_time = 30;
-int ehca_static_rate   = -1;
-bool ehca_scaling_code = 0;
-int ehca_lock_hcalls   = -1;
-int ehca_max_cq        = -1;
-int ehca_max_qp        = -1;
-
-module_param_named(open_aqp1,     ehca_open_aqp1,     bool, S_IRUGO);
-module_param_named(debug_level,   ehca_debug_level,   int,  S_IRUGO);
-module_param_named(hw_level,      ehca_hw_level,      int,  S_IRUGO);
-module_param_named(nr_ports,      ehca_nr_ports,      int,  S_IRUGO);
-module_param_named(use_hp_mr,     ehca_use_hp_mr,     bool, S_IRUGO);
-module_param_named(port_act_time, ehca_port_act_time, int,  S_IRUGO);
-module_param_named(poll_all_eqs,  ehca_poll_all_eqs,  bool, S_IRUGO);
-module_param_named(static_rate,   ehca_static_rate,   int,  S_IRUGO);
-module_param_named(scaling_code,  ehca_scaling_code,  bool, S_IRUGO);
-module_param_named(lock_hcalls,   ehca_lock_hcalls,   bint, S_IRUGO);
-module_param_named(number_of_cqs, ehca_max_cq,        int,  S_IRUGO);
-module_param_named(number_of_qps, ehca_max_qp,        int,  S_IRUGO);
-
-MODULE_PARM_DESC(open_aqp1,
-		 "Open AQP1 on startup (default: no)");
-MODULE_PARM_DESC(debug_level,
-		 "Amount of debug output (0: none (default), 1: traces, "
-		 "2: some dumps, 3: lots)");
-MODULE_PARM_DESC(hw_level,
-		 "Hardware level (0: autosensing (default), "
-		 "0x10..0x14: eHCA, 0x20..0x23: eHCA2)");
-MODULE_PARM_DESC(nr_ports,
-		 "number of connected ports (-1: autodetect (default), "
-		 "1: port one only, 2: two ports)");
-MODULE_PARM_DESC(use_hp_mr,
-		 "Use high performance MRs (default: no)");
-MODULE_PARM_DESC(port_act_time,
-		 "Time to wait for port activation (default: 30 sec)");
-MODULE_PARM_DESC(poll_all_eqs,
-		 "Poll all event queues periodically (default: yes)");
-MODULE_PARM_DESC(static_rate,
-		 "Set permanent static rate (default: no static rate)");
-MODULE_PARM_DESC(scaling_code,
-		 "Enable scaling code (default: no)");
-MODULE_PARM_DESC(lock_hcalls,
-		 "Serialize all hCalls made by the driver "
-		 "(default: autodetect)");
-MODULE_PARM_DESC(number_of_cqs,
-		"Max number of CQs which can be allocated "
-		"(default: autodetect)");
-MODULE_PARM_DESC(number_of_qps,
-		"Max number of QPs which can be allocated "
-		"(default: autodetect)");
-
-DEFINE_RWLOCK(ehca_qp_idr_lock);
-DEFINE_RWLOCK(ehca_cq_idr_lock);
-DEFINE_IDR(ehca_qp_idr);
-DEFINE_IDR(ehca_cq_idr);
-
-static LIST_HEAD(shca_list); /* list of all registered ehcas */
-DEFINE_SPINLOCK(shca_list_lock);
-
-static struct timer_list poll_eqs_timer;
-
-#ifdef CONFIG_PPC_64K_PAGES
-static struct kmem_cache *ctblk_cache;
-
-void *ehca_alloc_fw_ctrlblock(gfp_t flags)
-{
-	void *ret = kmem_cache_zalloc(ctblk_cache, flags);
-	if (!ret)
-		ehca_gen_err("Out of memory for ctblk");
-	return ret;
-}
-
-void ehca_free_fw_ctrlblock(void *ptr)
-{
-	if (ptr)
-		kmem_cache_free(ctblk_cache, ptr);
-
-}
-#endif
-
-int ehca2ib_return_code(u64 ehca_rc)
-{
-	switch (ehca_rc) {
-	case H_SUCCESS:
-		return 0;
-	case H_RESOURCE:             /* Resource in use */
-	case H_BUSY:
-		return -EBUSY;
-	case H_NOT_ENOUGH_RESOURCES: /* insufficient resources */
-	case H_CONSTRAINED:          /* resource constraint */
-	case H_NO_MEM:
-		return -ENOMEM;
-	default:
-		return -EINVAL;
-	}
-}
-
-static int ehca_create_slab_caches(void)
-{
-	int ret;
-
-	ret = ehca_init_pd_cache();
-	if (ret) {
-		ehca_gen_err("Cannot create PD SLAB cache.");
-		return ret;
-	}
-
-	ret = ehca_init_cq_cache();
-	if (ret) {
-		ehca_gen_err("Cannot create CQ SLAB cache.");
-		goto create_slab_caches2;
-	}
-
-	ret = ehca_init_qp_cache();
-	if (ret) {
-		ehca_gen_err("Cannot create QP SLAB cache.");
-		goto create_slab_caches3;
-	}
-
-	ret = ehca_init_av_cache();
-	if (ret) {
-		ehca_gen_err("Cannot create AV SLAB cache.");
-		goto create_slab_caches4;
-	}
-
-	ret = ehca_init_mrmw_cache();
-	if (ret) {
-		ehca_gen_err("Cannot create MR&MW SLAB cache.");
-		goto create_slab_caches5;
-	}
-
-	ret = ehca_init_small_qp_cache();
-	if (ret) {
-		ehca_gen_err("Cannot create small queue SLAB cache.");
-		goto create_slab_caches6;
-	}
-
-#ifdef CONFIG_PPC_64K_PAGES
-	ctblk_cache = kmem_cache_create("ehca_cache_ctblk",
-					EHCA_PAGESIZE, H_CB_ALIGNMENT,
-					SLAB_HWCACHE_ALIGN,
-					NULL);
-	if (!ctblk_cache) {
-		ehca_gen_err("Cannot create ctblk SLAB cache.");
-		ehca_cleanup_small_qp_cache();
-		ret = -ENOMEM;
-		goto create_slab_caches6;
-	}
-#endif
-	return 0;
-
-create_slab_caches6:
-	ehca_cleanup_mrmw_cache();
-
-create_slab_caches5:
-	ehca_cleanup_av_cache();
-
-create_slab_caches4:
-	ehca_cleanup_qp_cache();
-
-create_slab_caches3:
-	ehca_cleanup_cq_cache();
-
-create_slab_caches2:
-	ehca_cleanup_pd_cache();
-
-	return ret;
-}
-
-static void ehca_destroy_slab_caches(void)
-{
-	ehca_cleanup_small_qp_cache();
-	ehca_cleanup_mrmw_cache();
-	ehca_cleanup_av_cache();
-	ehca_cleanup_qp_cache();
-	ehca_cleanup_cq_cache();
-	ehca_cleanup_pd_cache();
-#ifdef CONFIG_PPC_64K_PAGES
-	kmem_cache_destroy(ctblk_cache);
-#endif
-}
-
-#define EHCA_HCAAVER  EHCA_BMASK_IBM(32, 39)
-#define EHCA_REVID    EHCA_BMASK_IBM(40, 63)
-
-static struct cap_descr {
-	u64 mask;
-	char *descr;
-} hca_cap_descr[] = {
-	{ HCA_CAP_AH_PORT_NR_CHECK, "HCA_CAP_AH_PORT_NR_CHECK" },
-	{ HCA_CAP_ATOMIC, "HCA_CAP_ATOMIC" },
-	{ HCA_CAP_AUTO_PATH_MIG, "HCA_CAP_AUTO_PATH_MIG" },
-	{ HCA_CAP_BAD_P_KEY_CTR, "HCA_CAP_BAD_P_KEY_CTR" },
-	{ HCA_CAP_SQD_RTS_PORT_CHANGE, "HCA_CAP_SQD_RTS_PORT_CHANGE" },
-	{ HCA_CAP_CUR_QP_STATE_MOD, "HCA_CAP_CUR_QP_STATE_MOD" },
-	{ HCA_CAP_INIT_TYPE, "HCA_CAP_INIT_TYPE" },
-	{ HCA_CAP_PORT_ACTIVE_EVENT, "HCA_CAP_PORT_ACTIVE_EVENT" },
-	{ HCA_CAP_Q_KEY_VIOL_CTR, "HCA_CAP_Q_KEY_VIOL_CTR" },
-	{ HCA_CAP_WQE_RESIZE, "HCA_CAP_WQE_RESIZE" },
-	{ HCA_CAP_RAW_PACKET_MCAST, "HCA_CAP_RAW_PACKET_MCAST" },
-	{ HCA_CAP_SHUTDOWN_PORT, "HCA_CAP_SHUTDOWN_PORT" },
-	{ HCA_CAP_RC_LL_QP, "HCA_CAP_RC_LL_QP" },
-	{ HCA_CAP_SRQ, "HCA_CAP_SRQ" },
-	{ HCA_CAP_UD_LL_QP, "HCA_CAP_UD_LL_QP" },
-	{ HCA_CAP_RESIZE_MR, "HCA_CAP_RESIZE_MR" },
-	{ HCA_CAP_MINI_QP, "HCA_CAP_MINI_QP" },
-	{ HCA_CAP_H_ALLOC_RES_SYNC, "HCA_CAP_H_ALLOC_RES_SYNC" },
-};
-
-static int ehca_sense_attributes(struct ehca_shca *shca)
-{
-	int i, ret = 0;
-	u64 h_ret;
-	struct hipz_query_hca *rblock;
-	struct hipz_query_port *port;
-	const char *loc_code;
-
-	static const u32 pgsize_map[] = {
-		HCA_CAP_MR_PGSIZE_4K,  0x1000,
-		HCA_CAP_MR_PGSIZE_64K, 0x10000,
-		HCA_CAP_MR_PGSIZE_1M,  0x100000,
-		HCA_CAP_MR_PGSIZE_16M, 0x1000000,
-	};
-
-	ehca_gen_dbg("Probing adapter %s...",
-		     shca->ofdev->dev.of_node->full_name);
-	loc_code = of_get_property(shca->ofdev->dev.of_node, "ibm,loc-code",
-				   NULL);
-	if (loc_code)
-		ehca_gen_dbg(" ... location lode=%s", loc_code);
-
-	rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
-	if (!rblock) {
-		ehca_gen_err("Cannot allocate rblock memory.");
-		return -ENOMEM;
-	}
-
-	h_ret = hipz_h_query_hca(shca->ipz_hca_handle, rblock);
-	if (h_ret != H_SUCCESS) {
-		ehca_gen_err("Cannot query device properties. h_ret=%lli",
-			     h_ret);
-		ret = -EPERM;
-		goto sense_attributes1;
-	}
-
-	if (ehca_nr_ports == 1)
-		shca->num_ports = 1;
-	else
-		shca->num_ports = (u8)rblock->num_ports;
-
-	ehca_gen_dbg(" ... found %x ports", rblock->num_ports);
-
-	if (ehca_hw_level == 0) {
-		u32 hcaaver;
-		u32 revid;
-
-		hcaaver = EHCA_BMASK_GET(EHCA_HCAAVER, rblock->hw_ver);
-		revid   = EHCA_BMASK_GET(EHCA_REVID, rblock->hw_ver);
-
-		ehca_gen_dbg(" ... hardware version=%x:%x", hcaaver, revid);
-
-		if (hcaaver == 1) {
-			if (revid <= 3)
-				shca->hw_level = 0x10 | (revid + 1);
-			else
-				shca->hw_level = 0x14;
-		} else if (hcaaver == 2) {
-			if (revid == 0)
-				shca->hw_level = 0x21;
-			else if (revid == 0x10)
-				shca->hw_level = 0x22;
-			else if (revid == 0x20 || revid == 0x21)
-				shca->hw_level = 0x23;
-		}
-
-		if (!shca->hw_level) {
-			ehca_gen_warn("unknown hardware version"
-				      " - assuming default level");
-			shca->hw_level = 0x22;
-		}
-	} else
-		shca->hw_level = ehca_hw_level;
-	ehca_gen_dbg(" ... hardware level=%x", shca->hw_level);
-
-	shca->hca_cap = rblock->hca_cap_indicators;
-	ehca_gen_dbg(" ... HCA capabilities:");
-	for (i = 0; i < ARRAY_SIZE(hca_cap_descr); i++)
-		if (EHCA_BMASK_GET(hca_cap_descr[i].mask, shca->hca_cap))
-			ehca_gen_dbg("   %s", hca_cap_descr[i].descr);
-
-	/* Autodetect hCall locking -- the "H_ALLOC_RESOURCE synced" flag is
-	 * a firmware property, so it's valid across all adapters
-	 */
-	if (ehca_lock_hcalls == -1)
-		ehca_lock_hcalls = !EHCA_BMASK_GET(HCA_CAP_H_ALLOC_RES_SYNC,
-					shca->hca_cap);
-
-	/* translate supported MR page sizes; always support 4K */
-	shca->hca_cap_mr_pgsize = EHCA_PAGESIZE;
-	for (i = 0; i < ARRAY_SIZE(pgsize_map); i += 2)
-		if (rblock->memory_page_size_supported & pgsize_map[i])
-			shca->hca_cap_mr_pgsize |= pgsize_map[i + 1];
-
-	/* Set maximum number of CQs and QPs to calculate EQ size */
-	if (shca->max_num_qps == -1)
-		shca->max_num_qps = min_t(int, rblock->max_qp,
-					  EHCA_MAX_NUM_QUEUES);
-	else if (shca->max_num_qps < 1 || shca->max_num_qps > rblock->max_qp) {
-		ehca_gen_warn("The requested number of QPs is out of range "
-			      "(1 - %i) specified by HW. Value is set to %i",
-			      rblock->max_qp, rblock->max_qp);
-		shca->max_num_qps = rblock->max_qp;
-	}
-
-	if (shca->max_num_cqs == -1)
-		shca->max_num_cqs = min_t(int, rblock->max_cq,
-					  EHCA_MAX_NUM_QUEUES);
-	else if (shca->max_num_cqs < 1 || shca->max_num_cqs > rblock->max_cq) {
-		ehca_gen_warn("The requested number of CQs is out of range "
-			      "(1 - %i) specified by HW. Value is set to %i",
-			      rblock->max_cq, rblock->max_cq);
-	}
-
-	/* query max MTU from first port -- it's the same for all ports */
-	port = (struct hipz_query_port *)rblock;
-	h_ret = hipz_h_query_port(shca->ipz_hca_handle, 1, port);
-	if (h_ret != H_SUCCESS) {
-		ehca_gen_err("Cannot query port properties. h_ret=%lli",
-			     h_ret);
-		ret = -EPERM;
-		goto sense_attributes1;
-	}
-
-	shca->max_mtu = port->max_mtu;
-
-sense_attributes1:
-	ehca_free_fw_ctrlblock(rblock);
-	return ret;
-}
-
-static int init_node_guid(struct ehca_shca *shca)
-{
-	int ret = 0;
-	struct hipz_query_hca *rblock;
-
-	rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
-	if (!rblock) {
-		ehca_err(&shca->ib_device, "Can't allocate rblock memory.");
-		return -ENOMEM;
-	}
-
-	if (hipz_h_query_hca(shca->ipz_hca_handle, rblock) != H_SUCCESS) {
-		ehca_err(&shca->ib_device, "Can't query device properties");
-		ret = -EINVAL;
-		goto init_node_guid1;
-	}
-
-	memcpy(&shca->ib_device.node_guid, &rblock->node_guid, sizeof(u64));
-
-init_node_guid1:
-	ehca_free_fw_ctrlblock(rblock);
-	return ret;
-}
-
-static int ehca_port_immutable(struct ib_device *ibdev, u8 port_num,
-			       struct ib_port_immutable *immutable)
-{
-	struct ib_port_attr attr;
-	int err;
-
-	err = ehca_query_port(ibdev, port_num, &attr);
-	if (err)
-		return err;
-
-	immutable->pkey_tbl_len = attr.pkey_tbl_len;
-	immutable->gid_tbl_len = attr.gid_tbl_len;
-	immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
-	immutable->max_mad_size = IB_MGMT_MAD_SIZE;
-
-	return 0;
-}
-
-static int ehca_init_device(struct ehca_shca *shca)
-{
-	int ret;
-
-	ret = init_node_guid(shca);
-	if (ret)
-		return ret;
-
-	strlcpy(shca->ib_device.name, "ehca%d", IB_DEVICE_NAME_MAX);
-	shca->ib_device.owner               = THIS_MODULE;
-
-	shca->ib_device.uverbs_abi_ver	    = 8;
-	shca->ib_device.uverbs_cmd_mask	    =
-		(1ull << IB_USER_VERBS_CMD_GET_CONTEXT)		|
-		(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE)	|
-		(1ull << IB_USER_VERBS_CMD_QUERY_PORT)		|
-		(1ull << IB_USER_VERBS_CMD_ALLOC_PD)		|
-		(1ull << IB_USER_VERBS_CMD_DEALLOC_PD)		|
-		(1ull << IB_USER_VERBS_CMD_REG_MR)		|
-		(1ull << IB_USER_VERBS_CMD_DEREG_MR)		|
-		(1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL)	|
-		(1ull << IB_USER_VERBS_CMD_CREATE_CQ)		|
-		(1ull << IB_USER_VERBS_CMD_DESTROY_CQ)		|
-		(1ull << IB_USER_VERBS_CMD_CREATE_QP)		|
-		(1ull << IB_USER_VERBS_CMD_MODIFY_QP)		|
-		(1ull << IB_USER_VERBS_CMD_QUERY_QP)		|
-		(1ull << IB_USER_VERBS_CMD_DESTROY_QP)		|
-		(1ull << IB_USER_VERBS_CMD_ATTACH_MCAST)	|
-		(1ull << IB_USER_VERBS_CMD_DETACH_MCAST);
-
-	shca->ib_device.node_type           = RDMA_NODE_IB_CA;
-	shca->ib_device.phys_port_cnt       = shca->num_ports;
-	shca->ib_device.num_comp_vectors    = 1;
-	shca->ib_device.dma_device          = &shca->ofdev->dev;
-	shca->ib_device.query_device        = ehca_query_device;
-	shca->ib_device.query_port          = ehca_query_port;
-	shca->ib_device.query_gid           = ehca_query_gid;
-	shca->ib_device.query_pkey          = ehca_query_pkey;
-	/* shca->in_device.modify_device    = ehca_modify_device    */
-	shca->ib_device.modify_port         = ehca_modify_port;
-	shca->ib_device.alloc_ucontext      = ehca_alloc_ucontext;
-	shca->ib_device.dealloc_ucontext    = ehca_dealloc_ucontext;
-	shca->ib_device.alloc_pd            = ehca_alloc_pd;
-	shca->ib_device.dealloc_pd          = ehca_dealloc_pd;
-	shca->ib_device.create_ah	    = ehca_create_ah;
-	/* shca->ib_device.modify_ah	    = ehca_modify_ah;	    */
-	shca->ib_device.query_ah	    = ehca_query_ah;
-	shca->ib_device.destroy_ah	    = ehca_destroy_ah;
-	shca->ib_device.create_qp	    = ehca_create_qp;
-	shca->ib_device.modify_qp	    = ehca_modify_qp;
-	shca->ib_device.query_qp	    = ehca_query_qp;
-	shca->ib_device.destroy_qp	    = ehca_destroy_qp;
-	shca->ib_device.post_send	    = ehca_post_send;
-	shca->ib_device.post_recv	    = ehca_post_recv;
-	shca->ib_device.create_cq	    = ehca_create_cq;
-	shca->ib_device.destroy_cq	    = ehca_destroy_cq;
-	shca->ib_device.resize_cq	    = ehca_resize_cq;
-	shca->ib_device.poll_cq		    = ehca_poll_cq;
-	/* shca->ib_device.peek_cq	    = ehca_peek_cq;	    */
-	shca->ib_device.req_notify_cq	    = ehca_req_notify_cq;
-	/* shca->ib_device.req_ncomp_notif  = ehca_req_ncomp_notif; */
-	shca->ib_device.get_dma_mr	    = ehca_get_dma_mr;
-	shca->ib_device.reg_user_mr	    = ehca_reg_user_mr;
-	shca->ib_device.dereg_mr	    = ehca_dereg_mr;
-	shca->ib_device.alloc_mw	    = ehca_alloc_mw;
-	shca->ib_device.dealloc_mw	    = ehca_dealloc_mw;
-	shca->ib_device.alloc_fmr	    = ehca_alloc_fmr;
-	shca->ib_device.map_phys_fmr	    = ehca_map_phys_fmr;
-	shca->ib_device.unmap_fmr	    = ehca_unmap_fmr;
-	shca->ib_device.dealloc_fmr	    = ehca_dealloc_fmr;
-	shca->ib_device.attach_mcast	    = ehca_attach_mcast;
-	shca->ib_device.detach_mcast	    = ehca_detach_mcast;
-	shca->ib_device.process_mad	    = ehca_process_mad;
-	shca->ib_device.mmap		    = ehca_mmap;
-	shca->ib_device.dma_ops		    = &ehca_dma_mapping_ops;
-	shca->ib_device.get_port_immutable  = ehca_port_immutable;
-
-	if (EHCA_BMASK_GET(HCA_CAP_SRQ, shca->hca_cap)) {
-		shca->ib_device.uverbs_cmd_mask |=
-			(1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
-			(1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
-			(1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
-			(1ull << IB_USER_VERBS_CMD_DESTROY_SRQ);
-
-		shca->ib_device.create_srq          = ehca_create_srq;
-		shca->ib_device.modify_srq          = ehca_modify_srq;
-		shca->ib_device.query_srq           = ehca_query_srq;
-		shca->ib_device.destroy_srq         = ehca_destroy_srq;
-		shca->ib_device.post_srq_recv       = ehca_post_srq_recv;
-	}
-
-	return ret;
-}
-
-static int ehca_create_aqp1(struct ehca_shca *shca, u32 port)
-{
-	struct ehca_sport *sport = &shca->sport[port - 1];
-	struct ib_cq *ibcq;
-	struct ib_qp *ibqp;
-	struct ib_qp_init_attr qp_init_attr;
-	struct ib_cq_init_attr cq_attr = {};
-	int ret;
-
-	if (sport->ibcq_aqp1) {
-		ehca_err(&shca->ib_device, "AQP1 CQ is already created.");
-		return -EPERM;
-	}
-
-	cq_attr.cqe = 10;
-	ibcq = ib_create_cq(&shca->ib_device, NULL, NULL, (void *)(-1),
-			    &cq_attr);
-	if (IS_ERR(ibcq)) {
-		ehca_err(&shca->ib_device, "Cannot create AQP1 CQ.");
-		return PTR_ERR(ibcq);
-	}
-	sport->ibcq_aqp1 = ibcq;
-
-	if (sport->ibqp_sqp[IB_QPT_GSI]) {
-		ehca_err(&shca->ib_device, "AQP1 QP is already created.");
-		ret = -EPERM;
-		goto create_aqp1;
-	}
-
-	memset(&qp_init_attr, 0, sizeof(struct ib_qp_init_attr));
-	qp_init_attr.send_cq          = ibcq;
-	qp_init_attr.recv_cq          = ibcq;
-	qp_init_attr.sq_sig_type      = IB_SIGNAL_ALL_WR;
-	qp_init_attr.cap.max_send_wr  = 100;
-	qp_init_attr.cap.max_recv_wr  = 100;
-	qp_init_attr.cap.max_send_sge = 2;
-	qp_init_attr.cap.max_recv_sge = 1;
-	qp_init_attr.qp_type          = IB_QPT_GSI;
-	qp_init_attr.port_num         = port;
-	qp_init_attr.qp_context       = NULL;
-	qp_init_attr.event_handler    = NULL;
-	qp_init_attr.srq              = NULL;
-
-	ibqp = ib_create_qp(&shca->pd->ib_pd, &qp_init_attr);
-	if (IS_ERR(ibqp)) {
-		ehca_err(&shca->ib_device, "Cannot create AQP1 QP.");
-		ret = PTR_ERR(ibqp);
-		goto create_aqp1;
-	}
-	sport->ibqp_sqp[IB_QPT_GSI] = ibqp;
-
-	return 0;
-
-create_aqp1:
-	ib_destroy_cq(sport->ibcq_aqp1);
-	return ret;
-}
-
-static int ehca_destroy_aqp1(struct ehca_sport *sport)
-{
-	int ret;
-
-	ret = ib_destroy_qp(sport->ibqp_sqp[IB_QPT_GSI]);
-	if (ret) {
-		ehca_gen_err("Cannot destroy AQP1 QP. ret=%i", ret);
-		return ret;
-	}
-
-	ret = ib_destroy_cq(sport->ibcq_aqp1);
-	if (ret)
-		ehca_gen_err("Cannot destroy AQP1 CQ. ret=%i", ret);
-
-	return ret;
-}
-
-static ssize_t ehca_show_debug_level(struct device_driver *ddp, char *buf)
-{
-	return snprintf(buf, PAGE_SIZE, "%d\n", ehca_debug_level);
-}
-
-static ssize_t ehca_store_debug_level(struct device_driver *ddp,
-				      const char *buf, size_t count)
-{
-	int value = (*buf) - '0';
-	if (value >= 0 && value <= 9)
-		ehca_debug_level = value;
-	return 1;
-}
-
-static DRIVER_ATTR(debug_level, S_IRUSR | S_IWUSR,
-		   ehca_show_debug_level, ehca_store_debug_level);
-
-static struct attribute *ehca_drv_attrs[] = {
-	&driver_attr_debug_level.attr,
-	NULL
-};
-
-static struct attribute_group ehca_drv_attr_grp = {
-	.attrs = ehca_drv_attrs
-};
-
-static const struct attribute_group *ehca_drv_attr_groups[] = {
-	&ehca_drv_attr_grp,
-	NULL,
-};
-
-#define EHCA_RESOURCE_ATTR(name)                                           \
-static ssize_t  ehca_show_##name(struct device *dev,                       \
-				 struct device_attribute *attr,            \
-				 char *buf)                                \
-{									   \
-	struct ehca_shca *shca;						   \
-	struct hipz_query_hca *rblock;				           \
-	int data;                                                          \
-									   \
-	shca = dev_get_drvdata(dev);					   \
-									   \
-	rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);			   \
-	if (!rblock) {						           \
-		dev_err(dev, "Can't allocate rblock memory.\n");           \
-		return 0;						   \
-	}								   \
-									   \
-	if (hipz_h_query_hca(shca->ipz_hca_handle, rblock) != H_SUCCESS) { \
-		dev_err(dev, "Can't query device properties\n");           \
-		ehca_free_fw_ctrlblock(rblock);			   	   \
-		return 0;					   	   \
-	}								   \
-									   \
-	data = rblock->name;                                               \
-	ehca_free_fw_ctrlblock(rblock);                                    \
-									   \
-	if ((strcmp(#name, "num_ports") == 0) && (ehca_nr_ports == 1))	   \
-		return snprintf(buf, 256, "1\n");			   \
-	else								   \
-		return snprintf(buf, 256, "%d\n", data);		   \
-									   \
-}									   \
-static DEVICE_ATTR(name, S_IRUGO, ehca_show_##name, NULL);
-
-EHCA_RESOURCE_ATTR(num_ports);
-EHCA_RESOURCE_ATTR(hw_ver);
-EHCA_RESOURCE_ATTR(max_eq);
-EHCA_RESOURCE_ATTR(cur_eq);
-EHCA_RESOURCE_ATTR(max_cq);
-EHCA_RESOURCE_ATTR(cur_cq);
-EHCA_RESOURCE_ATTR(max_qp);
-EHCA_RESOURCE_ATTR(cur_qp);
-EHCA_RESOURCE_ATTR(max_mr);
-EHCA_RESOURCE_ATTR(cur_mr);
-EHCA_RESOURCE_ATTR(max_mw);
-EHCA_RESOURCE_ATTR(cur_mw);
-EHCA_RESOURCE_ATTR(max_pd);
-EHCA_RESOURCE_ATTR(max_ah);
-
-static ssize_t ehca_show_adapter_handle(struct device *dev,
-					struct device_attribute *attr,
-					char *buf)
-{
-	struct ehca_shca *shca = dev_get_drvdata(dev);
-
-	return sprintf(buf, "%llx\n", shca->ipz_hca_handle.handle);
-
-}
-static DEVICE_ATTR(adapter_handle, S_IRUGO, ehca_show_adapter_handle, NULL);
-
-static struct attribute *ehca_dev_attrs[] = {
-	&dev_attr_adapter_handle.attr,
-	&dev_attr_num_ports.attr,
-	&dev_attr_hw_ver.attr,
-	&dev_attr_max_eq.attr,
-	&dev_attr_cur_eq.attr,
-	&dev_attr_max_cq.attr,
-	&dev_attr_cur_cq.attr,
-	&dev_attr_max_qp.attr,
-	&dev_attr_cur_qp.attr,
-	&dev_attr_max_mr.attr,
-	&dev_attr_cur_mr.attr,
-	&dev_attr_max_mw.attr,
-	&dev_attr_cur_mw.attr,
-	&dev_attr_max_pd.attr,
-	&dev_attr_max_ah.attr,
-	NULL
-};
-
-static struct attribute_group ehca_dev_attr_grp = {
-	.attrs = ehca_dev_attrs
-};
-
-static int ehca_probe(struct platform_device *dev)
-{
-	struct ehca_shca *shca;
-	const u64 *handle;
-	struct ib_pd *ibpd;
-	int ret, i, eq_size;
-	unsigned long flags;
-
-	handle = of_get_property(dev->dev.of_node, "ibm,hca-handle", NULL);
-	if (!handle) {
-		ehca_gen_err("Cannot get eHCA handle for adapter: %s.",
-			     dev->dev.of_node->full_name);
-		return -ENODEV;
-	}
-
-	if (!(*handle)) {
-		ehca_gen_err("Wrong eHCA handle for adapter: %s.",
-			     dev->dev.of_node->full_name);
-		return -ENODEV;
-	}
-
-	shca = (struct ehca_shca *)ib_alloc_device(sizeof(*shca));
-	if (!shca) {
-		ehca_gen_err("Cannot allocate shca memory.");
-		return -ENOMEM;
-	}
-
-	mutex_init(&shca->modify_mutex);
-	atomic_set(&shca->num_cqs, 0);
-	atomic_set(&shca->num_qps, 0);
-	shca->max_num_qps = ehca_max_qp;
-	shca->max_num_cqs = ehca_max_cq;
-
-	for (i = 0; i < ARRAY_SIZE(shca->sport); i++)
-		spin_lock_init(&shca->sport[i].mod_sqp_lock);
-
-	shca->ofdev = dev;
-	shca->ipz_hca_handle.handle = *handle;
-	dev_set_drvdata(&dev->dev, shca);
-
-	ret = ehca_sense_attributes(shca);
-	if (ret < 0) {
-		ehca_gen_err("Cannot sense eHCA attributes.");
-		goto probe1;
-	}
-
-	ret = ehca_init_device(shca);
-	if (ret) {
-		ehca_gen_err("Cannot init ehca  device struct");
-		goto probe1;
-	}
-
-	eq_size = 2 * shca->max_num_cqs + 4 * shca->max_num_qps;
-	/* create event queues */
-	ret = ehca_create_eq(shca, &shca->eq, EHCA_EQ, eq_size);
-	if (ret) {
-		ehca_err(&shca->ib_device, "Cannot create EQ.");
-		goto probe1;
-	}
-
-	ret = ehca_create_eq(shca, &shca->neq, EHCA_NEQ, 513);
-	if (ret) {
-		ehca_err(&shca->ib_device, "Cannot create NEQ.");
-		goto probe3;
-	}
-
-	/* create internal protection domain */
-	ibpd = ehca_alloc_pd(&shca->ib_device, (void *)(-1), NULL);
-	if (IS_ERR(ibpd)) {
-		ehca_err(&shca->ib_device, "Cannot create internal PD.");
-		ret = PTR_ERR(ibpd);
-		goto probe4;
-	}
-
-	shca->pd = container_of(ibpd, struct ehca_pd, ib_pd);
-	shca->pd->ib_pd.device = &shca->ib_device;
-
-	/* create internal max MR */
-	ret = ehca_reg_internal_maxmr(shca, shca->pd, &shca->maxmr);
-
-	if (ret) {
-		ehca_err(&shca->ib_device, "Cannot create internal MR ret=%i",
-			 ret);
-		goto probe5;
-	}
-
-	ret = ib_register_device(&shca->ib_device, NULL);
-	if (ret) {
-		ehca_err(&shca->ib_device,
-			 "ib_register_device() failed ret=%i", ret);
-		goto probe6;
-	}
-
-	/* create AQP1 for port 1 */
-	if (ehca_open_aqp1 == 1) {
-		shca->sport[0].port_state = IB_PORT_DOWN;
-		ret = ehca_create_aqp1(shca, 1);
-		if (ret) {
-			ehca_err(&shca->ib_device,
-				 "Cannot create AQP1 for port 1.");
-			goto probe7;
-		}
-	}
-
-	/* create AQP1 for port 2 */
-	if ((ehca_open_aqp1 == 1) && (shca->num_ports == 2)) {
-		shca->sport[1].port_state = IB_PORT_DOWN;
-		ret = ehca_create_aqp1(shca, 2);
-		if (ret) {
-			ehca_err(&shca->ib_device,
-				 "Cannot create AQP1 for port 2.");
-			goto probe8;
-		}
-	}
-
-	ret = sysfs_create_group(&dev->dev.kobj, &ehca_dev_attr_grp);
-	if (ret) /* only complain; we can live without attributes */
-		ehca_err(&shca->ib_device,
-			 "Cannot create device attributes  ret=%d", ret);
-
-	spin_lock_irqsave(&shca_list_lock, flags);
-	list_add(&shca->shca_list, &shca_list);
-	spin_unlock_irqrestore(&shca_list_lock, flags);
-
-	return 0;
-
-probe8:
-	ret = ehca_destroy_aqp1(&shca->sport[0]);
-	if (ret)
-		ehca_err(&shca->ib_device,
-			 "Cannot destroy AQP1 for port 1. ret=%i", ret);
-
-probe7:
-	ib_unregister_device(&shca->ib_device);
-
-probe6:
-	ret = ehca_dereg_internal_maxmr(shca);
-	if (ret)
-		ehca_err(&shca->ib_device,
-			 "Cannot destroy internal MR. ret=%x", ret);
-
-probe5:
-	ret = ehca_dealloc_pd(&shca->pd->ib_pd);
-	if (ret)
-		ehca_err(&shca->ib_device,
-			 "Cannot destroy internal PD. ret=%x", ret);
-
-probe4:
-	ret = ehca_destroy_eq(shca, &shca->neq);
-	if (ret)
-		ehca_err(&shca->ib_device,
-			 "Cannot destroy NEQ. ret=%x", ret);
-
-probe3:
-	ret = ehca_destroy_eq(shca, &shca->eq);
-	if (ret)
-		ehca_err(&shca->ib_device,
-			 "Cannot destroy EQ. ret=%x", ret);
-
-probe1:
-	ib_dealloc_device(&shca->ib_device);
-
-	return -EINVAL;
-}
-
-static int ehca_remove(struct platform_device *dev)
-{
-	struct ehca_shca *shca = dev_get_drvdata(&dev->dev);
-	unsigned long flags;
-	int ret;
-
-	sysfs_remove_group(&dev->dev.kobj, &ehca_dev_attr_grp);
-
-	if (ehca_open_aqp1 == 1) {
-		int i;
-		for (i = 0; i < shca->num_ports; i++) {
-			ret = ehca_destroy_aqp1(&shca->sport[i]);
-			if (ret)
-				ehca_err(&shca->ib_device,
-					 "Cannot destroy AQP1 for port %x "
-					 "ret=%i", ret, i);
-		}
-	}
-
-	ib_unregister_device(&shca->ib_device);
-
-	ret = ehca_dereg_internal_maxmr(shca);
-	if (ret)
-		ehca_err(&shca->ib_device,
-			 "Cannot destroy internal MR. ret=%i", ret);
-
-	ret = ehca_dealloc_pd(&shca->pd->ib_pd);
-	if (ret)
-		ehca_err(&shca->ib_device,
-			 "Cannot destroy internal PD. ret=%i", ret);
-
-	ret = ehca_destroy_eq(shca, &shca->eq);
-	if (ret)
-		ehca_err(&shca->ib_device, "Cannot destroy EQ. ret=%i", ret);
-
-	ret = ehca_destroy_eq(shca, &shca->neq);
-	if (ret)
-		ehca_err(&shca->ib_device, "Canot destroy NEQ. ret=%i", ret);
-
-	ib_dealloc_device(&shca->ib_device);
-
-	spin_lock_irqsave(&shca_list_lock, flags);
-	list_del(&shca->shca_list);
-	spin_unlock_irqrestore(&shca_list_lock, flags);
-
-	return ret;
-}
-
-static struct of_device_id ehca_device_table[] =
-{
-	{
-		.name       = "lhca",
-		.compatible = "IBM,lhca",
-	},
-	{},
-};
-MODULE_DEVICE_TABLE(of, ehca_device_table);
-
-static struct platform_driver ehca_driver = {
-	.probe       = ehca_probe,
-	.remove      = ehca_remove,
-	.driver = {
-		.name = "ehca",
-		.owner = THIS_MODULE,
-		.groups = ehca_drv_attr_groups,
-		.of_match_table = ehca_device_table,
-	},
-};
-
-void ehca_poll_eqs(unsigned long data)
-{
-	struct ehca_shca *shca;
-
-	spin_lock(&shca_list_lock);
-	list_for_each_entry(shca, &shca_list, shca_list) {
-		if (shca->eq.is_initialized) {
-			/* call deadman proc only if eq ptr does not change */
-			struct ehca_eq *eq = &shca->eq;
-			int max = 3;
-			volatile u64 q_ofs, q_ofs2;
-			unsigned long flags;
-			spin_lock_irqsave(&eq->spinlock, flags);
-			q_ofs = eq->ipz_queue.current_q_offset;
-			spin_unlock_irqrestore(&eq->spinlock, flags);
-			do {
-				spin_lock_irqsave(&eq->spinlock, flags);
-				q_ofs2 = eq->ipz_queue.current_q_offset;
-				spin_unlock_irqrestore(&eq->spinlock, flags);
-				max--;
-			} while (q_ofs == q_ofs2 && max > 0);
-			if (q_ofs == q_ofs2)
-				ehca_process_eq(shca, 0);
-		}
-	}
-	mod_timer(&poll_eqs_timer, round_jiffies(jiffies + HZ));
-	spin_unlock(&shca_list_lock);
-}
-
-static int ehca_mem_notifier(struct notifier_block *nb,
-			     unsigned long action, void *data)
-{
-	static unsigned long ehca_dmem_warn_time;
-	unsigned long flags;
-
-	switch (action) {
-	case MEM_CANCEL_OFFLINE:
-	case MEM_CANCEL_ONLINE:
-	case MEM_ONLINE:
-	case MEM_OFFLINE:
-		return NOTIFY_OK;
-	case MEM_GOING_ONLINE:
-	case MEM_GOING_OFFLINE:
-		/* only ok if no hca is attached to the lpar */
-		spin_lock_irqsave(&shca_list_lock, flags);
-		if (list_empty(&shca_list)) {
-			spin_unlock_irqrestore(&shca_list_lock, flags);
-			return NOTIFY_OK;
-		} else {
-			spin_unlock_irqrestore(&shca_list_lock, flags);
-			if (printk_timed_ratelimit(&ehca_dmem_warn_time,
-						   30 * 1000))
-				ehca_gen_err("DMEM operations are not allowed"
-					     "in conjunction with eHCA");
-			return NOTIFY_BAD;
-		}
-	}
-	return NOTIFY_OK;
-}
-
-static struct notifier_block ehca_mem_nb = {
-	.notifier_call = ehca_mem_notifier,
-};
-
-static int __init ehca_module_init(void)
-{
-	int ret;
-
-	printk(KERN_INFO "eHCA Infiniband Device Driver "
-	       "(Version " HCAD_VERSION ")\n");
-
-	ret = ehca_create_comp_pool();
-	if (ret) {
-		ehca_gen_err("Cannot create comp pool.");
-		return ret;
-	}
-
-	ret = ehca_create_slab_caches();
-	if (ret) {
-		ehca_gen_err("Cannot create SLAB caches");
-		ret = -ENOMEM;
-		goto module_init1;
-	}
-
-	ret = ehca_create_busmap();
-	if (ret) {
-		ehca_gen_err("Cannot create busmap.");
-		goto module_init2;
-	}
-
-	ret = ibmebus_register_driver(&ehca_driver);
-	if (ret) {
-		ehca_gen_err("Cannot register eHCA device driver");
-		ret = -EINVAL;
-		goto module_init3;
-	}
-
-	ret = register_memory_notifier(&ehca_mem_nb);
-	if (ret) {
-		ehca_gen_err("Failed registering memory add/remove notifier");
-		goto module_init4;
-	}
-
-	if (ehca_poll_all_eqs != 1) {
-		ehca_gen_err("WARNING!!!");
-		ehca_gen_err("It is possible to lose interrupts.");
-	} else {
-		init_timer(&poll_eqs_timer);
-		poll_eqs_timer.function = ehca_poll_eqs;
-		poll_eqs_timer.expires = jiffies + HZ;
-		add_timer(&poll_eqs_timer);
-	}
-
-	return 0;
-
-module_init4:
-	ibmebus_unregister_driver(&ehca_driver);
-
-module_init3:
-	ehca_destroy_busmap();
-
-module_init2:
-	ehca_destroy_slab_caches();
-
-module_init1:
-	ehca_destroy_comp_pool();
-	return ret;
-};
-
-static void __exit ehca_module_exit(void)
-{
-	if (ehca_poll_all_eqs == 1)
-		del_timer_sync(&poll_eqs_timer);
-
-	ibmebus_unregister_driver(&ehca_driver);
-
-	unregister_memory_notifier(&ehca_mem_nb);
-
-	ehca_destroy_busmap();
-
-	ehca_destroy_slab_caches();
-
-	ehca_destroy_comp_pool();
-
-	idr_destroy(&ehca_cq_idr);
-	idr_destroy(&ehca_qp_idr);
-};
-
-module_init(ehca_module_init);
-module_exit(ehca_module_exit);
diff --git a/drivers/staging/rdma/ehca/ehca_mcast.c b/drivers/staging/rdma/ehca/ehca_mcast.c
deleted file mode 100644
index cec1815..0000000
--- a/drivers/staging/rdma/ehca/ehca_mcast.c
+++ /dev/null
@@ -1,131 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  mcast  functions
- *
- *  Authors: Khadija Souissi <souissik@de.ibm.com>
- *           Waleri Fomin <fomin@de.ibm.com>
- *           Reinhard Ernst <rernst@de.ibm.com>
- *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
- *           Heiko J Schick <schickhj@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/module.h>
-#include <linux/err.h>
-#include "ehca_classes.h"
-#include "ehca_tools.h"
-#include "ehca_qes.h"
-#include "ehca_iverbs.h"
-#include "hcp_if.h"
-
-#define MAX_MC_LID 0xFFFE
-#define MIN_MC_LID 0xC000	/* Multicast limits */
-#define EHCA_VALID_MULTICAST_GID(gid)  ((gid)[0] == 0xFF)
-#define EHCA_VALID_MULTICAST_LID(lid) \
-	(((lid) >= MIN_MC_LID) && ((lid) <= MAX_MC_LID))
-
-int ehca_attach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
-{
-	struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp);
-	struct ehca_shca *shca = container_of(ibqp->device, struct ehca_shca,
-					      ib_device);
-	union ib_gid my_gid;
-	u64 subnet_prefix, interface_id, h_ret;
-
-	if (ibqp->qp_type != IB_QPT_UD) {
-		ehca_err(ibqp->device, "invalid qp_type=%x", ibqp->qp_type);
-		return -EINVAL;
-	}
-
-	if (!(EHCA_VALID_MULTICAST_GID(gid->raw))) {
-		ehca_err(ibqp->device, "invalid mulitcast gid");
-		return -EINVAL;
-	} else if ((lid < MIN_MC_LID) || (lid > MAX_MC_LID)) {
-		ehca_err(ibqp->device, "invalid mulitcast lid=%x", lid);
-		return -EINVAL;
-	}
-
-	memcpy(&my_gid, gid->raw, sizeof(union ib_gid));
-
-	subnet_prefix = be64_to_cpu(my_gid.global.subnet_prefix);
-	interface_id = be64_to_cpu(my_gid.global.interface_id);
-	h_ret = hipz_h_attach_mcqp(shca->ipz_hca_handle,
-				   my_qp->ipz_qp_handle,
-				   my_qp->galpas.kernel,
-				   lid, subnet_prefix, interface_id);
-	if (h_ret != H_SUCCESS)
-		ehca_err(ibqp->device,
-			 "ehca_qp=%p qp_num=%x hipz_h_attach_mcqp() failed "
-			 "h_ret=%lli", my_qp, ibqp->qp_num, h_ret);
-
-	return ehca2ib_return_code(h_ret);
-}
-
-int ehca_detach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
-{
-	struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp);
-	struct ehca_shca *shca = container_of(ibqp->pd->device,
-					      struct ehca_shca, ib_device);
-	union ib_gid my_gid;
-	u64 subnet_prefix, interface_id, h_ret;
-
-	if (ibqp->qp_type != IB_QPT_UD) {
-		ehca_err(ibqp->device, "invalid qp_type %x", ibqp->qp_type);
-		return -EINVAL;
-	}
-
-	if (!(EHCA_VALID_MULTICAST_GID(gid->raw))) {
-		ehca_err(ibqp->device, "invalid mulitcast gid");
-		return -EINVAL;
-	} else if ((lid < MIN_MC_LID) || (lid > MAX_MC_LID)) {
-		ehca_err(ibqp->device, "invalid mulitcast lid=%x", lid);
-		return -EINVAL;
-	}
-
-	memcpy(&my_gid, gid->raw, sizeof(union ib_gid));
-
-	subnet_prefix = be64_to_cpu(my_gid.global.subnet_prefix);
-	interface_id = be64_to_cpu(my_gid.global.interface_id);
-	h_ret = hipz_h_detach_mcqp(shca->ipz_hca_handle,
-				   my_qp->ipz_qp_handle,
-				   my_qp->galpas.kernel,
-				   lid, subnet_prefix, interface_id);
-	if (h_ret != H_SUCCESS)
-		ehca_err(ibqp->device,
-			 "ehca_qp=%p qp_num=%x hipz_h_detach_mcqp() failed "
-			 "h_ret=%lli", my_qp, ibqp->qp_num, h_ret);
-
-	return ehca2ib_return_code(h_ret);
-}
diff --git a/drivers/staging/rdma/ehca/ehca_mrmw.c b/drivers/staging/rdma/ehca/ehca_mrmw.c
deleted file mode 100644
index 3367205..0000000
--- a/drivers/staging/rdma/ehca/ehca_mrmw.c
+++ /dev/null
@@ -1,2202 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  MR/MW functions
- *
- *  Authors: Dietmar Decker <ddecker@de.ibm.com>
- *           Christoph Raisch <raisch@de.ibm.com>
- *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/slab.h>
-#include <rdma/ib_umem.h>
-
-#include "ehca_iverbs.h"
-#include "ehca_mrmw.h"
-#include "hcp_if.h"
-#include "hipz_hw.h"
-
-#define NUM_CHUNKS(length, chunk_size) \
-	(((length) + (chunk_size - 1)) / (chunk_size))
-
-/* max number of rpages (per hcall register_rpages) */
-#define MAX_RPAGES 512
-
-/* DMEM toleration management */
-#define EHCA_SECTSHIFT        SECTION_SIZE_BITS
-#define EHCA_SECTSIZE          (1UL << EHCA_SECTSHIFT)
-#define EHCA_HUGEPAGESHIFT     34
-#define EHCA_HUGEPAGE_SIZE     (1UL << EHCA_HUGEPAGESHIFT)
-#define EHCA_HUGEPAGE_PFN_MASK ((EHCA_HUGEPAGE_SIZE - 1) >> PAGE_SHIFT)
-#define EHCA_INVAL_ADDR        0xFFFFFFFFFFFFFFFFULL
-#define EHCA_DIR_INDEX_SHIFT 13                   /* 8k Entries in 64k block */
-#define EHCA_TOP_INDEX_SHIFT (EHCA_DIR_INDEX_SHIFT * 2)
-#define EHCA_MAP_ENTRIES (1 << EHCA_DIR_INDEX_SHIFT)
-#define EHCA_TOP_MAP_SIZE (0x10000)               /* currently fixed map size */
-#define EHCA_DIR_MAP_SIZE (0x10000)
-#define EHCA_ENT_MAP_SIZE (0x10000)
-#define EHCA_INDEX_MASK (EHCA_MAP_ENTRIES - 1)
-
-static unsigned long ehca_mr_len;
-
-/*
- * Memory map data structures
- */
-struct ehca_dir_bmap {
-	u64 ent[EHCA_MAP_ENTRIES];
-};
-struct ehca_top_bmap {
-	struct ehca_dir_bmap *dir[EHCA_MAP_ENTRIES];
-};
-struct ehca_bmap {
-	struct ehca_top_bmap *top[EHCA_MAP_ENTRIES];
-};
-
-static struct ehca_bmap *ehca_bmap;
-
-static struct kmem_cache *mr_cache;
-static struct kmem_cache *mw_cache;
-
-enum ehca_mr_pgsize {
-	EHCA_MR_PGSIZE4K  = 0x1000L,
-	EHCA_MR_PGSIZE64K = 0x10000L,
-	EHCA_MR_PGSIZE1M  = 0x100000L,
-	EHCA_MR_PGSIZE16M = 0x1000000L
-};
-
-#define EHCA_MR_PGSHIFT4K  12
-#define EHCA_MR_PGSHIFT64K 16
-#define EHCA_MR_PGSHIFT1M  20
-#define EHCA_MR_PGSHIFT16M 24
-
-static u64 ehca_map_vaddr(void *caddr);
-
-static u32 ehca_encode_hwpage_size(u32 pgsize)
-{
-	int log = ilog2(pgsize);
-	WARN_ON(log < 12 || log > 24 || log & 3);
-	return (log - 12) / 4;
-}
-
-static u64 ehca_get_max_hwpage_size(struct ehca_shca *shca)
-{
-	return rounddown_pow_of_two(shca->hca_cap_mr_pgsize);
-}
-
-static struct ehca_mr *ehca_mr_new(void)
-{
-	struct ehca_mr *me;
-
-	me = kmem_cache_zalloc(mr_cache, GFP_KERNEL);
-	if (me)
-		spin_lock_init(&me->mrlock);
-	else
-		ehca_gen_err("alloc failed");
-
-	return me;
-}
-
-static void ehca_mr_delete(struct ehca_mr *me)
-{
-	kmem_cache_free(mr_cache, me);
-}
-
-static struct ehca_mw *ehca_mw_new(void)
-{
-	struct ehca_mw *me;
-
-	me = kmem_cache_zalloc(mw_cache, GFP_KERNEL);
-	if (me)
-		spin_lock_init(&me->mwlock);
-	else
-		ehca_gen_err("alloc failed");
-
-	return me;
-}
-
-static void ehca_mw_delete(struct ehca_mw *me)
-{
-	kmem_cache_free(mw_cache, me);
-}
-
-/*----------------------------------------------------------------------*/
-
-struct ib_mr *ehca_get_dma_mr(struct ib_pd *pd, int mr_access_flags)
-{
-	struct ib_mr *ib_mr;
-	int ret;
-	struct ehca_mr *e_maxmr;
-	struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
-	struct ehca_shca *shca =
-		container_of(pd->device, struct ehca_shca, ib_device);
-
-	if (shca->maxmr) {
-		e_maxmr = ehca_mr_new();
-		if (!e_maxmr) {
-			ehca_err(&shca->ib_device, "out of memory");
-			ib_mr = ERR_PTR(-ENOMEM);
-			goto get_dma_mr_exit0;
-		}
-
-		ret = ehca_reg_maxmr(shca, e_maxmr,
-				     (void *)ehca_map_vaddr((void *)(KERNELBASE + PHYSICAL_START)),
-				     mr_access_flags, e_pd,
-				     &e_maxmr->ib.ib_mr.lkey,
-				     &e_maxmr->ib.ib_mr.rkey);
-		if (ret) {
-			ehca_mr_delete(e_maxmr);
-			ib_mr = ERR_PTR(ret);
-			goto get_dma_mr_exit0;
-		}
-		ib_mr = &e_maxmr->ib.ib_mr;
-	} else {
-		ehca_err(&shca->ib_device, "no internal max-MR exist!");
-		ib_mr = ERR_PTR(-EINVAL);
-		goto get_dma_mr_exit0;
-	}
-
-get_dma_mr_exit0:
-	if (IS_ERR(ib_mr))
-		ehca_err(&shca->ib_device, "h_ret=%li pd=%p mr_access_flags=%x",
-			 PTR_ERR(ib_mr), pd, mr_access_flags);
-	return ib_mr;
-} /* end ehca_get_dma_mr() */
-
-/*----------------------------------------------------------------------*/
-
-struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
-			       u64 virt, int mr_access_flags,
-			       struct ib_udata *udata)
-{
-	struct ib_mr *ib_mr;
-	struct ehca_mr *e_mr;
-	struct ehca_shca *shca =
-		container_of(pd->device, struct ehca_shca, ib_device);
-	struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
-	struct ehca_mr_pginfo pginfo;
-	int ret, page_shift;
-	u32 num_kpages;
-	u32 num_hwpages;
-	u64 hwpage_size;
-
-	if (!pd) {
-		ehca_gen_err("bad pd=%p", pd);
-		return ERR_PTR(-EFAULT);
-	}
-
-	if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) &&
-	     !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) ||
-	    ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
-	     !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) {
-		/*
-		 * Remote Write Access requires Local Write Access
-		 * Remote Atomic Access requires Local Write Access
-		 */
-		ehca_err(pd->device, "bad input values: mr_access_flags=%x",
-			 mr_access_flags);
-		ib_mr = ERR_PTR(-EINVAL);
-		goto reg_user_mr_exit0;
-	}
-
-	if (length == 0 || virt + length < virt) {
-		ehca_err(pd->device, "bad input values: length=%llx "
-			 "virt_base=%llx", length, virt);
-		ib_mr = ERR_PTR(-EINVAL);
-		goto reg_user_mr_exit0;
-	}
-
-	e_mr = ehca_mr_new();
-	if (!e_mr) {
-		ehca_err(pd->device, "out of memory");
-		ib_mr = ERR_PTR(-ENOMEM);
-		goto reg_user_mr_exit0;
-	}
-
-	e_mr->umem = ib_umem_get(pd->uobject->context, start, length,
-				 mr_access_flags, 0);
-	if (IS_ERR(e_mr->umem)) {
-		ib_mr = (void *)e_mr->umem;
-		goto reg_user_mr_exit1;
-	}
-
-	if (e_mr->umem->page_size != PAGE_SIZE) {
-		ehca_err(pd->device, "page size not supported, "
-			 "e_mr->umem->page_size=%x", e_mr->umem->page_size);
-		ib_mr = ERR_PTR(-EINVAL);
-		goto reg_user_mr_exit2;
-	}
-
-	/* determine number of MR pages */
-	num_kpages = NUM_CHUNKS((virt % PAGE_SIZE) + length, PAGE_SIZE);
-	/* select proper hw_pgsize */
-	page_shift = PAGE_SHIFT;
-	if (e_mr->umem->hugetlb) {
-		/* determine page_shift, clamp between 4K and 16M */
-		page_shift = (fls64(length - 1) + 3) & ~3;
-		page_shift = min(max(page_shift, EHCA_MR_PGSHIFT4K),
-				 EHCA_MR_PGSHIFT16M);
-	}
-	hwpage_size = 1UL << page_shift;
-
-	/* now that we have the desired page size, shift until it's
-	 * supported, too. 4K is always supported, so this terminates.
-	 */
-	while (!(hwpage_size & shca->hca_cap_mr_pgsize))
-		hwpage_size >>= 4;
-
-reg_user_mr_fallback:
-	num_hwpages = NUM_CHUNKS((virt % hwpage_size) + length, hwpage_size);
-	/* register MR on HCA */
-	memset(&pginfo, 0, sizeof(pginfo));
-	pginfo.type = EHCA_MR_PGI_USER;
-	pginfo.hwpage_size = hwpage_size;
-	pginfo.num_kpages = num_kpages;
-	pginfo.num_hwpages = num_hwpages;
-	pginfo.u.usr.region = e_mr->umem;
-	pginfo.next_hwpage = ib_umem_offset(e_mr->umem) / hwpage_size;
-	pginfo.u.usr.next_sg = pginfo.u.usr.region->sg_head.sgl;
-	ret = ehca_reg_mr(shca, e_mr, (u64 *)virt, length, mr_access_flags,
-			  e_pd, &pginfo, &e_mr->ib.ib_mr.lkey,
-			  &e_mr->ib.ib_mr.rkey, EHCA_REG_MR);
-	if (ret == -EINVAL && pginfo.hwpage_size > PAGE_SIZE) {
-		ehca_warn(pd->device, "failed to register mr "
-			  "with hwpage_size=%llx", hwpage_size);
-		ehca_info(pd->device, "try to register mr with "
-			  "kpage_size=%lx", PAGE_SIZE);
-		/*
-		 * this means kpages are not contiguous for a hw page
-		 * try kernel page size as fallback solution
-		 */
-		hwpage_size = PAGE_SIZE;
-		goto reg_user_mr_fallback;
-	}
-	if (ret) {
-		ib_mr = ERR_PTR(ret);
-		goto reg_user_mr_exit2;
-	}
-
-	/* successful registration of all pages */
-	return &e_mr->ib.ib_mr;
-
-reg_user_mr_exit2:
-	ib_umem_release(e_mr->umem);
-reg_user_mr_exit1:
-	ehca_mr_delete(e_mr);
-reg_user_mr_exit0:
-	if (IS_ERR(ib_mr))
-		ehca_err(pd->device, "rc=%li pd=%p mr_access_flags=%x udata=%p",
-			 PTR_ERR(ib_mr), pd, mr_access_flags, udata);
-	return ib_mr;
-} /* end ehca_reg_user_mr() */
-
-/*----------------------------------------------------------------------*/
-
-int ehca_dereg_mr(struct ib_mr *mr)
-{
-	int ret = 0;
-	u64 h_ret;
-	struct ehca_shca *shca =
-		container_of(mr->device, struct ehca_shca, ib_device);
-	struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr);
-
-	if ((e_mr->flags & EHCA_MR_FLAG_FMR)) {
-		ehca_err(mr->device, "not supported for FMR, mr=%p e_mr=%p "
-			 "e_mr->flags=%x", mr, e_mr, e_mr->flags);
-		ret = -EINVAL;
-		goto dereg_mr_exit0;
-	} else if (e_mr == shca->maxmr) {
-		/* should be impossible, however reject to be sure */
-		ehca_err(mr->device, "dereg internal max-MR impossible, mr=%p "
-			 "shca->maxmr=%p mr->lkey=%x",
-			 mr, shca->maxmr, mr->lkey);
-		ret = -EINVAL;
-		goto dereg_mr_exit0;
-	}
-
-	/* TODO: BUSY: MR still has bound window(s) */
-	h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr);
-	if (h_ret != H_SUCCESS) {
-		ehca_err(mr->device, "hipz_free_mr failed, h_ret=%lli shca=%p "
-			 "e_mr=%p hca_hndl=%llx mr_hndl=%llx mr->lkey=%x",
-			 h_ret, shca, e_mr, shca->ipz_hca_handle.handle,
-			 e_mr->ipz_mr_handle.handle, mr->lkey);
-		ret = ehca2ib_return_code(h_ret);
-		goto dereg_mr_exit0;
-	}
-
-	if (e_mr->umem)
-		ib_umem_release(e_mr->umem);
-
-	/* successful deregistration */
-	ehca_mr_delete(e_mr);
-
-dereg_mr_exit0:
-	if (ret)
-		ehca_err(mr->device, "ret=%i mr=%p", ret, mr);
-	return ret;
-} /* end ehca_dereg_mr() */
-
-/*----------------------------------------------------------------------*/
-
-struct ib_mw *ehca_alloc_mw(struct ib_pd *pd, enum ib_mw_type type)
-{
-	struct ib_mw *ib_mw;
-	u64 h_ret;
-	struct ehca_mw *e_mw;
-	struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
-	struct ehca_shca *shca =
-		container_of(pd->device, struct ehca_shca, ib_device);
-	struct ehca_mw_hipzout_parms hipzout;
-
-	if (type != IB_MW_TYPE_1)
-		return ERR_PTR(-EINVAL);
-
-	e_mw = ehca_mw_new();
-	if (!e_mw) {
-		ib_mw = ERR_PTR(-ENOMEM);
-		goto alloc_mw_exit0;
-	}
-
-	h_ret = hipz_h_alloc_resource_mw(shca->ipz_hca_handle, e_mw,
-					 e_pd->fw_pd, &hipzout);
-	if (h_ret != H_SUCCESS) {
-		ehca_err(pd->device, "hipz_mw_allocate failed, h_ret=%lli "
-			 "shca=%p hca_hndl=%llx mw=%p",
-			 h_ret, shca, shca->ipz_hca_handle.handle, e_mw);
-		ib_mw = ERR_PTR(ehca2ib_return_code(h_ret));
-		goto alloc_mw_exit1;
-	}
-	/* successful MW allocation */
-	e_mw->ipz_mw_handle = hipzout.handle;
-	e_mw->ib_mw.rkey    = hipzout.rkey;
-	return &e_mw->ib_mw;
-
-alloc_mw_exit1:
-	ehca_mw_delete(e_mw);
-alloc_mw_exit0:
-	if (IS_ERR(ib_mw))
-		ehca_err(pd->device, "h_ret=%li pd=%p", PTR_ERR(ib_mw), pd);
-	return ib_mw;
-} /* end ehca_alloc_mw() */
-
-/*----------------------------------------------------------------------*/
-
-int ehca_dealloc_mw(struct ib_mw *mw)
-{
-	u64 h_ret;
-	struct ehca_shca *shca =
-		container_of(mw->device, struct ehca_shca, ib_device);
-	struct ehca_mw *e_mw = container_of(mw, struct ehca_mw, ib_mw);
-
-	h_ret = hipz_h_free_resource_mw(shca->ipz_hca_handle, e_mw);
-	if (h_ret != H_SUCCESS) {
-		ehca_err(mw->device, "hipz_free_mw failed, h_ret=%lli shca=%p "
-			 "mw=%p rkey=%x hca_hndl=%llx mw_hndl=%llx",
-			 h_ret, shca, mw, mw->rkey, shca->ipz_hca_handle.handle,
-			 e_mw->ipz_mw_handle.handle);
-		return ehca2ib_return_code(h_ret);
-	}
-	/* successful deallocation */
-	ehca_mw_delete(e_mw);
-	return 0;
-} /* end ehca_dealloc_mw() */
-
-/*----------------------------------------------------------------------*/
-
-struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd,
-			      int mr_access_flags,
-			      struct ib_fmr_attr *fmr_attr)
-{
-	struct ib_fmr *ib_fmr;
-	struct ehca_shca *shca =
-		container_of(pd->device, struct ehca_shca, ib_device);
-	struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
-	struct ehca_mr *e_fmr;
-	int ret;
-	u32 tmp_lkey, tmp_rkey;
-	struct ehca_mr_pginfo pginfo;
-	u64 hw_pgsize;
-
-	/* check other parameters */
-	if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) &&
-	     !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) ||
-	    ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
-	     !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) {
-		/*
-		 * Remote Write Access requires Local Write Access
-		 * Remote Atomic Access requires Local Write Access
-		 */
-		ehca_err(pd->device, "bad input values: mr_access_flags=%x",
-			 mr_access_flags);
-		ib_fmr = ERR_PTR(-EINVAL);
-		goto alloc_fmr_exit0;
-	}
-	if (mr_access_flags & IB_ACCESS_MW_BIND) {
-		ehca_err(pd->device, "bad input values: mr_access_flags=%x",
-			 mr_access_flags);
-		ib_fmr = ERR_PTR(-EINVAL);
-		goto alloc_fmr_exit0;
-	}
-	if ((fmr_attr->max_pages == 0) || (fmr_attr->max_maps == 0)) {
-		ehca_err(pd->device, "bad input values: fmr_attr->max_pages=%x "
-			 "fmr_attr->max_maps=%x fmr_attr->page_shift=%x",
-			 fmr_attr->max_pages, fmr_attr->max_maps,
-			 fmr_attr->page_shift);
-		ib_fmr = ERR_PTR(-EINVAL);
-		goto alloc_fmr_exit0;
-	}
-
-	hw_pgsize = 1 << fmr_attr->page_shift;
-	if (!(hw_pgsize & shca->hca_cap_mr_pgsize)) {
-		ehca_err(pd->device, "unsupported fmr_attr->page_shift=%x",
-			 fmr_attr->page_shift);
-		ib_fmr = ERR_PTR(-EINVAL);
-		goto alloc_fmr_exit0;
-	}
-
-	e_fmr = ehca_mr_new();
-	if (!e_fmr) {
-		ib_fmr = ERR_PTR(-ENOMEM);
-		goto alloc_fmr_exit0;
-	}
-	e_fmr->flags |= EHCA_MR_FLAG_FMR;
-
-	/* register MR on HCA */
-	memset(&pginfo, 0, sizeof(pginfo));
-	pginfo.hwpage_size = hw_pgsize;
-	/*
-	 * pginfo.num_hwpages==0, ie register_rpages() will not be called
-	 * but deferred to map_phys_fmr()
-	 */
-	ret = ehca_reg_mr(shca, e_fmr, NULL,
-			  fmr_attr->max_pages * (1 << fmr_attr->page_shift),
-			  mr_access_flags, e_pd, &pginfo,
-			  &tmp_lkey, &tmp_rkey, EHCA_REG_MR);
-	if (ret) {
-		ib_fmr = ERR_PTR(ret);
-		goto alloc_fmr_exit1;
-	}
-
-	/* successful */
-	e_fmr->hwpage_size = hw_pgsize;
-	e_fmr->fmr_page_size = 1 << fmr_attr->page_shift;
-	e_fmr->fmr_max_pages = fmr_attr->max_pages;
-	e_fmr->fmr_max_maps = fmr_attr->max_maps;
-	e_fmr->fmr_map_cnt = 0;
-	return &e_fmr->ib.ib_fmr;
-
-alloc_fmr_exit1:
-	ehca_mr_delete(e_fmr);
-alloc_fmr_exit0:
-	return ib_fmr;
-} /* end ehca_alloc_fmr() */
-
-/*----------------------------------------------------------------------*/
-
-int ehca_map_phys_fmr(struct ib_fmr *fmr,
-		      u64 *page_list,
-		      int list_len,
-		      u64 iova)
-{
-	int ret;
-	struct ehca_shca *shca =
-		container_of(fmr->device, struct ehca_shca, ib_device);
-	struct ehca_mr *e_fmr = container_of(fmr, struct ehca_mr, ib.ib_fmr);
-	struct ehca_pd *e_pd = container_of(fmr->pd, struct ehca_pd, ib_pd);
-	struct ehca_mr_pginfo pginfo;
-	u32 tmp_lkey, tmp_rkey;
-
-	if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) {
-		ehca_err(fmr->device, "not a FMR, e_fmr=%p e_fmr->flags=%x",
-			 e_fmr, e_fmr->flags);
-		ret = -EINVAL;
-		goto map_phys_fmr_exit0;
-	}
-	ret = ehca_fmr_check_page_list(e_fmr, page_list, list_len);
-	if (ret)
-		goto map_phys_fmr_exit0;
-	if (iova % e_fmr->fmr_page_size) {
-		/* only whole-numbered pages */
-		ehca_err(fmr->device, "bad iova, iova=%llx fmr_page_size=%x",
-			 iova, e_fmr->fmr_page_size);
-		ret = -EINVAL;
-		goto map_phys_fmr_exit0;
-	}
-	if (e_fmr->fmr_map_cnt >= e_fmr->fmr_max_maps) {
-		/* HCAD does not limit the maps, however trace this anyway */
-		ehca_info(fmr->device, "map limit exceeded, fmr=%p "
-			  "e_fmr->fmr_map_cnt=%x e_fmr->fmr_max_maps=%x",
-			  fmr, e_fmr->fmr_map_cnt, e_fmr->fmr_max_maps);
-	}
-
-	memset(&pginfo, 0, sizeof(pginfo));
-	pginfo.type = EHCA_MR_PGI_FMR;
-	pginfo.num_kpages = list_len;
-	pginfo.hwpage_size = e_fmr->hwpage_size;
-	pginfo.num_hwpages =
-		list_len * e_fmr->fmr_page_size / pginfo.hwpage_size;
-	pginfo.u.fmr.page_list = page_list;
-	pginfo.next_hwpage =
-		(iova & (e_fmr->fmr_page_size-1)) / pginfo.hwpage_size;
-	pginfo.u.fmr.fmr_pgsize = e_fmr->fmr_page_size;
-
-	ret = ehca_rereg_mr(shca, e_fmr, (u64 *)iova,
-			    list_len * e_fmr->fmr_page_size,
-			    e_fmr->acl, e_pd, &pginfo, &tmp_lkey, &tmp_rkey);
-	if (ret)
-		goto map_phys_fmr_exit0;
-
-	/* successful reregistration */
-	e_fmr->fmr_map_cnt++;
-	e_fmr->ib.ib_fmr.lkey = tmp_lkey;
-	e_fmr->ib.ib_fmr.rkey = tmp_rkey;
-	return 0;
-
-map_phys_fmr_exit0:
-	if (ret)
-		ehca_err(fmr->device, "ret=%i fmr=%p page_list=%p list_len=%x "
-			 "iova=%llx", ret, fmr, page_list, list_len, iova);
-	return ret;
-} /* end ehca_map_phys_fmr() */
-
-/*----------------------------------------------------------------------*/
-
-int ehca_unmap_fmr(struct list_head *fmr_list)
-{
-	int ret = 0;
-	struct ib_fmr *ib_fmr;
-	struct ehca_shca *shca = NULL;
-	struct ehca_shca *prev_shca;
-	struct ehca_mr *e_fmr;
-	u32 num_fmr = 0;
-	u32 unmap_fmr_cnt = 0;
-
-	/* check all FMR belong to same SHCA, and check internal flag */
-	list_for_each_entry(ib_fmr, fmr_list, list) {
-		prev_shca = shca;
-		shca = container_of(ib_fmr->device, struct ehca_shca,
-				    ib_device);
-		e_fmr = container_of(ib_fmr, struct ehca_mr, ib.ib_fmr);
-		if ((shca != prev_shca) && prev_shca) {
-			ehca_err(&shca->ib_device, "SHCA mismatch, shca=%p "
-				 "prev_shca=%p e_fmr=%p",
-				 shca, prev_shca, e_fmr);
-			ret = -EINVAL;
-			goto unmap_fmr_exit0;
-		}
-		if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) {
-			ehca_err(&shca->ib_device, "not a FMR, e_fmr=%p "
-				 "e_fmr->flags=%x", e_fmr, e_fmr->flags);
-			ret = -EINVAL;
-			goto unmap_fmr_exit0;
-		}
-		num_fmr++;
-	}
-
-	/* loop over all FMRs to unmap */
-	list_for_each_entry(ib_fmr, fmr_list, list) {
-		unmap_fmr_cnt++;
-		e_fmr = container_of(ib_fmr, struct ehca_mr, ib.ib_fmr);
-		shca = container_of(ib_fmr->device, struct ehca_shca,
-				    ib_device);
-		ret = ehca_unmap_one_fmr(shca, e_fmr);
-		if (ret) {
-			/* unmap failed, stop unmapping of rest of FMRs */
-			ehca_err(&shca->ib_device, "unmap of one FMR failed, "
-				 "stop rest, e_fmr=%p num_fmr=%x "
-				 "unmap_fmr_cnt=%x lkey=%x", e_fmr, num_fmr,
-				 unmap_fmr_cnt, e_fmr->ib.ib_fmr.lkey);
-			goto unmap_fmr_exit0;
-		}
-	}
-
-unmap_fmr_exit0:
-	if (ret)
-		ehca_gen_err("ret=%i fmr_list=%p num_fmr=%x unmap_fmr_cnt=%x",
-			     ret, fmr_list, num_fmr, unmap_fmr_cnt);
-	return ret;
-} /* end ehca_unmap_fmr() */
-
-/*----------------------------------------------------------------------*/
-
-int ehca_dealloc_fmr(struct ib_fmr *fmr)
-{
-	int ret;
-	u64 h_ret;
-	struct ehca_shca *shca =
-		container_of(fmr->device, struct ehca_shca, ib_device);
-	struct ehca_mr *e_fmr = container_of(fmr, struct ehca_mr, ib.ib_fmr);
-
-	if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) {
-		ehca_err(fmr->device, "not a FMR, e_fmr=%p e_fmr->flags=%x",
-			 e_fmr, e_fmr->flags);
-		ret = -EINVAL;
-		goto free_fmr_exit0;
-	}
-
-	h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_fmr);
-	if (h_ret != H_SUCCESS) {
-		ehca_err(fmr->device, "hipz_free_mr failed, h_ret=%lli e_fmr=%p "
-			 "hca_hndl=%llx fmr_hndl=%llx fmr->lkey=%x",
-			 h_ret, e_fmr, shca->ipz_hca_handle.handle,
-			 e_fmr->ipz_mr_handle.handle, fmr->lkey);
-		ret = ehca2ib_return_code(h_ret);
-		goto free_fmr_exit0;
-	}
-	/* successful deregistration */
-	ehca_mr_delete(e_fmr);
-	return 0;
-
-free_fmr_exit0:
-	if (ret)
-		ehca_err(&shca->ib_device, "ret=%i fmr=%p", ret, fmr);
-	return ret;
-} /* end ehca_dealloc_fmr() */
-
-/*----------------------------------------------------------------------*/
-
-static int ehca_reg_bmap_mr_rpages(struct ehca_shca *shca,
-				   struct ehca_mr *e_mr,
-				   struct ehca_mr_pginfo *pginfo);
-
-int ehca_reg_mr(struct ehca_shca *shca,
-		struct ehca_mr *e_mr,
-		u64 *iova_start,
-		u64 size,
-		int acl,
-		struct ehca_pd *e_pd,
-		struct ehca_mr_pginfo *pginfo,
-		u32 *lkey, /*OUT*/
-		u32 *rkey, /*OUT*/
-		enum ehca_reg_type reg_type)
-{
-	int ret;
-	u64 h_ret;
-	u32 hipz_acl;
-	struct ehca_mr_hipzout_parms hipzout;
-
-	ehca_mrmw_map_acl(acl, &hipz_acl);
-	ehca_mrmw_set_pgsize_hipz_acl(pginfo->hwpage_size, &hipz_acl);
-	if (ehca_use_hp_mr == 1)
-		hipz_acl |= 0x00000001;
-
-	h_ret = hipz_h_alloc_resource_mr(shca->ipz_hca_handle, e_mr,
-					 (u64)iova_start, size, hipz_acl,
-					 e_pd->fw_pd, &hipzout);
-	if (h_ret != H_SUCCESS) {
-		ehca_err(&shca->ib_device, "hipz_alloc_mr failed, h_ret=%lli "
-			 "hca_hndl=%llx", h_ret, shca->ipz_hca_handle.handle);
-		ret = ehca2ib_return_code(h_ret);
-		goto ehca_reg_mr_exit0;
-	}
-
-	e_mr->ipz_mr_handle = hipzout.handle;
-
-	if (reg_type == EHCA_REG_BUSMAP_MR)
-		ret = ehca_reg_bmap_mr_rpages(shca, e_mr, pginfo);
-	else if (reg_type == EHCA_REG_MR)
-		ret = ehca_reg_mr_rpages(shca, e_mr, pginfo);
-	else
-		ret = -EINVAL;
-
-	if (ret)
-		goto ehca_reg_mr_exit1;
-
-	/* successful registration */
-	e_mr->num_kpages = pginfo->num_kpages;
-	e_mr->num_hwpages = pginfo->num_hwpages;
-	e_mr->hwpage_size = pginfo->hwpage_size;
-	e_mr->start = iova_start;
-	e_mr->size = size;
-	e_mr->acl = acl;
-	*lkey = hipzout.lkey;
-	*rkey = hipzout.rkey;
-	return 0;
-
-ehca_reg_mr_exit1:
-	h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr);
-	if (h_ret != H_SUCCESS) {
-		ehca_err(&shca->ib_device, "h_ret=%lli shca=%p e_mr=%p "
-			 "iova_start=%p size=%llx acl=%x e_pd=%p lkey=%x "
-			 "pginfo=%p num_kpages=%llx num_hwpages=%llx ret=%i",
-			 h_ret, shca, e_mr, iova_start, size, acl, e_pd,
-			 hipzout.lkey, pginfo, pginfo->num_kpages,
-			 pginfo->num_hwpages, ret);
-		ehca_err(&shca->ib_device, "internal error in ehca_reg_mr, "
-			 "not recoverable");
-	}
-ehca_reg_mr_exit0:
-	if (ret)
-		ehca_err(&shca->ib_device, "ret=%i shca=%p e_mr=%p "
-			 "iova_start=%p size=%llx acl=%x e_pd=%p pginfo=%p "
-			 "num_kpages=%llx num_hwpages=%llx",
-			 ret, shca, e_mr, iova_start, size, acl, e_pd, pginfo,
-			 pginfo->num_kpages, pginfo->num_hwpages);
-	return ret;
-} /* end ehca_reg_mr() */
-
-/*----------------------------------------------------------------------*/
-
-int ehca_reg_mr_rpages(struct ehca_shca *shca,
-		       struct ehca_mr *e_mr,
-		       struct ehca_mr_pginfo *pginfo)
-{
-	int ret = 0;
-	u64 h_ret;
-	u32 rnum;
-	u64 rpage;
-	u32 i;
-	u64 *kpage;
-
-	if (!pginfo->num_hwpages) /* in case of fmr */
-		return 0;
-
-	kpage = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
-	if (!kpage) {
-		ehca_err(&shca->ib_device, "kpage alloc failed");
-		ret = -ENOMEM;
-		goto ehca_reg_mr_rpages_exit0;
-	}
-
-	/* max MAX_RPAGES ehca mr pages per register call */
-	for (i = 0; i < NUM_CHUNKS(pginfo->num_hwpages, MAX_RPAGES); i++) {
-
-		if (i == NUM_CHUNKS(pginfo->num_hwpages, MAX_RPAGES) - 1) {
-			rnum = pginfo->num_hwpages % MAX_RPAGES; /* last shot */
-			if (rnum == 0)
-				rnum = MAX_RPAGES;      /* last shot is full */
-		} else
-			rnum = MAX_RPAGES;
-
-		ret = ehca_set_pagebuf(pginfo, rnum, kpage);
-		if (ret) {
-			ehca_err(&shca->ib_device, "ehca_set_pagebuf "
-				 "bad rc, ret=%i rnum=%x kpage=%p",
-				 ret, rnum, kpage);
-			goto ehca_reg_mr_rpages_exit1;
-		}
-
-		if (rnum > 1) {
-			rpage = __pa(kpage);
-			if (!rpage) {
-				ehca_err(&shca->ib_device, "kpage=%p i=%x",
-					 kpage, i);
-				ret = -EFAULT;
-				goto ehca_reg_mr_rpages_exit1;
-			}
-		} else
-			rpage = *kpage;
-
-		h_ret = hipz_h_register_rpage_mr(
-			shca->ipz_hca_handle, e_mr,
-			ehca_encode_hwpage_size(pginfo->hwpage_size),
-			0, rpage, rnum);
-
-		if (i == NUM_CHUNKS(pginfo->num_hwpages, MAX_RPAGES) - 1) {
-			/*
-			 * check for 'registration complete'==H_SUCCESS
-			 * and for 'page registered'==H_PAGE_REGISTERED
-			 */
-			if (h_ret != H_SUCCESS) {
-				ehca_err(&shca->ib_device, "last "
-					 "hipz_reg_rpage_mr failed, h_ret=%lli "
-					 "e_mr=%p i=%x hca_hndl=%llx mr_hndl=%llx"
-					 " lkey=%x", h_ret, e_mr, i,
-					 shca->ipz_hca_handle.handle,
-					 e_mr->ipz_mr_handle.handle,
-					 e_mr->ib.ib_mr.lkey);
-				ret = ehca2ib_return_code(h_ret);
-				break;
-			} else
-				ret = 0;
-		} else if (h_ret != H_PAGE_REGISTERED) {
-			ehca_err(&shca->ib_device, "hipz_reg_rpage_mr failed, "
-				 "h_ret=%lli e_mr=%p i=%x lkey=%x hca_hndl=%llx "
-				 "mr_hndl=%llx", h_ret, e_mr, i,
-				 e_mr->ib.ib_mr.lkey,
-				 shca->ipz_hca_handle.handle,
-				 e_mr->ipz_mr_handle.handle);
-			ret = ehca2ib_return_code(h_ret);
-			break;
-		} else
-			ret = 0;
-	} /* end for(i) */
-
-
-ehca_reg_mr_rpages_exit1:
-	ehca_free_fw_ctrlblock(kpage);
-ehca_reg_mr_rpages_exit0:
-	if (ret)
-		ehca_err(&shca->ib_device, "ret=%i shca=%p e_mr=%p pginfo=%p "
-			 "num_kpages=%llx num_hwpages=%llx", ret, shca, e_mr,
-			 pginfo, pginfo->num_kpages, pginfo->num_hwpages);
-	return ret;
-} /* end ehca_reg_mr_rpages() */
-
-/*----------------------------------------------------------------------*/
-
-inline int ehca_rereg_mr_rereg1(struct ehca_shca *shca,
-				struct ehca_mr *e_mr,
-				u64 *iova_start,
-				u64 size,
-				u32 acl,
-				struct ehca_pd *e_pd,
-				struct ehca_mr_pginfo *pginfo,
-				u32 *lkey, /*OUT*/
-				u32 *rkey) /*OUT*/
-{
-	int ret;
-	u64 h_ret;
-	u32 hipz_acl;
-	u64 *kpage;
-	u64 rpage;
-	struct ehca_mr_pginfo pginfo_save;
-	struct ehca_mr_hipzout_parms hipzout;
-
-	ehca_mrmw_map_acl(acl, &hipz_acl);
-	ehca_mrmw_set_pgsize_hipz_acl(pginfo->hwpage_size, &hipz_acl);
-
-	kpage = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
-	if (!kpage) {
-		ehca_err(&shca->ib_device, "kpage alloc failed");
-		ret = -ENOMEM;
-		goto ehca_rereg_mr_rereg1_exit0;
-	}
-
-	pginfo_save = *pginfo;
-	ret = ehca_set_pagebuf(pginfo, pginfo->num_hwpages, kpage);
-	if (ret) {
-		ehca_err(&shca->ib_device, "set pagebuf failed, e_mr=%p "
-			 "pginfo=%p type=%x num_kpages=%llx num_hwpages=%llx "
-			 "kpage=%p", e_mr, pginfo, pginfo->type,
-			 pginfo->num_kpages, pginfo->num_hwpages, kpage);
-		goto ehca_rereg_mr_rereg1_exit1;
-	}
-	rpage = __pa(kpage);
-	if (!rpage) {
-		ehca_err(&shca->ib_device, "kpage=%p", kpage);
-		ret = -EFAULT;
-		goto ehca_rereg_mr_rereg1_exit1;
-	}
-	h_ret = hipz_h_reregister_pmr(shca->ipz_hca_handle, e_mr,
-				      (u64)iova_start, size, hipz_acl,
-				      e_pd->fw_pd, rpage, &hipzout);
-	if (h_ret != H_SUCCESS) {
-		/*
-		 * reregistration unsuccessful, try it again with the 3 hCalls,
-		 * e.g. this is required in case H_MR_CONDITION
-		 * (MW bound or MR is shared)
-		 */
-		ehca_warn(&shca->ib_device, "hipz_h_reregister_pmr failed "
-			  "(Rereg1), h_ret=%lli e_mr=%p", h_ret, e_mr);
-		*pginfo = pginfo_save;
-		ret = -EAGAIN;
-	} else if ((u64 *)hipzout.vaddr != iova_start) {
-		ehca_err(&shca->ib_device, "PHYP changed iova_start in "
-			 "rereg_pmr, iova_start=%p iova_start_out=%llx e_mr=%p "
-			 "mr_handle=%llx lkey=%x lkey_out=%x", iova_start,
-			 hipzout.vaddr, e_mr, e_mr->ipz_mr_handle.handle,
-			 e_mr->ib.ib_mr.lkey, hipzout.lkey);
-		ret = -EFAULT;
-	} else {
-		/*
-		 * successful reregistration
-		 * note: start and start_out are identical for eServer HCAs
-		 */
-		e_mr->num_kpages = pginfo->num_kpages;
-		e_mr->num_hwpages = pginfo->num_hwpages;
-		e_mr->hwpage_size = pginfo->hwpage_size;
-		e_mr->start = iova_start;
-		e_mr->size = size;
-		e_mr->acl = acl;
-		*lkey = hipzout.lkey;
-		*rkey = hipzout.rkey;
-	}
-
-ehca_rereg_mr_rereg1_exit1:
-	ehca_free_fw_ctrlblock(kpage);
-ehca_rereg_mr_rereg1_exit0:
-	if ( ret && (ret != -EAGAIN) )
-		ehca_err(&shca->ib_device, "ret=%i lkey=%x rkey=%x "
-			 "pginfo=%p num_kpages=%llx num_hwpages=%llx",
-			 ret, *lkey, *rkey, pginfo, pginfo->num_kpages,
-			 pginfo->num_hwpages);
-	return ret;
-} /* end ehca_rereg_mr_rereg1() */
-
-/*----------------------------------------------------------------------*/
-
-int ehca_rereg_mr(struct ehca_shca *shca,
-		  struct ehca_mr *e_mr,
-		  u64 *iova_start,
-		  u64 size,
-		  int acl,
-		  struct ehca_pd *e_pd,
-		  struct ehca_mr_pginfo *pginfo,
-		  u32 *lkey,
-		  u32 *rkey)
-{
-	int ret = 0;
-	u64 h_ret;
-	int rereg_1_hcall = 1; /* 1: use hipz_h_reregister_pmr directly */
-	int rereg_3_hcall = 0; /* 1: use 3 hipz calls for reregistration */
-
-	/* first determine reregistration hCall(s) */
-	if ((pginfo->num_hwpages > MAX_RPAGES) ||
-	    (e_mr->num_hwpages > MAX_RPAGES) ||
-	    (pginfo->num_hwpages > e_mr->num_hwpages)) {
-		ehca_dbg(&shca->ib_device, "Rereg3 case, "
-			 "pginfo->num_hwpages=%llx e_mr->num_hwpages=%x",
-			 pginfo->num_hwpages, e_mr->num_hwpages);
-		rereg_1_hcall = 0;
-		rereg_3_hcall = 1;
-	}
-
-	if (e_mr->flags & EHCA_MR_FLAG_MAXMR) {	/* check for max-MR */
-		rereg_1_hcall = 0;
-		rereg_3_hcall = 1;
-		e_mr->flags &= ~EHCA_MR_FLAG_MAXMR;
-		ehca_err(&shca->ib_device, "Rereg MR for max-MR! e_mr=%p",
-			 e_mr);
-	}
-
-	if (rereg_1_hcall) {
-		ret = ehca_rereg_mr_rereg1(shca, e_mr, iova_start, size,
-					   acl, e_pd, pginfo, lkey, rkey);
-		if (ret) {
-			if (ret == -EAGAIN)
-				rereg_3_hcall = 1;
-			else
-				goto ehca_rereg_mr_exit0;
-		}
-	}
-
-	if (rereg_3_hcall) {
-		struct ehca_mr save_mr;
-
-		/* first deregister old MR */
-		h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr);
-		if (h_ret != H_SUCCESS) {
-			ehca_err(&shca->ib_device, "hipz_free_mr failed, "
-				 "h_ret=%lli e_mr=%p hca_hndl=%llx mr_hndl=%llx "
-				 "mr->lkey=%x",
-				 h_ret, e_mr, shca->ipz_hca_handle.handle,
-				 e_mr->ipz_mr_handle.handle,
-				 e_mr->ib.ib_mr.lkey);
-			ret = ehca2ib_return_code(h_ret);
-			goto ehca_rereg_mr_exit0;
-		}
-		/* clean ehca_mr_t, without changing struct ib_mr and lock */
-		save_mr = *e_mr;
-		ehca_mr_deletenew(e_mr);
-
-		/* set some MR values */
-		e_mr->flags = save_mr.flags;
-		e_mr->hwpage_size = save_mr.hwpage_size;
-		e_mr->fmr_page_size = save_mr.fmr_page_size;
-		e_mr->fmr_max_pages = save_mr.fmr_max_pages;
-		e_mr->fmr_max_maps = save_mr.fmr_max_maps;
-		e_mr->fmr_map_cnt = save_mr.fmr_map_cnt;
-
-		ret = ehca_reg_mr(shca, e_mr, iova_start, size, acl,
-				  e_pd, pginfo, lkey, rkey, EHCA_REG_MR);
-		if (ret) {
-			u32 offset = (u64)(&e_mr->flags) - (u64)e_mr;
-			memcpy(&e_mr->flags, &(save_mr.flags),
-			       sizeof(struct ehca_mr) - offset);
-			goto ehca_rereg_mr_exit0;
-		}
-	}
-
-ehca_rereg_mr_exit0:
-	if (ret)
-		ehca_err(&shca->ib_device, "ret=%i shca=%p e_mr=%p "
-			 "iova_start=%p size=%llx acl=%x e_pd=%p pginfo=%p "
-			 "num_kpages=%llx lkey=%x rkey=%x rereg_1_hcall=%x "
-			 "rereg_3_hcall=%x", ret, shca, e_mr, iova_start, size,
-			 acl, e_pd, pginfo, pginfo->num_kpages, *lkey, *rkey,
-			 rereg_1_hcall, rereg_3_hcall);
-	return ret;
-} /* end ehca_rereg_mr() */
-
-/*----------------------------------------------------------------------*/
-
-int ehca_unmap_one_fmr(struct ehca_shca *shca,
-		       struct ehca_mr *e_fmr)
-{
-	int ret = 0;
-	u64 h_ret;
-	struct ehca_pd *e_pd =
-		container_of(e_fmr->ib.ib_fmr.pd, struct ehca_pd, ib_pd);
-	struct ehca_mr save_fmr;
-	u32 tmp_lkey, tmp_rkey;
-	struct ehca_mr_pginfo pginfo;
-	struct ehca_mr_hipzout_parms hipzout;
-	struct ehca_mr save_mr;
-
-	if (e_fmr->fmr_max_pages <= MAX_RPAGES) {
-		/*
-		 * note: after using rereg hcall with len=0,
-		 * rereg hcall must be used again for registering pages
-		 */
-		h_ret = hipz_h_reregister_pmr(shca->ipz_hca_handle, e_fmr, 0,
-					      0, 0, e_pd->fw_pd, 0, &hipzout);
-		if (h_ret == H_SUCCESS) {
-			/* successful reregistration */
-			e_fmr->start = NULL;
-			e_fmr->size = 0;
-			tmp_lkey = hipzout.lkey;
-			tmp_rkey = hipzout.rkey;
-			return 0;
-		}
-		/*
-		 * should not happen, because length checked above,
-		 * FMRs are not shared and no MW bound to FMRs
-		 */
-		ehca_err(&shca->ib_device, "hipz_reregister_pmr failed "
-			 "(Rereg1), h_ret=%lli e_fmr=%p hca_hndl=%llx "
-			 "mr_hndl=%llx lkey=%x lkey_out=%x",
-			 h_ret, e_fmr, shca->ipz_hca_handle.handle,
-			 e_fmr->ipz_mr_handle.handle,
-			 e_fmr->ib.ib_fmr.lkey, hipzout.lkey);
-		/* try free and rereg */
-	}
-
-	/* first free old FMR */
-	h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_fmr);
-	if (h_ret != H_SUCCESS) {
-		ehca_err(&shca->ib_device, "hipz_free_mr failed, "
-			 "h_ret=%lli e_fmr=%p hca_hndl=%llx mr_hndl=%llx "
-			 "lkey=%x",
-			 h_ret, e_fmr, shca->ipz_hca_handle.handle,
-			 e_fmr->ipz_mr_handle.handle,
-			 e_fmr->ib.ib_fmr.lkey);
-		ret = ehca2ib_return_code(h_ret);
-		goto ehca_unmap_one_fmr_exit0;
-	}
-	/* clean ehca_mr_t, without changing lock */
-	save_fmr = *e_fmr;
-	ehca_mr_deletenew(e_fmr);
-
-	/* set some MR values */
-	e_fmr->flags = save_fmr.flags;
-	e_fmr->hwpage_size = save_fmr.hwpage_size;
-	e_fmr->fmr_page_size = save_fmr.fmr_page_size;
-	e_fmr->fmr_max_pages = save_fmr.fmr_max_pages;
-	e_fmr->fmr_max_maps = save_fmr.fmr_max_maps;
-	e_fmr->fmr_map_cnt = save_fmr.fmr_map_cnt;
-	e_fmr->acl = save_fmr.acl;
-
-	memset(&pginfo, 0, sizeof(pginfo));
-	pginfo.type = EHCA_MR_PGI_FMR;
-	ret = ehca_reg_mr(shca, e_fmr, NULL,
-			  (e_fmr->fmr_max_pages * e_fmr->fmr_page_size),
-			  e_fmr->acl, e_pd, &pginfo, &tmp_lkey,
-			  &tmp_rkey, EHCA_REG_MR);
-	if (ret) {
-		u32 offset = (u64)(&e_fmr->flags) - (u64)e_fmr;
-		memcpy(&e_fmr->flags, &(save_mr.flags),
-		       sizeof(struct ehca_mr) - offset);
-	}
-
-ehca_unmap_one_fmr_exit0:
-	if (ret)
-		ehca_err(&shca->ib_device, "ret=%i tmp_lkey=%x tmp_rkey=%x "
-			 "fmr_max_pages=%x",
-			 ret, tmp_lkey, tmp_rkey, e_fmr->fmr_max_pages);
-	return ret;
-} /* end ehca_unmap_one_fmr() */
-
-/*----------------------------------------------------------------------*/
-
-int ehca_reg_smr(struct ehca_shca *shca,
-		 struct ehca_mr *e_origmr,
-		 struct ehca_mr *e_newmr,
-		 u64 *iova_start,
-		 int acl,
-		 struct ehca_pd *e_pd,
-		 u32 *lkey, /*OUT*/
-		 u32 *rkey) /*OUT*/
-{
-	int ret = 0;
-	u64 h_ret;
-	u32 hipz_acl;
-	struct ehca_mr_hipzout_parms hipzout;
-
-	ehca_mrmw_map_acl(acl, &hipz_acl);
-	ehca_mrmw_set_pgsize_hipz_acl(e_origmr->hwpage_size, &hipz_acl);
-
-	h_ret = hipz_h_register_smr(shca->ipz_hca_handle, e_newmr, e_origmr,
-				    (u64)iova_start, hipz_acl, e_pd->fw_pd,
-				    &hipzout);
-	if (h_ret != H_SUCCESS) {
-		ehca_err(&shca->ib_device, "hipz_reg_smr failed, h_ret=%lli "
-			 "shca=%p e_origmr=%p e_newmr=%p iova_start=%p acl=%x "
-			 "e_pd=%p hca_hndl=%llx mr_hndl=%llx lkey=%x",
-			 h_ret, shca, e_origmr, e_newmr, iova_start, acl, e_pd,
-			 shca->ipz_hca_handle.handle,
-			 e_origmr->ipz_mr_handle.handle,
-			 e_origmr->ib.ib_mr.lkey);
-		ret = ehca2ib_return_code(h_ret);
-		goto ehca_reg_smr_exit0;
-	}
-	/* successful registration */
-	e_newmr->num_kpages = e_origmr->num_kpages;
-	e_newmr->num_hwpages = e_origmr->num_hwpages;
-	e_newmr->hwpage_size   = e_origmr->hwpage_size;
-	e_newmr->start = iova_start;
-	e_newmr->size = e_origmr->size;
-	e_newmr->acl = acl;
-	e_newmr->ipz_mr_handle = hipzout.handle;
-	*lkey = hipzout.lkey;
-	*rkey = hipzout.rkey;
-	return 0;
-
-ehca_reg_smr_exit0:
-	if (ret)
-		ehca_err(&shca->ib_device, "ret=%i shca=%p e_origmr=%p "
-			 "e_newmr=%p iova_start=%p acl=%x e_pd=%p",
-			 ret, shca, e_origmr, e_newmr, iova_start, acl, e_pd);
-	return ret;
-} /* end ehca_reg_smr() */
-
-/*----------------------------------------------------------------------*/
-static inline void *ehca_calc_sectbase(int top, int dir, int idx)
-{
-	unsigned long ret = idx;
-	ret |= dir << EHCA_DIR_INDEX_SHIFT;
-	ret |= top << EHCA_TOP_INDEX_SHIFT;
-	return __va(ret << SECTION_SIZE_BITS);
-}
-
-#define ehca_bmap_valid(entry) \
-	((u64)entry != (u64)EHCA_INVAL_ADDR)
-
-static u64 ehca_reg_mr_section(int top, int dir, int idx, u64 *kpage,
-			       struct ehca_shca *shca, struct ehca_mr *mr,
-			       struct ehca_mr_pginfo *pginfo)
-{
-	u64 h_ret = 0;
-	unsigned long page = 0;
-	u64 rpage = __pa(kpage);
-	int page_count;
-
-	void *sectbase = ehca_calc_sectbase(top, dir, idx);
-	if ((unsigned long)sectbase & (pginfo->hwpage_size - 1)) {
-		ehca_err(&shca->ib_device, "reg_mr_section will probably fail:"
-					   "hwpage_size does not fit to "
-					   "section start address");
-	}
-	page_count = EHCA_SECTSIZE / pginfo->hwpage_size;
-
-	while (page < page_count) {
-		u64 rnum;
-		for (rnum = 0; (rnum < MAX_RPAGES) && (page < page_count);
-		     rnum++) {
-			void *pg = sectbase + ((page++) * pginfo->hwpage_size);
-			kpage[rnum] = __pa(pg);
-		}
-
-		h_ret = hipz_h_register_rpage_mr(shca->ipz_hca_handle, mr,
-			ehca_encode_hwpage_size(pginfo->hwpage_size),
-			0, rpage, rnum);
-
-		if ((h_ret != H_SUCCESS) && (h_ret != H_PAGE_REGISTERED)) {
-			ehca_err(&shca->ib_device, "register_rpage_mr failed");
-			return h_ret;
-		}
-	}
-	return h_ret;
-}
-
-static u64 ehca_reg_mr_sections(int top, int dir, u64 *kpage,
-				struct ehca_shca *shca, struct ehca_mr *mr,
-				struct ehca_mr_pginfo *pginfo)
-{
-	u64 hret = H_SUCCESS;
-	int idx;
-
-	for (idx = 0; idx < EHCA_MAP_ENTRIES; idx++) {
-		if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir]->ent[idx]))
-			continue;
-
-		hret = ehca_reg_mr_section(top, dir, idx, kpage, shca, mr,
-					   pginfo);
-		if ((hret != H_SUCCESS) && (hret != H_PAGE_REGISTERED))
-				return hret;
-	}
-	return hret;
-}
-
-static u64 ehca_reg_mr_dir_sections(int top, u64 *kpage, struct ehca_shca *shca,
-				    struct ehca_mr *mr,
-				    struct ehca_mr_pginfo *pginfo)
-{
-	u64 hret = H_SUCCESS;
-	int dir;
-
-	for (dir = 0; dir < EHCA_MAP_ENTRIES; dir++) {
-		if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir]))
-			continue;
-
-		hret = ehca_reg_mr_sections(top, dir, kpage, shca, mr, pginfo);
-		if ((hret != H_SUCCESS) && (hret != H_PAGE_REGISTERED))
-				return hret;
-	}
-	return hret;
-}
-
-/* register internal max-MR to internal SHCA */
-int ehca_reg_internal_maxmr(
-	struct ehca_shca *shca,
-	struct ehca_pd *e_pd,
-	struct ehca_mr **e_maxmr)  /*OUT*/
-{
-	int ret;
-	struct ehca_mr *e_mr;
-	u64 *iova_start;
-	u64 size_maxmr;
-	struct ehca_mr_pginfo pginfo;
-	u32 num_kpages;
-	u32 num_hwpages;
-	u64 hw_pgsize;
-
-	if (!ehca_bmap) {
-		ret = -EFAULT;
-		goto ehca_reg_internal_maxmr_exit0;
-	}
-
-	e_mr = ehca_mr_new();
-	if (!e_mr) {
-		ehca_err(&shca->ib_device, "out of memory");
-		ret = -ENOMEM;
-		goto ehca_reg_internal_maxmr_exit0;
-	}
-	e_mr->flags |= EHCA_MR_FLAG_MAXMR;
-
-	/* register internal max-MR on HCA */
-	size_maxmr = ehca_mr_len;
-	iova_start = (u64 *)ehca_map_vaddr((void *)(KERNELBASE + PHYSICAL_START));
-	num_kpages = NUM_CHUNKS(((u64)iova_start % PAGE_SIZE) + size_maxmr,
-				PAGE_SIZE);
-	hw_pgsize = ehca_get_max_hwpage_size(shca);
-	num_hwpages = NUM_CHUNKS(((u64)iova_start % hw_pgsize) + size_maxmr,
-				 hw_pgsize);
-
-	memset(&pginfo, 0, sizeof(pginfo));
-	pginfo.type = EHCA_MR_PGI_PHYS;
-	pginfo.num_kpages = num_kpages;
-	pginfo.num_hwpages = num_hwpages;
-	pginfo.hwpage_size = hw_pgsize;
-	pginfo.u.phy.addr = 0;
-	pginfo.u.phy.size = size_maxmr;
-
-	ret = ehca_reg_mr(shca, e_mr, iova_start, size_maxmr, 0, e_pd,
-			  &pginfo, &e_mr->ib.ib_mr.lkey,
-			  &e_mr->ib.ib_mr.rkey, EHCA_REG_BUSMAP_MR);
-	if (ret) {
-		ehca_err(&shca->ib_device, "reg of internal max MR failed, "
-			 "e_mr=%p iova_start=%p size_maxmr=%llx num_kpages=%x "
-			 "num_hwpages=%x", e_mr, iova_start, size_maxmr,
-			 num_kpages, num_hwpages);
-		goto ehca_reg_internal_maxmr_exit1;
-	}
-
-	/* successful registration of all pages */
-	e_mr->ib.ib_mr.device = e_pd->ib_pd.device;
-	e_mr->ib.ib_mr.pd = &e_pd->ib_pd;
-	e_mr->ib.ib_mr.uobject = NULL;
-	atomic_inc(&(e_pd->ib_pd.usecnt));
-	*e_maxmr = e_mr;
-	return 0;
-
-ehca_reg_internal_maxmr_exit1:
-	ehca_mr_delete(e_mr);
-ehca_reg_internal_maxmr_exit0:
-	if (ret)
-		ehca_err(&shca->ib_device, "ret=%i shca=%p e_pd=%p e_maxmr=%p",
-			 ret, shca, e_pd, e_maxmr);
-	return ret;
-} /* end ehca_reg_internal_maxmr() */
-
-/*----------------------------------------------------------------------*/
-
-int ehca_reg_maxmr(struct ehca_shca *shca,
-		   struct ehca_mr *e_newmr,
-		   u64 *iova_start,
-		   int acl,
-		   struct ehca_pd *e_pd,
-		   u32 *lkey,
-		   u32 *rkey)
-{
-	u64 h_ret;
-	struct ehca_mr *e_origmr = shca->maxmr;
-	u32 hipz_acl;
-	struct ehca_mr_hipzout_parms hipzout;
-
-	ehca_mrmw_map_acl(acl, &hipz_acl);
-	ehca_mrmw_set_pgsize_hipz_acl(e_origmr->hwpage_size, &hipz_acl);
-
-	h_ret = hipz_h_register_smr(shca->ipz_hca_handle, e_newmr, e_origmr,
-				    (u64)iova_start, hipz_acl, e_pd->fw_pd,
-				    &hipzout);
-	if (h_ret != H_SUCCESS) {
-		ehca_err(&shca->ib_device, "hipz_reg_smr failed, h_ret=%lli "
-			 "e_origmr=%p hca_hndl=%llx mr_hndl=%llx lkey=%x",
-			 h_ret, e_origmr, shca->ipz_hca_handle.handle,
-			 e_origmr->ipz_mr_handle.handle,
-			 e_origmr->ib.ib_mr.lkey);
-		return ehca2ib_return_code(h_ret);
-	}
-	/* successful registration */
-	e_newmr->num_kpages = e_origmr->num_kpages;
-	e_newmr->num_hwpages = e_origmr->num_hwpages;
-	e_newmr->hwpage_size = e_origmr->hwpage_size;
-	e_newmr->start = iova_start;
-	e_newmr->size = e_origmr->size;
-	e_newmr->acl = acl;
-	e_newmr->ipz_mr_handle = hipzout.handle;
-	*lkey = hipzout.lkey;
-	*rkey = hipzout.rkey;
-	return 0;
-} /* end ehca_reg_maxmr() */
-
-/*----------------------------------------------------------------------*/
-
-int ehca_dereg_internal_maxmr(struct ehca_shca *shca)
-{
-	int ret;
-	struct ehca_mr *e_maxmr;
-	struct ib_pd *ib_pd;
-
-	if (!shca->maxmr) {
-		ehca_err(&shca->ib_device, "bad call, shca=%p", shca);
-		ret = -EINVAL;
-		goto ehca_dereg_internal_maxmr_exit0;
-	}
-
-	e_maxmr = shca->maxmr;
-	ib_pd = e_maxmr->ib.ib_mr.pd;
-	shca->maxmr = NULL; /* remove internal max-MR indication from SHCA */
-
-	ret = ehca_dereg_mr(&e_maxmr->ib.ib_mr);
-	if (ret) {
-		ehca_err(&shca->ib_device, "dereg internal max-MR failed, "
-			 "ret=%i e_maxmr=%p shca=%p lkey=%x",
-			 ret, e_maxmr, shca, e_maxmr->ib.ib_mr.lkey);
-		shca->maxmr = e_maxmr;
-		goto ehca_dereg_internal_maxmr_exit0;
-	}
-
-	atomic_dec(&ib_pd->usecnt);
-
-ehca_dereg_internal_maxmr_exit0:
-	if (ret)
-		ehca_err(&shca->ib_device, "ret=%i shca=%p shca->maxmr=%p",
-			 ret, shca, shca->maxmr);
-	return ret;
-} /* end ehca_dereg_internal_maxmr() */
-
-/*----------------------------------------------------------------------*/
-
-/* check page list of map FMR verb for validness */
-int ehca_fmr_check_page_list(struct ehca_mr *e_fmr,
-			     u64 *page_list,
-			     int list_len)
-{
-	u32 i;
-	u64 *page;
-
-	if ((list_len == 0) || (list_len > e_fmr->fmr_max_pages)) {
-		ehca_gen_err("bad list_len, list_len=%x "
-			     "e_fmr->fmr_max_pages=%x fmr=%p",
-			     list_len, e_fmr->fmr_max_pages, e_fmr);
-		return -EINVAL;
-	}
-
-	/* each page must be aligned */
-	page = page_list;
-	for (i = 0; i < list_len; i++) {
-		if (*page % e_fmr->fmr_page_size) {
-			ehca_gen_err("bad page, i=%x *page=%llx page=%p fmr=%p "
-				     "fmr_page_size=%x", i, *page, page, e_fmr,
-				     e_fmr->fmr_page_size);
-			return -EINVAL;
-		}
-		page++;
-	}
-
-	return 0;
-} /* end ehca_fmr_check_page_list() */
-
-/*----------------------------------------------------------------------*/
-
-/* PAGE_SIZE >= pginfo->hwpage_size */
-static int ehca_set_pagebuf_user1(struct ehca_mr_pginfo *pginfo,
-				  u32 number,
-				  u64 *kpage)
-{
-	int ret = 0;
-	u64 pgaddr;
-	u32 j = 0;
-	int hwpages_per_kpage = PAGE_SIZE / pginfo->hwpage_size;
-	struct scatterlist **sg = &pginfo->u.usr.next_sg;
-
-	while (*sg != NULL) {
-		pgaddr = page_to_pfn(sg_page(*sg))
-			<< PAGE_SHIFT;
-		*kpage = pgaddr + (pginfo->next_hwpage *
-				   pginfo->hwpage_size);
-		if (!(*kpage)) {
-			ehca_gen_err("pgaddr=%llx "
-				     "sg_dma_address=%llx "
-				     "entry=%llx next_hwpage=%llx",
-				     pgaddr, (u64)sg_dma_address(*sg),
-				     pginfo->u.usr.next_nmap,
-				     pginfo->next_hwpage);
-			return -EFAULT;
-		}
-		(pginfo->hwpage_cnt)++;
-		(pginfo->next_hwpage)++;
-		kpage++;
-		if (pginfo->next_hwpage % hwpages_per_kpage == 0) {
-			(pginfo->kpage_cnt)++;
-			(pginfo->u.usr.next_nmap)++;
-			pginfo->next_hwpage = 0;
-			*sg = sg_next(*sg);
-		}
-		j++;
-		if (j >= number)
-			break;
-	}
-
-	return ret;
-}
-
-/*
- * check given pages for contiguous layout
- * last page addr is returned in prev_pgaddr for further check
- */
-static int ehca_check_kpages_per_ate(struct scatterlist **sg,
-				     int num_pages,
-				     u64 *prev_pgaddr)
-{
-	for (; *sg && num_pages > 0; *sg = sg_next(*sg), num_pages--) {
-		u64 pgaddr = page_to_pfn(sg_page(*sg)) << PAGE_SHIFT;
-		if (ehca_debug_level >= 3)
-			ehca_gen_dbg("chunk_page=%llx value=%016llx", pgaddr,
-				     *(u64 *)__va(pgaddr));
-		if (pgaddr - PAGE_SIZE != *prev_pgaddr) {
-			ehca_gen_err("uncontiguous page found pgaddr=%llx "
-				     "prev_pgaddr=%llx entries_left_in_hwpage=%x",
-				     pgaddr, *prev_pgaddr, num_pages);
-			return -EINVAL;
-		}
-		*prev_pgaddr = pgaddr;
-	}
-	return 0;
-}
-
-/* PAGE_SIZE < pginfo->hwpage_size */
-static int ehca_set_pagebuf_user2(struct ehca_mr_pginfo *pginfo,
-				  u32 number,
-				  u64 *kpage)
-{
-	int ret = 0;
-	u64 pgaddr, prev_pgaddr;
-	u32 j = 0;
-	int kpages_per_hwpage = pginfo->hwpage_size / PAGE_SIZE;
-	int nr_kpages = kpages_per_hwpage;
-	struct scatterlist **sg = &pginfo->u.usr.next_sg;
-
-	while (*sg != NULL) {
-
-		if (nr_kpages == kpages_per_hwpage) {
-			pgaddr = (page_to_pfn(sg_page(*sg))
-				   << PAGE_SHIFT);
-			*kpage = pgaddr;
-			if (!(*kpage)) {
-				ehca_gen_err("pgaddr=%llx entry=%llx",
-					     pgaddr, pginfo->u.usr.next_nmap);
-				ret = -EFAULT;
-				return ret;
-			}
-			/*
-			 * The first page in a hwpage must be aligned;
-			 * the first MR page is exempt from this rule.
-			 */
-			if (pgaddr & (pginfo->hwpage_size - 1)) {
-				if (pginfo->hwpage_cnt) {
-					ehca_gen_err(
-						"invalid alignment "
-						"pgaddr=%llx entry=%llx "
-						"mr_pgsize=%llx",
-						pgaddr, pginfo->u.usr.next_nmap,
-						pginfo->hwpage_size);
-					ret = -EFAULT;
-					return ret;
-				}
-				/* first MR page */
-				pginfo->kpage_cnt =
-					(pgaddr &
-					 (pginfo->hwpage_size - 1)) >>
-					PAGE_SHIFT;
-				nr_kpages -= pginfo->kpage_cnt;
-				*kpage = pgaddr &
-					 ~(pginfo->hwpage_size - 1);
-			}
-			if (ehca_debug_level >= 3) {
-				u64 val = *(u64 *)__va(pgaddr);
-				ehca_gen_dbg("kpage=%llx page=%llx "
-					     "value=%016llx",
-					     *kpage, pgaddr, val);
-			}
-			prev_pgaddr = pgaddr;
-			*sg = sg_next(*sg);
-			pginfo->kpage_cnt++;
-			pginfo->u.usr.next_nmap++;
-			nr_kpages--;
-			if (!nr_kpages)
-				goto next_kpage;
-			continue;
-		}
-
-		ret = ehca_check_kpages_per_ate(sg, nr_kpages,
-						&prev_pgaddr);
-		if (ret)
-			return ret;
-		pginfo->kpage_cnt += nr_kpages;
-		pginfo->u.usr.next_nmap += nr_kpages;
-
-next_kpage:
-		nr_kpages = kpages_per_hwpage;
-		(pginfo->hwpage_cnt)++;
-		kpage++;
-		j++;
-		if (j >= number)
-			break;
-	}
-
-	return ret;
-}
-
-static int ehca_set_pagebuf_phys(struct ehca_mr_pginfo *pginfo,
-				 u32 number, u64 *kpage)
-{
-	int ret = 0;
-	u64 addr = pginfo->u.phy.addr;
-	u64 size = pginfo->u.phy.size;
-	u64 num_hw, offs_hw;
-	u32 i = 0;
-
-	num_hw  = NUM_CHUNKS((addr % pginfo->hwpage_size) + size,
-				pginfo->hwpage_size);
-	offs_hw = (addr & ~(pginfo->hwpage_size - 1)) / pginfo->hwpage_size;
-
-	while (pginfo->next_hwpage < offs_hw + num_hw) {
-		/* sanity check */
-		if ((pginfo->kpage_cnt >= pginfo->num_kpages) ||
-		    (pginfo->hwpage_cnt >= pginfo->num_hwpages)) {
-			ehca_gen_err("kpage_cnt >= num_kpages, "
-				     "kpage_cnt=%llx num_kpages=%llx "
-				     "hwpage_cnt=%llx "
-				     "num_hwpages=%llx i=%x",
-				     pginfo->kpage_cnt,
-				     pginfo->num_kpages,
-				     pginfo->hwpage_cnt,
-				     pginfo->num_hwpages, i);
-			return -EFAULT;
-		}
-		*kpage = (addr & ~(pginfo->hwpage_size - 1)) +
-			 (pginfo->next_hwpage * pginfo->hwpage_size);
-		if ( !(*kpage) && addr ) {
-			ehca_gen_err("addr=%llx size=%llx "
-				     "next_hwpage=%llx", addr,
-				     size, pginfo->next_hwpage);
-			return -EFAULT;
-		}
-		(pginfo->hwpage_cnt)++;
-		(pginfo->next_hwpage)++;
-		if (PAGE_SIZE >= pginfo->hwpage_size) {
-			if (pginfo->next_hwpage %
-			    (PAGE_SIZE / pginfo->hwpage_size) == 0)
-				(pginfo->kpage_cnt)++;
-		} else
-			pginfo->kpage_cnt += pginfo->hwpage_size /
-				PAGE_SIZE;
-		kpage++;
-		i++;
-		if (i >= number) break;
-	}
-	if (pginfo->next_hwpage >= offs_hw + num_hw) {
-		pginfo->next_hwpage = 0;
-	}
-
-	return ret;
-}
-
-static int ehca_set_pagebuf_fmr(struct ehca_mr_pginfo *pginfo,
-				u32 number, u64 *kpage)
-{
-	int ret = 0;
-	u64 *fmrlist;
-	u32 i;
-
-	/* loop over desired page_list entries */
-	fmrlist = pginfo->u.fmr.page_list + pginfo->u.fmr.next_listelem;
-	for (i = 0; i < number; i++) {
-		*kpage = (*fmrlist & ~(pginfo->hwpage_size - 1)) +
-			   pginfo->next_hwpage * pginfo->hwpage_size;
-		if ( !(*kpage) ) {
-			ehca_gen_err("*fmrlist=%llx fmrlist=%p "
-				     "next_listelem=%llx next_hwpage=%llx",
-				     *fmrlist, fmrlist,
-				     pginfo->u.fmr.next_listelem,
-				     pginfo->next_hwpage);
-			return -EFAULT;
-		}
-		(pginfo->hwpage_cnt)++;
-		if (pginfo->u.fmr.fmr_pgsize >= pginfo->hwpage_size) {
-			if (pginfo->next_hwpage %
-			    (pginfo->u.fmr.fmr_pgsize /
-			     pginfo->hwpage_size) == 0) {
-				(pginfo->kpage_cnt)++;
-				(pginfo->u.fmr.next_listelem)++;
-				fmrlist++;
-				pginfo->next_hwpage = 0;
-			} else
-				(pginfo->next_hwpage)++;
-		} else {
-			unsigned int cnt_per_hwpage = pginfo->hwpage_size /
-				pginfo->u.fmr.fmr_pgsize;
-			unsigned int j;
-			u64 prev = *kpage;
-			/* check if adrs are contiguous */
-			for (j = 1; j < cnt_per_hwpage; j++) {
-				u64 p = fmrlist[j] & ~(pginfo->hwpage_size - 1);
-				if (prev + pginfo->u.fmr.fmr_pgsize != p) {
-					ehca_gen_err("uncontiguous fmr pages "
-						     "found prev=%llx p=%llx "
-						     "idx=%x", prev, p, i + j);
-					return -EINVAL;
-				}
-				prev = p;
-			}
-			pginfo->kpage_cnt += cnt_per_hwpage;
-			pginfo->u.fmr.next_listelem += cnt_per_hwpage;
-			fmrlist += cnt_per_hwpage;
-		}
-		kpage++;
-	}
-	return ret;
-}
-
-/* setup page buffer from page info */
-int ehca_set_pagebuf(struct ehca_mr_pginfo *pginfo,
-		     u32 number,
-		     u64 *kpage)
-{
-	int ret;
-
-	switch (pginfo->type) {
-	case EHCA_MR_PGI_PHYS:
-		ret = ehca_set_pagebuf_phys(pginfo, number, kpage);
-		break;
-	case EHCA_MR_PGI_USER:
-		ret = PAGE_SIZE >= pginfo->hwpage_size ?
-			ehca_set_pagebuf_user1(pginfo, number, kpage) :
-			ehca_set_pagebuf_user2(pginfo, number, kpage);
-		break;
-	case EHCA_MR_PGI_FMR:
-		ret = ehca_set_pagebuf_fmr(pginfo, number, kpage);
-		break;
-	default:
-		ehca_gen_err("bad pginfo->type=%x", pginfo->type);
-		ret = -EFAULT;
-		break;
-	}
-	return ret;
-} /* end ehca_set_pagebuf() */
-
-/*----------------------------------------------------------------------*/
-
-/*
- * check MR if it is a max-MR, i.e. uses whole memory
- * in case it's a max-MR 1 is returned, else 0
- */
-int ehca_mr_is_maxmr(u64 size,
-		     u64 *iova_start)
-{
-	/* a MR is treated as max-MR only if it fits following: */
-	if ((size == ehca_mr_len) &&
-	    (iova_start == (void *)ehca_map_vaddr((void *)(KERNELBASE + PHYSICAL_START)))) {
-		ehca_gen_dbg("this is a max-MR");
-		return 1;
-	} else
-		return 0;
-} /* end ehca_mr_is_maxmr() */
-
-/*----------------------------------------------------------------------*/
-
-/* map access control for MR/MW. This routine is used for MR and MW. */
-void ehca_mrmw_map_acl(int ib_acl,
-		       u32 *hipz_acl)
-{
-	*hipz_acl = 0;
-	if (ib_acl & IB_ACCESS_REMOTE_READ)
-		*hipz_acl |= HIPZ_ACCESSCTRL_R_READ;
-	if (ib_acl & IB_ACCESS_REMOTE_WRITE)
-		*hipz_acl |= HIPZ_ACCESSCTRL_R_WRITE;
-	if (ib_acl & IB_ACCESS_REMOTE_ATOMIC)
-		*hipz_acl |= HIPZ_ACCESSCTRL_R_ATOMIC;
-	if (ib_acl & IB_ACCESS_LOCAL_WRITE)
-		*hipz_acl |= HIPZ_ACCESSCTRL_L_WRITE;
-	if (ib_acl & IB_ACCESS_MW_BIND)
-		*hipz_acl |= HIPZ_ACCESSCTRL_MW_BIND;
-} /* end ehca_mrmw_map_acl() */
-
-/*----------------------------------------------------------------------*/
-
-/* sets page size in hipz access control for MR/MW. */
-void ehca_mrmw_set_pgsize_hipz_acl(u32 pgsize, u32 *hipz_acl) /*INOUT*/
-{
-	*hipz_acl |= (ehca_encode_hwpage_size(pgsize) << 24);
-} /* end ehca_mrmw_set_pgsize_hipz_acl() */
-
-/*----------------------------------------------------------------------*/
-
-/*
- * reverse map access control for MR/MW.
- * This routine is used for MR and MW.
- */
-void ehca_mrmw_reverse_map_acl(const u32 *hipz_acl,
-			       int *ib_acl) /*OUT*/
-{
-	*ib_acl = 0;
-	if (*hipz_acl & HIPZ_ACCESSCTRL_R_READ)
-		*ib_acl |= IB_ACCESS_REMOTE_READ;
-	if (*hipz_acl & HIPZ_ACCESSCTRL_R_WRITE)
-		*ib_acl |= IB_ACCESS_REMOTE_WRITE;
-	if (*hipz_acl & HIPZ_ACCESSCTRL_R_ATOMIC)
-		*ib_acl |= IB_ACCESS_REMOTE_ATOMIC;
-	if (*hipz_acl & HIPZ_ACCESSCTRL_L_WRITE)
-		*ib_acl |= IB_ACCESS_LOCAL_WRITE;
-	if (*hipz_acl & HIPZ_ACCESSCTRL_MW_BIND)
-		*ib_acl |= IB_ACCESS_MW_BIND;
-} /* end ehca_mrmw_reverse_map_acl() */
-
-
-/*----------------------------------------------------------------------*/
-
-/*
- * MR destructor and constructor
- * used in Reregister MR verb, sets all fields in ehca_mr_t to 0,
- * except struct ib_mr and spinlock
- */
-void ehca_mr_deletenew(struct ehca_mr *mr)
-{
-	mr->flags = 0;
-	mr->num_kpages = 0;
-	mr->num_hwpages = 0;
-	mr->acl = 0;
-	mr->start = NULL;
-	mr->fmr_page_size = 0;
-	mr->fmr_max_pages = 0;
-	mr->fmr_max_maps = 0;
-	mr->fmr_map_cnt = 0;
-	memset(&mr->ipz_mr_handle, 0, sizeof(mr->ipz_mr_handle));
-	memset(&mr->galpas, 0, sizeof(mr->galpas));
-} /* end ehca_mr_deletenew() */
-
-int ehca_init_mrmw_cache(void)
-{
-	mr_cache = kmem_cache_create("ehca_cache_mr",
-				     sizeof(struct ehca_mr), 0,
-				     SLAB_HWCACHE_ALIGN,
-				     NULL);
-	if (!mr_cache)
-		return -ENOMEM;
-	mw_cache = kmem_cache_create("ehca_cache_mw",
-				     sizeof(struct ehca_mw), 0,
-				     SLAB_HWCACHE_ALIGN,
-				     NULL);
-	if (!mw_cache) {
-		kmem_cache_destroy(mr_cache);
-		mr_cache = NULL;
-		return -ENOMEM;
-	}
-	return 0;
-}
-
-void ehca_cleanup_mrmw_cache(void)
-{
-	kmem_cache_destroy(mr_cache);
-	kmem_cache_destroy(mw_cache);
-}
-
-static inline int ehca_init_top_bmap(struct ehca_top_bmap *ehca_top_bmap,
-				     int dir)
-{
-	if (!ehca_bmap_valid(ehca_top_bmap->dir[dir])) {
-		ehca_top_bmap->dir[dir] =
-			kmalloc(sizeof(struct ehca_dir_bmap), GFP_KERNEL);
-		if (!ehca_top_bmap->dir[dir])
-			return -ENOMEM;
-		/* Set map block to 0xFF according to EHCA_INVAL_ADDR */
-		memset(ehca_top_bmap->dir[dir], 0xFF, EHCA_ENT_MAP_SIZE);
-	}
-	return 0;
-}
-
-static inline int ehca_init_bmap(struct ehca_bmap *ehca_bmap, int top, int dir)
-{
-	if (!ehca_bmap_valid(ehca_bmap->top[top])) {
-		ehca_bmap->top[top] =
-			kmalloc(sizeof(struct ehca_top_bmap), GFP_KERNEL);
-		if (!ehca_bmap->top[top])
-			return -ENOMEM;
-		/* Set map block to 0xFF according to EHCA_INVAL_ADDR */
-		memset(ehca_bmap->top[top], 0xFF, EHCA_DIR_MAP_SIZE);
-	}
-	return ehca_init_top_bmap(ehca_bmap->top[top], dir);
-}
-
-static inline int ehca_calc_index(unsigned long i, unsigned long s)
-{
-	return (i >> s) & EHCA_INDEX_MASK;
-}
-
-void ehca_destroy_busmap(void)
-{
-	int top, dir;
-
-	if (!ehca_bmap)
-		return;
-
-	for (top = 0; top < EHCA_MAP_ENTRIES; top++) {
-		if (!ehca_bmap_valid(ehca_bmap->top[top]))
-			continue;
-		for (dir = 0; dir < EHCA_MAP_ENTRIES; dir++) {
-			if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir]))
-				continue;
-
-			kfree(ehca_bmap->top[top]->dir[dir]);
-		}
-
-		kfree(ehca_bmap->top[top]);
-	}
-
-	kfree(ehca_bmap);
-	ehca_bmap = NULL;
-}
-
-static int ehca_update_busmap(unsigned long pfn, unsigned long nr_pages)
-{
-	unsigned long i, start_section, end_section;
-	int top, dir, idx;
-
-	if (!nr_pages)
-		return 0;
-
-	if (!ehca_bmap) {
-		ehca_bmap = kmalloc(sizeof(struct ehca_bmap), GFP_KERNEL);
-		if (!ehca_bmap)
-			return -ENOMEM;
-		/* Set map block to 0xFF according to EHCA_INVAL_ADDR */
-		memset(ehca_bmap, 0xFF, EHCA_TOP_MAP_SIZE);
-	}
-
-	start_section = (pfn * PAGE_SIZE) / EHCA_SECTSIZE;
-	end_section = ((pfn + nr_pages) * PAGE_SIZE) / EHCA_SECTSIZE;
-	for (i = start_section; i < end_section; i++) {
-		int ret;
-		top = ehca_calc_index(i, EHCA_TOP_INDEX_SHIFT);
-		dir = ehca_calc_index(i, EHCA_DIR_INDEX_SHIFT);
-		idx = i & EHCA_INDEX_MASK;
-
-		ret = ehca_init_bmap(ehca_bmap, top, dir);
-		if (ret) {
-			ehca_destroy_busmap();
-			return ret;
-		}
-		ehca_bmap->top[top]->dir[dir]->ent[idx] = ehca_mr_len;
-		ehca_mr_len += EHCA_SECTSIZE;
-	}
-	return 0;
-}
-
-static int ehca_is_hugepage(unsigned long pfn)
-{
-	int page_order;
-
-	if (pfn & EHCA_HUGEPAGE_PFN_MASK)
-		return 0;
-
-	page_order = compound_order(pfn_to_page(pfn));
-	if (page_order + PAGE_SHIFT != EHCA_HUGEPAGESHIFT)
-		return 0;
-
-	return 1;
-}
-
-static int ehca_create_busmap_callback(unsigned long initial_pfn,
-				       unsigned long total_nr_pages, void *arg)
-{
-	int ret;
-	unsigned long pfn, start_pfn, end_pfn, nr_pages;
-
-	if ((total_nr_pages * PAGE_SIZE) < EHCA_HUGEPAGE_SIZE)
-		return ehca_update_busmap(initial_pfn, total_nr_pages);
-
-	/* Given chunk is >= 16GB -> check for hugepages */
-	start_pfn = initial_pfn;
-	end_pfn = initial_pfn + total_nr_pages;
-	pfn = start_pfn;
-
-	while (pfn < end_pfn) {
-		if (ehca_is_hugepage(pfn)) {
-			/* Add mem found in front of the hugepage */
-			nr_pages = pfn - start_pfn;
-			ret = ehca_update_busmap(start_pfn, nr_pages);
-			if (ret)
-				return ret;
-			/* Skip the hugepage */
-			pfn += (EHCA_HUGEPAGE_SIZE / PAGE_SIZE);
-			start_pfn = pfn;
-		} else
-			pfn += (EHCA_SECTSIZE / PAGE_SIZE);
-	}
-
-	/* Add mem found behind the hugepage(s)  */
-	nr_pages = pfn - start_pfn;
-	return ehca_update_busmap(start_pfn, nr_pages);
-}
-
-int ehca_create_busmap(void)
-{
-	int ret;
-
-	ehca_mr_len = 0;
-	ret = walk_system_ram_range(0, 1ULL << MAX_PHYSMEM_BITS, NULL,
-				   ehca_create_busmap_callback);
-	return ret;
-}
-
-static int ehca_reg_bmap_mr_rpages(struct ehca_shca *shca,
-				   struct ehca_mr *e_mr,
-				   struct ehca_mr_pginfo *pginfo)
-{
-	int top;
-	u64 hret, *kpage;
-
-	kpage = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
-	if (!kpage) {
-		ehca_err(&shca->ib_device, "kpage alloc failed");
-		return -ENOMEM;
-	}
-	for (top = 0; top < EHCA_MAP_ENTRIES; top++) {
-		if (!ehca_bmap_valid(ehca_bmap->top[top]))
-			continue;
-		hret = ehca_reg_mr_dir_sections(top, kpage, shca, e_mr, pginfo);
-		if ((hret != H_PAGE_REGISTERED) && (hret != H_SUCCESS))
-			break;
-	}
-
-	ehca_free_fw_ctrlblock(kpage);
-
-	if (hret == H_SUCCESS)
-		return 0; /* Everything is fine */
-	else {
-		ehca_err(&shca->ib_device, "ehca_reg_bmap_mr_rpages failed, "
-				 "h_ret=%lli e_mr=%p top=%x lkey=%x "
-				 "hca_hndl=%llx mr_hndl=%llx", hret, e_mr, top,
-				 e_mr->ib.ib_mr.lkey,
-				 shca->ipz_hca_handle.handle,
-				 e_mr->ipz_mr_handle.handle);
-		return ehca2ib_return_code(hret);
-	}
-}
-
-static u64 ehca_map_vaddr(void *caddr)
-{
-	int top, dir, idx;
-	unsigned long abs_addr, offset;
-	u64 entry;
-
-	if (!ehca_bmap)
-		return EHCA_INVAL_ADDR;
-
-	abs_addr = __pa(caddr);
-	top = ehca_calc_index(abs_addr, EHCA_TOP_INDEX_SHIFT + EHCA_SECTSHIFT);
-	if (!ehca_bmap_valid(ehca_bmap->top[top]))
-		return EHCA_INVAL_ADDR;
-
-	dir = ehca_calc_index(abs_addr, EHCA_DIR_INDEX_SHIFT + EHCA_SECTSHIFT);
-	if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir]))
-		return EHCA_INVAL_ADDR;
-
-	idx = ehca_calc_index(abs_addr, EHCA_SECTSHIFT);
-
-	entry = ehca_bmap->top[top]->dir[dir]->ent[idx];
-	if (ehca_bmap_valid(entry)) {
-		offset = (unsigned long)caddr & (EHCA_SECTSIZE - 1);
-		return entry | offset;
-	} else
-		return EHCA_INVAL_ADDR;
-}
-
-static int ehca_dma_mapping_error(struct ib_device *dev, u64 dma_addr)
-{
-	return dma_addr == EHCA_INVAL_ADDR;
-}
-
-static u64 ehca_dma_map_single(struct ib_device *dev, void *cpu_addr,
-			       size_t size, enum dma_data_direction direction)
-{
-	if (cpu_addr)
-		return ehca_map_vaddr(cpu_addr);
-	else
-		return EHCA_INVAL_ADDR;
-}
-
-static void ehca_dma_unmap_single(struct ib_device *dev, u64 addr, size_t size,
-				  enum dma_data_direction direction)
-{
-	/* This is only a stub; nothing to be done here */
-}
-
-static u64 ehca_dma_map_page(struct ib_device *dev, struct page *page,
-			     unsigned long offset, size_t size,
-			     enum dma_data_direction direction)
-{
-	u64 addr;
-
-	if (offset + size > PAGE_SIZE)
-		return EHCA_INVAL_ADDR;
-
-	addr = ehca_map_vaddr(page_address(page));
-	if (!ehca_dma_mapping_error(dev, addr))
-		addr += offset;
-
-	return addr;
-}
-
-static void ehca_dma_unmap_page(struct ib_device *dev, u64 addr, size_t size,
-				enum dma_data_direction direction)
-{
-	/* This is only a stub; nothing to be done here */
-}
-
-static int ehca_dma_map_sg(struct ib_device *dev, struct scatterlist *sgl,
-			   int nents, enum dma_data_direction direction)
-{
-	struct scatterlist *sg;
-	int i;
-
-	for_each_sg(sgl, sg, nents, i) {
-		u64 addr;
-		addr = ehca_map_vaddr(sg_virt(sg));
-		if (ehca_dma_mapping_error(dev, addr))
-			return 0;
-
-		sg->dma_address = addr;
-		sg->dma_length = sg->length;
-	}
-	return nents;
-}
-
-static void ehca_dma_unmap_sg(struct ib_device *dev, struct scatterlist *sg,
-			      int nents, enum dma_data_direction direction)
-{
-	/* This is only a stub; nothing to be done here */
-}
-
-static void ehca_dma_sync_single_for_cpu(struct ib_device *dev, u64 addr,
-					 size_t size,
-					 enum dma_data_direction dir)
-{
-	dma_sync_single_for_cpu(dev->dma_device, addr, size, dir);
-}
-
-static void ehca_dma_sync_single_for_device(struct ib_device *dev, u64 addr,
-					    size_t size,
-					    enum dma_data_direction dir)
-{
-	dma_sync_single_for_device(dev->dma_device, addr, size, dir);
-}
-
-static void *ehca_dma_alloc_coherent(struct ib_device *dev, size_t size,
-				     u64 *dma_handle, gfp_t flag)
-{
-	struct page *p;
-	void *addr = NULL;
-	u64 dma_addr;
-
-	p = alloc_pages(flag, get_order(size));
-	if (p) {
-		addr = page_address(p);
-		dma_addr = ehca_map_vaddr(addr);
-		if (ehca_dma_mapping_error(dev, dma_addr)) {
-			free_pages((unsigned long)addr,	get_order(size));
-			return NULL;
-		}
-		if (dma_handle)
-			*dma_handle = dma_addr;
-		return addr;
-	}
-	return NULL;
-}
-
-static void ehca_dma_free_coherent(struct ib_device *dev, size_t size,
-				   void *cpu_addr, u64 dma_handle)
-{
-	if (cpu_addr && size)
-		free_pages((unsigned long)cpu_addr, get_order(size));
-}
-
-
-struct ib_dma_mapping_ops ehca_dma_mapping_ops = {
-	.mapping_error          = ehca_dma_mapping_error,
-	.map_single             = ehca_dma_map_single,
-	.unmap_single           = ehca_dma_unmap_single,
-	.map_page               = ehca_dma_map_page,
-	.unmap_page             = ehca_dma_unmap_page,
-	.map_sg                 = ehca_dma_map_sg,
-	.unmap_sg               = ehca_dma_unmap_sg,
-	.sync_single_for_cpu    = ehca_dma_sync_single_for_cpu,
-	.sync_single_for_device = ehca_dma_sync_single_for_device,
-	.alloc_coherent         = ehca_dma_alloc_coherent,
-	.free_coherent          = ehca_dma_free_coherent,
-};
diff --git a/drivers/staging/rdma/ehca/ehca_mrmw.h b/drivers/staging/rdma/ehca/ehca_mrmw.h
deleted file mode 100644
index 52bfa95..0000000
--- a/drivers/staging/rdma/ehca/ehca_mrmw.h
+++ /dev/null
@@ -1,127 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  MR/MW declarations and inline functions
- *
- *  Authors: Dietmar Decker <ddecker@de.ibm.com>
- *           Christoph Raisch <raisch@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _EHCA_MRMW_H_
-#define _EHCA_MRMW_H_
-
-enum ehca_reg_type {
-	EHCA_REG_MR,
-	EHCA_REG_BUSMAP_MR
-};
-
-int ehca_reg_mr(struct ehca_shca *shca,
-		struct ehca_mr *e_mr,
-		u64 *iova_start,
-		u64 size,
-		int acl,
-		struct ehca_pd *e_pd,
-		struct ehca_mr_pginfo *pginfo,
-		u32 *lkey,
-		u32 *rkey,
-		enum ehca_reg_type reg_type);
-
-int ehca_reg_mr_rpages(struct ehca_shca *shca,
-		       struct ehca_mr *e_mr,
-		       struct ehca_mr_pginfo *pginfo);
-
-int ehca_rereg_mr(struct ehca_shca *shca,
-		  struct ehca_mr *e_mr,
-		  u64 *iova_start,
-		  u64 size,
-		  int mr_access_flags,
-		  struct ehca_pd *e_pd,
-		  struct ehca_mr_pginfo *pginfo,
-		  u32 *lkey,
-		  u32 *rkey);
-
-int ehca_unmap_one_fmr(struct ehca_shca *shca,
-		       struct ehca_mr *e_fmr);
-
-int ehca_reg_smr(struct ehca_shca *shca,
-		 struct ehca_mr *e_origmr,
-		 struct ehca_mr *e_newmr,
-		 u64 *iova_start,
-		 int acl,
-		 struct ehca_pd *e_pd,
-		 u32 *lkey,
-		 u32 *rkey);
-
-int ehca_reg_internal_maxmr(struct ehca_shca *shca,
-			    struct ehca_pd *e_pd,
-			    struct ehca_mr **maxmr);
-
-int ehca_reg_maxmr(struct ehca_shca *shca,
-		   struct ehca_mr *e_newmr,
-		   u64 *iova_start,
-		   int acl,
-		   struct ehca_pd *e_pd,
-		   u32 *lkey,
-		   u32 *rkey);
-
-int ehca_dereg_internal_maxmr(struct ehca_shca *shca);
-
-int ehca_fmr_check_page_list(struct ehca_mr *e_fmr,
-			     u64 *page_list,
-			     int list_len);
-
-int ehca_set_pagebuf(struct ehca_mr_pginfo *pginfo,
-		     u32 number,
-		     u64 *kpage);
-
-int ehca_mr_is_maxmr(u64 size,
-		     u64 *iova_start);
-
-void ehca_mrmw_map_acl(int ib_acl,
-		       u32 *hipz_acl);
-
-void ehca_mrmw_set_pgsize_hipz_acl(u32 pgsize, u32 *hipz_acl);
-
-void ehca_mrmw_reverse_map_acl(const u32 *hipz_acl,
-			       int *ib_acl);
-
-void ehca_mr_deletenew(struct ehca_mr *mr);
-
-int ehca_create_busmap(void);
-
-void ehca_destroy_busmap(void);
-
-extern struct ib_dma_mapping_ops ehca_dma_mapping_ops;
-#endif  /*_EHCA_MRMW_H_*/
diff --git a/drivers/staging/rdma/ehca/ehca_pd.c b/drivers/staging/rdma/ehca/ehca_pd.c
deleted file mode 100644
index 2a8aae4..0000000
--- a/drivers/staging/rdma/ehca/ehca_pd.c
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  PD functions
- *
- *  Authors: Christoph Raisch <raisch@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/slab.h>
-
-#include "ehca_tools.h"
-#include "ehca_iverbs.h"
-
-static struct kmem_cache *pd_cache;
-
-struct ib_pd *ehca_alloc_pd(struct ib_device *device,
-			    struct ib_ucontext *context, struct ib_udata *udata)
-{
-	struct ehca_pd *pd;
-	int i;
-
-	pd = kmem_cache_zalloc(pd_cache, GFP_KERNEL);
-	if (!pd) {
-		ehca_err(device, "device=%p context=%p out of memory",
-			 device, context);
-		return ERR_PTR(-ENOMEM);
-	}
-
-	for (i = 0; i < 2; i++) {
-		INIT_LIST_HEAD(&pd->free[i]);
-		INIT_LIST_HEAD(&pd->full[i]);
-	}
-	mutex_init(&pd->lock);
-
-	/*
-	 * Kernel PD: when device = -1, 0
-	 * User   PD: when context != -1
-	 */
-	if (!context) {
-		/*
-		 * Kernel PDs after init reuses always
-		 * the one created in ehca_shca_reopen()
-		 */
-		struct ehca_shca *shca = container_of(device, struct ehca_shca,
-						      ib_device);
-		pd->fw_pd.value = shca->pd->fw_pd.value;
-	} else
-		pd->fw_pd.value = (u64)pd;
-
-	return &pd->ib_pd;
-}
-
-int ehca_dealloc_pd(struct ib_pd *pd)
-{
-	struct ehca_pd *my_pd = container_of(pd, struct ehca_pd, ib_pd);
-	int i, leftovers = 0;
-	struct ipz_small_queue_page *page, *tmp;
-
-	for (i = 0; i < 2; i++) {
-		list_splice(&my_pd->full[i], &my_pd->free[i]);
-		list_for_each_entry_safe(page, tmp, &my_pd->free[i], list) {
-			leftovers = 1;
-			free_page(page->page);
-			kmem_cache_free(small_qp_cache, page);
-		}
-	}
-
-	if (leftovers)
-		ehca_warn(pd->device,
-			  "Some small queue pages were not freed");
-
-	kmem_cache_free(pd_cache, my_pd);
-
-	return 0;
-}
-
-int ehca_init_pd_cache(void)
-{
-	pd_cache = kmem_cache_create("ehca_cache_pd",
-				     sizeof(struct ehca_pd), 0,
-				     SLAB_HWCACHE_ALIGN,
-				     NULL);
-	if (!pd_cache)
-		return -ENOMEM;
-	return 0;
-}
-
-void ehca_cleanup_pd_cache(void)
-{
-	kmem_cache_destroy(pd_cache);
-}
diff --git a/drivers/staging/rdma/ehca/ehca_qes.h b/drivers/staging/rdma/ehca/ehca_qes.h
deleted file mode 100644
index 90c4efa..0000000
--- a/drivers/staging/rdma/ehca/ehca_qes.h
+++ /dev/null
@@ -1,260 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  Hardware request structures
- *
- *  Authors: Waleri Fomin <fomin@de.ibm.com>
- *           Reinhard Ernst <rernst@de.ibm.com>
- *           Christoph Raisch <raisch@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-
-#ifndef _EHCA_QES_H_
-#define _EHCA_QES_H_
-
-#include "ehca_tools.h"
-
-/* virtual scatter gather entry to specify remote addresses with length */
-struct ehca_vsgentry {
-	u64 vaddr;
-	u32 lkey;
-	u32 length;
-};
-
-#define GRH_FLAG_MASK        EHCA_BMASK_IBM( 7,  7)
-#define GRH_IPVERSION_MASK   EHCA_BMASK_IBM( 0,  3)
-#define GRH_TCLASS_MASK      EHCA_BMASK_IBM( 4, 12)
-#define GRH_FLOWLABEL_MASK   EHCA_BMASK_IBM(13, 31)
-#define GRH_PAYLEN_MASK      EHCA_BMASK_IBM(32, 47)
-#define GRH_NEXTHEADER_MASK  EHCA_BMASK_IBM(48, 55)
-#define GRH_HOPLIMIT_MASK    EHCA_BMASK_IBM(56, 63)
-
-/*
- * Unreliable Datagram Address Vector Format
- * see IBTA Vol1 chapter 8.3 Global Routing Header
- */
-struct ehca_ud_av {
-	u8 sl;
-	u8 lnh;
-	u16 dlid;
-	u8 reserved1;
-	u8 reserved2;
-	u8 reserved3;
-	u8 slid_path_bits;
-	u8 reserved4;
-	u8 ipd;
-	u8 reserved5;
-	u8 pmtu;
-	u32 reserved6;
-	u64 reserved7;
-	union {
-		struct {
-			u64 word_0; /* always set to 6  */
-			/*should be 0x1B for IB transport */
-			u64 word_1;
-			u64 word_2;
-			u64 word_3;
-			u64 word_4;
-		} grh;
-		struct {
-			u32 wd_0;
-			u32 wd_1;
-			/* DWord_1 --> SGID */
-
-			u32 sgid_wd3;
-			u32 sgid_wd2;
-
-			u32 sgid_wd1;
-			u32 sgid_wd0;
-			/* DWord_3 --> DGID */
-
-			u32 dgid_wd3;
-			u32 dgid_wd2;
-
-			u32 dgid_wd1;
-			u32 dgid_wd0;
-		} grh_l;
-	};
-};
-
-/* maximum number of sg entries allowed in a WQE */
-#define MAX_WQE_SG_ENTRIES 252
-
-#define WQE_OPTYPE_SEND             0x80
-#define WQE_OPTYPE_RDMAREAD         0x40
-#define WQE_OPTYPE_RDMAWRITE        0x20
-#define WQE_OPTYPE_CMPSWAP          0x10
-#define WQE_OPTYPE_FETCHADD         0x08
-#define WQE_OPTYPE_BIND             0x04
-
-#define WQE_WRFLAG_REQ_SIGNAL_COM   0x80
-#define WQE_WRFLAG_FENCE            0x40
-#define WQE_WRFLAG_IMM_DATA_PRESENT 0x20
-#define WQE_WRFLAG_SOLIC_EVENT      0x10
-
-#define WQEF_CACHE_HINT             0x80
-#define WQEF_CACHE_HINT_RD_WR       0x40
-#define WQEF_TIMED_WQE              0x20
-#define WQEF_PURGE                  0x08
-#define WQEF_HIGH_NIBBLE            0xF0
-
-#define MW_BIND_ACCESSCTRL_R_WRITE   0x40
-#define MW_BIND_ACCESSCTRL_R_READ    0x20
-#define MW_BIND_ACCESSCTRL_R_ATOMIC  0x10
-
-struct ehca_wqe {
-	u64 work_request_id;
-	u8 optype;
-	u8 wr_flag;
-	u16 pkeyi;
-	u8 wqef;
-	u8 nr_of_data_seg;
-	u16 wqe_provided_slid;
-	u32 destination_qp_number;
-	u32 resync_psn_sqp;
-	u32 local_ee_context_qkey;
-	u32 immediate_data;
-	union {
-		struct {
-			u64 remote_virtual_address;
-			u32 rkey;
-			u32 reserved;
-			u64 atomic_1st_op_dma_len;
-			u64 atomic_2nd_op;
-			struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES];
-
-		} nud;
-		struct {
-			u64 ehca_ud_av_ptr;
-			u64 reserved1;
-			u64 reserved2;
-			u64 reserved3;
-			struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES];
-		} ud_avp;
-		struct {
-			struct ehca_ud_av ud_av;
-			struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES -
-						     2];
-		} ud_av;
-		struct {
-			u64 reserved0;
-			u64 reserved1;
-			u64 reserved2;
-			u64 reserved3;
-			struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES];
-		} all_rcv;
-
-		struct {
-			u64 reserved;
-			u32 rkey;
-			u32 old_rkey;
-			u64 reserved1;
-			u64 reserved2;
-			u64 virtual_address;
-			u32 reserved3;
-			u32 length;
-			u32 reserved4;
-			u16 reserved5;
-			u8 reserved6;
-			u8 lr_ctl;
-			u32 lkey;
-			u32 reserved7;
-			u64 reserved8;
-			u64 reserved9;
-			u64 reserved10;
-			u64 reserved11;
-		} bind;
-		struct {
-			u64 reserved12;
-			u64 reserved13;
-			u32 size;
-			u32 start;
-		} inline_data;
-	} u;
-
-};
-
-#define WC_SEND_RECEIVE EHCA_BMASK_IBM(0, 0)
-#define WC_IMM_DATA     EHCA_BMASK_IBM(1, 1)
-#define WC_GRH_PRESENT  EHCA_BMASK_IBM(2, 2)
-#define WC_SE_BIT       EHCA_BMASK_IBM(3, 3)
-#define WC_STATUS_ERROR_BIT 0x80000000
-#define WC_STATUS_REMOTE_ERROR_FLAGS 0x0000F800
-#define WC_STATUS_PURGE_BIT 0x10
-#define WC_SEND_RECEIVE_BIT 0x80
-
-struct ehca_cqe {
-	u64 work_request_id;
-	u8 optype;
-	u8 w_completion_flags;
-	u16 reserved1;
-	u32 nr_bytes_transferred;
-	u32 immediate_data;
-	u32 local_qp_number;
-	u8 freed_resource_count;
-	u8 service_level;
-	u16 wqe_count;
-	u32 qp_token;
-	u32 qkey_ee_token;
-	u32 remote_qp_number;
-	u16 dlid;
-	u16 rlid;
-	u16 reserved2;
-	u16 pkey_index;
-	u32 cqe_timestamp;
-	u32 wqe_timestamp;
-	u8 wqe_timestamp_valid;
-	u8 reserved3;
-	u8 reserved4;
-	u8 cqe_flags;
-	u32 status;
-};
-
-struct ehca_eqe {
-	u64 entry;
-};
-
-struct ehca_mrte {
-	u64 starting_va;
-	u64 length; /* length of memory region in bytes*/
-	u32 pd;
-	u8 key_instance;
-	u8 pagesize;
-	u8 mr_control;
-	u8 local_remote_access_ctrl;
-	u8 reserved[0x20 - 0x18];
-	u64 at_pointer[4];
-};
-#endif /*_EHCA_QES_H_*/
diff --git a/drivers/staging/rdma/ehca/ehca_qp.c b/drivers/staging/rdma/ehca/ehca_qp.c
deleted file mode 100644
index 896c01f..0000000
--- a/drivers/staging/rdma/ehca/ehca_qp.c
+++ /dev/null
@@ -1,2256 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  QP functions
- *
- *  Authors: Joachim Fenkes <fenkes@de.ibm.com>
- *           Stefan Roscher <stefan.roscher@de.ibm.com>
- *           Waleri Fomin <fomin@de.ibm.com>
- *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
- *           Reinhard Ernst <rernst@de.ibm.com>
- *           Heiko J Schick <schickhj@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/slab.h>
-
-#include "ehca_classes.h"
-#include "ehca_tools.h"
-#include "ehca_qes.h"
-#include "ehca_iverbs.h"
-#include "hcp_if.h"
-#include "hipz_fns.h"
-
-static struct kmem_cache *qp_cache;
-
-/*
- * attributes not supported by query qp
- */
-#define QP_ATTR_QUERY_NOT_SUPPORTED (IB_QP_ACCESS_FLAGS       | \
-				     IB_QP_EN_SQD_ASYNC_NOTIFY)
-
-/*
- * ehca (internal) qp state values
- */
-enum ehca_qp_state {
-	EHCA_QPS_RESET = 1,
-	EHCA_QPS_INIT = 2,
-	EHCA_QPS_RTR = 3,
-	EHCA_QPS_RTS = 5,
-	EHCA_QPS_SQD = 6,
-	EHCA_QPS_SQE = 8,
-	EHCA_QPS_ERR = 128
-};
-
-/*
- * qp state transitions as defined by IB Arch Rel 1.1 page 431
- */
-enum ib_qp_statetrans {
-	IB_QPST_ANY2RESET,
-	IB_QPST_ANY2ERR,
-	IB_QPST_RESET2INIT,
-	IB_QPST_INIT2RTR,
-	IB_QPST_INIT2INIT,
-	IB_QPST_RTR2RTS,
-	IB_QPST_RTS2SQD,
-	IB_QPST_RTS2RTS,
-	IB_QPST_SQD2RTS,
-	IB_QPST_SQE2RTS,
-	IB_QPST_SQD2SQD,
-	IB_QPST_MAX	/* nr of transitions, this must be last!!! */
-};
-
-/*
- * ib2ehca_qp_state maps IB to ehca qp_state
- * returns ehca qp state corresponding to given ib qp state
- */
-static inline enum ehca_qp_state ib2ehca_qp_state(enum ib_qp_state ib_qp_state)
-{
-	switch (ib_qp_state) {
-	case IB_QPS_RESET:
-		return EHCA_QPS_RESET;
-	case IB_QPS_INIT:
-		return EHCA_QPS_INIT;
-	case IB_QPS_RTR:
-		return EHCA_QPS_RTR;
-	case IB_QPS_RTS:
-		return EHCA_QPS_RTS;
-	case IB_QPS_SQD:
-		return EHCA_QPS_SQD;
-	case IB_QPS_SQE:
-		return EHCA_QPS_SQE;
-	case IB_QPS_ERR:
-		return EHCA_QPS_ERR;
-	default:
-		ehca_gen_err("invalid ib_qp_state=%x", ib_qp_state);
-		return -EINVAL;
-	}
-}
-
-/*
- * ehca2ib_qp_state maps ehca to IB qp_state
- * returns ib qp state corresponding to given ehca qp state
- */
-static inline enum ib_qp_state ehca2ib_qp_state(enum ehca_qp_state
-						ehca_qp_state)
-{
-	switch (ehca_qp_state) {
-	case EHCA_QPS_RESET:
-		return IB_QPS_RESET;
-	case EHCA_QPS_INIT:
-		return IB_QPS_INIT;
-	case EHCA_QPS_RTR:
-		return IB_QPS_RTR;
-	case EHCA_QPS_RTS:
-		return IB_QPS_RTS;
-	case EHCA_QPS_SQD:
-		return IB_QPS_SQD;
-	case EHCA_QPS_SQE:
-		return IB_QPS_SQE;
-	case EHCA_QPS_ERR:
-		return IB_QPS_ERR;
-	default:
-		ehca_gen_err("invalid ehca_qp_state=%x", ehca_qp_state);
-		return -EINVAL;
-	}
-}
-
-/*
- * ehca_qp_type used as index for req_attr and opt_attr of
- * struct ehca_modqp_statetrans
- */
-enum ehca_qp_type {
-	QPT_RC = 0,
-	QPT_UC = 1,
-	QPT_UD = 2,
-	QPT_SQP = 3,
-	QPT_MAX
-};
-
-/*
- * ib2ehcaqptype maps Ib to ehca qp_type
- * returns ehca qp type corresponding to ib qp type
- */
-static inline enum ehca_qp_type ib2ehcaqptype(enum ib_qp_type ibqptype)
-{
-	switch (ibqptype) {
-	case IB_QPT_SMI:
-	case IB_QPT_GSI:
-		return QPT_SQP;
-	case IB_QPT_RC:
-		return QPT_RC;
-	case IB_QPT_UC:
-		return QPT_UC;
-	case IB_QPT_UD:
-		return QPT_UD;
-	default:
-		ehca_gen_err("Invalid ibqptype=%x", ibqptype);
-		return -EINVAL;
-	}
-}
-
-static inline enum ib_qp_statetrans get_modqp_statetrans(int ib_fromstate,
-							 int ib_tostate)
-{
-	int index = -EINVAL;
-	switch (ib_tostate) {
-	case IB_QPS_RESET:
-		index = IB_QPST_ANY2RESET;
-		break;
-	case IB_QPS_INIT:
-		switch (ib_fromstate) {
-		case IB_QPS_RESET:
-			index = IB_QPST_RESET2INIT;
-			break;
-		case IB_QPS_INIT:
-			index = IB_QPST_INIT2INIT;
-			break;
-		}
-		break;
-	case IB_QPS_RTR:
-		if (ib_fromstate == IB_QPS_INIT)
-			index = IB_QPST_INIT2RTR;
-		break;
-	case IB_QPS_RTS:
-		switch (ib_fromstate) {
-		case IB_QPS_RTR:
-			index = IB_QPST_RTR2RTS;
-			break;
-		case IB_QPS_RTS:
-			index = IB_QPST_RTS2RTS;
-			break;
-		case IB_QPS_SQD:
-			index = IB_QPST_SQD2RTS;
-			break;
-		case IB_QPS_SQE:
-			index = IB_QPST_SQE2RTS;
-			break;
-		}
-		break;
-	case IB_QPS_SQD:
-		if (ib_fromstate == IB_QPS_RTS)
-			index = IB_QPST_RTS2SQD;
-		break;
-	case IB_QPS_SQE:
-		break;
-	case IB_QPS_ERR:
-		index = IB_QPST_ANY2ERR;
-		break;
-	default:
-		break;
-	}
-	return index;
-}
-
-/*
- * ibqptype2servicetype returns hcp service type corresponding to given
- * ib qp type used by create_qp()
- */
-static inline int ibqptype2servicetype(enum ib_qp_type ibqptype)
-{
-	switch (ibqptype) {
-	case IB_QPT_SMI:
-	case IB_QPT_GSI:
-		return ST_UD;
-	case IB_QPT_RC:
-		return ST_RC;
-	case IB_QPT_UC:
-		return ST_UC;
-	case IB_QPT_UD:
-		return ST_UD;
-	case IB_QPT_RAW_IPV6:
-		return -EINVAL;
-	case IB_QPT_RAW_ETHERTYPE:
-		return -EINVAL;
-	default:
-		ehca_gen_err("Invalid ibqptype=%x", ibqptype);
-		return -EINVAL;
-	}
-}
-
-/*
- * init userspace queue info from ipz_queue data
- */
-static inline void queue2resp(struct ipzu_queue_resp *resp,
-			      struct ipz_queue *queue)
-{
-	resp->qe_size = queue->qe_size;
-	resp->act_nr_of_sg = queue->act_nr_of_sg;
-	resp->queue_length = queue->queue_length;
-	resp->pagesize = queue->pagesize;
-	resp->toggle_state = queue->toggle_state;
-	resp->offset = queue->offset;
-}
-
-/*
- * init_qp_queue initializes/constructs r/squeue and registers queue pages.
- */
-static inline int init_qp_queue(struct ehca_shca *shca,
-				struct ehca_pd *pd,
-				struct ehca_qp *my_qp,
-				struct ipz_queue *queue,
-				int q_type,
-				u64 expected_hret,
-				struct ehca_alloc_queue_parms *parms,
-				int wqe_size)
-{
-	int ret, cnt, ipz_rc, nr_q_pages;
-	void *vpage;
-	u64 rpage, h_ret;
-	struct ib_device *ib_dev = &shca->ib_device;
-	struct ipz_adapter_handle ipz_hca_handle = shca->ipz_hca_handle;
-
-	if (!parms->queue_size)
-		return 0;
-
-	if (parms->is_small) {
-		nr_q_pages = 1;
-		ipz_rc = ipz_queue_ctor(pd, queue, nr_q_pages,
-					128 << parms->page_size,
-					wqe_size, parms->act_nr_sges, 1);
-	} else {
-		nr_q_pages = parms->queue_size;
-		ipz_rc = ipz_queue_ctor(pd, queue, nr_q_pages,
-					EHCA_PAGESIZE, wqe_size,
-					parms->act_nr_sges, 0);
-	}
-
-	if (!ipz_rc) {
-		ehca_err(ib_dev, "Cannot allocate page for queue. ipz_rc=%i",
-			 ipz_rc);
-		return -EBUSY;
-	}
-
-	/* register queue pages */
-	for (cnt = 0; cnt < nr_q_pages; cnt++) {
-		vpage = ipz_qpageit_get_inc(queue);
-		if (!vpage) {
-			ehca_err(ib_dev, "ipz_qpageit_get_inc() "
-				 "failed p_vpage= %p", vpage);
-			ret = -EINVAL;
-			goto init_qp_queue1;
-		}
-		rpage = __pa(vpage);
-
-		h_ret = hipz_h_register_rpage_qp(ipz_hca_handle,
-						 my_qp->ipz_qp_handle,
-						 NULL, 0, q_type,
-						 rpage, parms->is_small ? 0 : 1,
-						 my_qp->galpas.kernel);
-		if (cnt == (nr_q_pages - 1)) {	/* last page! */
-			if (h_ret != expected_hret) {
-				ehca_err(ib_dev, "hipz_qp_register_rpage() "
-					 "h_ret=%lli", h_ret);
-				ret = ehca2ib_return_code(h_ret);
-				goto init_qp_queue1;
-			}
-			vpage = ipz_qpageit_get_inc(&my_qp->ipz_rqueue);
-			if (vpage) {
-				ehca_err(ib_dev, "ipz_qpageit_get_inc() "
-					 "should not succeed vpage=%p", vpage);
-				ret = -EINVAL;
-				goto init_qp_queue1;
-			}
-		} else {
-			if (h_ret != H_PAGE_REGISTERED) {
-				ehca_err(ib_dev, "hipz_qp_register_rpage() "
-					 "h_ret=%lli", h_ret);
-				ret = ehca2ib_return_code(h_ret);
-				goto init_qp_queue1;
-			}
-		}
-	}
-
-	ipz_qeit_reset(queue);
-
-	return 0;
-
-init_qp_queue1:
-	ipz_queue_dtor(pd, queue);
-	return ret;
-}
-
-static inline int ehca_calc_wqe_size(int act_nr_sge, int is_llqp)
-{
-	if (is_llqp)
-		return 128 << act_nr_sge;
-	else
-		return offsetof(struct ehca_wqe,
-				u.nud.sg_list[act_nr_sge]);
-}
-
-static void ehca_determine_small_queue(struct ehca_alloc_queue_parms *queue,
-				       int req_nr_sge, int is_llqp)
-{
-	u32 wqe_size, q_size;
-	int act_nr_sge = req_nr_sge;
-
-	if (!is_llqp)
-		/* round up #SGEs so WQE size is a power of 2 */
-		for (act_nr_sge = 4; act_nr_sge <= 252;
-		     act_nr_sge = 4 + 2 * act_nr_sge)
-			if (act_nr_sge >= req_nr_sge)
-				break;
-
-	wqe_size = ehca_calc_wqe_size(act_nr_sge, is_llqp);
-	q_size = wqe_size * (queue->max_wr + 1);
-
-	if (q_size <= 512)
-		queue->page_size = 2;
-	else if (q_size <= 1024)
-		queue->page_size = 3;
-	else
-		queue->page_size = 0;
-
-	queue->is_small = (queue->page_size != 0);
-}
-
-/* needs to be called with cq->spinlock held */
-void ehca_add_to_err_list(struct ehca_qp *qp, int on_sq)
-{
-	struct list_head *list, *node;
-
-	/* TODO: support low latency QPs */
-	if (qp->ext_type == EQPT_LLQP)
-		return;
-
-	if (on_sq) {
-		list = &qp->send_cq->sqp_err_list;
-		node = &qp->sq_err_node;
-	} else {
-		list = &qp->recv_cq->rqp_err_list;
-		node = &qp->rq_err_node;
-	}
-
-	if (list_empty(node))
-		list_add_tail(node, list);
-
-	return;
-}
-
-static void del_from_err_list(struct ehca_cq *cq, struct list_head *node)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&cq->spinlock, flags);
-
-	if (!list_empty(node))
-		list_del_init(node);
-
-	spin_unlock_irqrestore(&cq->spinlock, flags);
-}
-
-static void reset_queue_map(struct ehca_queue_map *qmap)
-{
-	int i;
-
-	qmap->tail = qmap->entries - 1;
-	qmap->left_to_poll = 0;
-	qmap->next_wqe_idx = 0;
-	for (i = 0; i < qmap->entries; i++) {
-		qmap->map[i].reported = 1;
-		qmap->map[i].cqe_req = 0;
-	}
-}
-
-/*
- * Create an ib_qp struct that is either a QP or an SRQ, depending on
- * the value of the is_srq parameter. If init_attr and srq_init_attr share
- * fields, the field out of init_attr is used.
- */
-static struct ehca_qp *internal_create_qp(
-	struct ib_pd *pd,
-	struct ib_qp_init_attr *init_attr,
-	struct ib_srq_init_attr *srq_init_attr,
-	struct ib_udata *udata, int is_srq)
-{
-	struct ehca_qp *my_qp, *my_srq = NULL;
-	struct ehca_pd *my_pd = container_of(pd, struct ehca_pd, ib_pd);
-	struct ehca_shca *shca = container_of(pd->device, struct ehca_shca,
-					      ib_device);
-	struct ib_ucontext *context = NULL;
-	u64 h_ret;
-	int is_llqp = 0, has_srq = 0, is_user = 0;
-	int qp_type, max_send_sge, max_recv_sge, ret;
-
-	/* h_call's out parameters */
-	struct ehca_alloc_qp_parms parms;
-	u32 swqe_size = 0, rwqe_size = 0, ib_qp_num;
-	unsigned long flags;
-
-	if (!atomic_add_unless(&shca->num_qps, 1, shca->max_num_qps)) {
-		ehca_err(pd->device, "Unable to create QP, max number of %i "
-			 "QPs reached.", shca->max_num_qps);
-		ehca_err(pd->device, "To increase the maximum number of QPs "
-			 "use the number_of_qps module parameter.\n");
-		return ERR_PTR(-ENOSPC);
-	}
-
-	if (init_attr->create_flags) {
-		atomic_dec(&shca->num_qps);
-		return ERR_PTR(-EINVAL);
-	}
-
-	memset(&parms, 0, sizeof(parms));
-	qp_type = init_attr->qp_type;
-
-	if (init_attr->sq_sig_type != IB_SIGNAL_REQ_WR &&
-		init_attr->sq_sig_type != IB_SIGNAL_ALL_WR) {
-		ehca_err(pd->device, "init_attr->sg_sig_type=%x not allowed",
-			 init_attr->sq_sig_type);
-		atomic_dec(&shca->num_qps);
-		return ERR_PTR(-EINVAL);
-	}
-
-	/* save LLQP info */
-	if (qp_type & 0x80) {
-		is_llqp = 1;
-		parms.ext_type = EQPT_LLQP;
-		parms.ll_comp_flags = qp_type & LLQP_COMP_MASK;
-	}
-	qp_type &= 0x1F;
-	init_attr->qp_type &= 0x1F;
-
-	/* handle SRQ base QPs */
-	if (init_attr->srq) {
-		my_srq = container_of(init_attr->srq, struct ehca_qp, ib_srq);
-
-		if (qp_type == IB_QPT_UC) {
-			ehca_err(pd->device, "UC with SRQ not supported");
-			atomic_dec(&shca->num_qps);
-			return ERR_PTR(-EINVAL);
-		}
-
-		has_srq = 1;
-		parms.ext_type = EQPT_SRQBASE;
-		parms.srq_qpn = my_srq->real_qp_num;
-	}
-
-	if (is_llqp && has_srq) {
-		ehca_err(pd->device, "LLQPs can't have an SRQ");
-		atomic_dec(&shca->num_qps);
-		return ERR_PTR(-EINVAL);
-	}
-
-	/* handle SRQs */
-	if (is_srq) {
-		parms.ext_type = EQPT_SRQ;
-		parms.srq_limit = srq_init_attr->attr.srq_limit;
-		if (init_attr->cap.max_recv_sge > 3) {
-			ehca_err(pd->device, "no more than three SGEs "
-				 "supported for SRQ  pd=%p  max_sge=%x",
-				 pd, init_attr->cap.max_recv_sge);
-			atomic_dec(&shca->num_qps);
-			return ERR_PTR(-EINVAL);
-		}
-	}
-
-	/* check QP type */
-	if (qp_type != IB_QPT_UD &&
-	    qp_type != IB_QPT_UC &&
-	    qp_type != IB_QPT_RC &&
-	    qp_type != IB_QPT_SMI &&
-	    qp_type != IB_QPT_GSI) {
-		ehca_err(pd->device, "wrong QP Type=%x", qp_type);
-		atomic_dec(&shca->num_qps);
-		return ERR_PTR(-EINVAL);
-	}
-
-	if (is_llqp) {
-		switch (qp_type) {
-		case IB_QPT_RC:
-			if ((init_attr->cap.max_send_wr > 255) ||
-			    (init_attr->cap.max_recv_wr > 255)) {
-				ehca_err(pd->device,
-					 "Invalid Number of max_sq_wr=%x "
-					 "or max_rq_wr=%x for RC LLQP",
-					 init_attr->cap.max_send_wr,
-					 init_attr->cap.max_recv_wr);
-				atomic_dec(&shca->num_qps);
-				return ERR_PTR(-EINVAL);
-			}
-			break;
-		case IB_QPT_UD:
-			if (!EHCA_BMASK_GET(HCA_CAP_UD_LL_QP, shca->hca_cap)) {
-				ehca_err(pd->device, "UD LLQP not supported "
-					 "by this adapter");
-				atomic_dec(&shca->num_qps);
-				return ERR_PTR(-ENOSYS);
-			}
-			if (!(init_attr->cap.max_send_sge <= 5
-			    && init_attr->cap.max_send_sge >= 1
-			    && init_attr->cap.max_recv_sge <= 5
-			    && init_attr->cap.max_recv_sge >= 1)) {
-				ehca_err(pd->device,
-					 "Invalid Number of max_send_sge=%x "
-					 "or max_recv_sge=%x for UD LLQP",
-					 init_attr->cap.max_send_sge,
-					 init_attr->cap.max_recv_sge);
-				atomic_dec(&shca->num_qps);
-				return ERR_PTR(-EINVAL);
-			} else if (init_attr->cap.max_send_wr > 255) {
-				ehca_err(pd->device,
-					 "Invalid Number of "
-					 "max_send_wr=%x for UD QP_TYPE=%x",
-					 init_attr->cap.max_send_wr, qp_type);
-				atomic_dec(&shca->num_qps);
-				return ERR_PTR(-EINVAL);
-			}
-			break;
-		default:
-			ehca_err(pd->device, "unsupported LL QP Type=%x",
-				 qp_type);
-			atomic_dec(&shca->num_qps);
-			return ERR_PTR(-EINVAL);
-		}
-	} else {
-		int max_sge = (qp_type == IB_QPT_UD || qp_type == IB_QPT_SMI
-			       || qp_type == IB_QPT_GSI) ? 250 : 252;
-
-		if (init_attr->cap.max_send_sge > max_sge
-		    || init_attr->cap.max_recv_sge > max_sge) {
-			ehca_err(pd->device, "Invalid number of SGEs requested "
-				 "send_sge=%x recv_sge=%x max_sge=%x",
-				 init_attr->cap.max_send_sge,
-				 init_attr->cap.max_recv_sge, max_sge);
-			atomic_dec(&shca->num_qps);
-			return ERR_PTR(-EINVAL);
-		}
-	}
-
-	my_qp = kmem_cache_zalloc(qp_cache, GFP_KERNEL);
-	if (!my_qp) {
-		ehca_err(pd->device, "pd=%p not enough memory to alloc qp", pd);
-		atomic_dec(&shca->num_qps);
-		return ERR_PTR(-ENOMEM);
-	}
-
-	if (pd->uobject && udata) {
-		is_user = 1;
-		context = pd->uobject->context;
-	}
-
-	atomic_set(&my_qp->nr_events, 0);
-	init_waitqueue_head(&my_qp->wait_completion);
-	spin_lock_init(&my_qp->spinlock_s);
-	spin_lock_init(&my_qp->spinlock_r);
-	my_qp->qp_type = qp_type;
-	my_qp->ext_type = parms.ext_type;
-	my_qp->state = IB_QPS_RESET;
-
-	if (init_attr->recv_cq)
-		my_qp->recv_cq =
-			container_of(init_attr->recv_cq, struct ehca_cq, ib_cq);
-	if (init_attr->send_cq)
-		my_qp->send_cq =
-			container_of(init_attr->send_cq, struct ehca_cq, ib_cq);
-
-	idr_preload(GFP_KERNEL);
-	write_lock_irqsave(&ehca_qp_idr_lock, flags);
-
-	ret = idr_alloc(&ehca_qp_idr, my_qp, 0, 0x2000000, GFP_NOWAIT);
-	if (ret >= 0)
-		my_qp->token = ret;
-
-	write_unlock_irqrestore(&ehca_qp_idr_lock, flags);
-	idr_preload_end();
-	if (ret < 0) {
-		if (ret == -ENOSPC) {
-			ret = -EINVAL;
-			ehca_err(pd->device, "Invalid number of qp");
-		} else {
-			ret = -ENOMEM;
-			ehca_err(pd->device, "Can't allocate new idr entry.");
-		}
-		goto create_qp_exit0;
-	}
-
-	if (has_srq)
-		parms.srq_token = my_qp->token;
-
-	parms.servicetype = ibqptype2servicetype(qp_type);
-	if (parms.servicetype < 0) {
-		ret = -EINVAL;
-		ehca_err(pd->device, "Invalid qp_type=%x", qp_type);
-		goto create_qp_exit1;
-	}
-
-	/* Always signal by WQE so we can hide circ. WQEs */
-	parms.sigtype = HCALL_SIGT_BY_WQE;
-
-	/* UD_AV CIRCUMVENTION */
-	max_send_sge = init_attr->cap.max_send_sge;
-	max_recv_sge = init_attr->cap.max_recv_sge;
-	if (parms.servicetype == ST_UD && !is_llqp) {
-		max_send_sge += 2;
-		max_recv_sge += 2;
-	}
-
-	parms.token = my_qp->token;
-	parms.eq_handle = shca->eq.ipz_eq_handle;
-	parms.pd = my_pd->fw_pd;
-	if (my_qp->send_cq)
-		parms.send_cq_handle = my_qp->send_cq->ipz_cq_handle;
-	if (my_qp->recv_cq)
-		parms.recv_cq_handle = my_qp->recv_cq->ipz_cq_handle;
-
-	parms.squeue.max_wr = init_attr->cap.max_send_wr;
-	parms.rqueue.max_wr = init_attr->cap.max_recv_wr;
-	parms.squeue.max_sge = max_send_sge;
-	parms.rqueue.max_sge = max_recv_sge;
-
-	/* RC QPs need one more SWQE for unsolicited ack circumvention */
-	if (qp_type == IB_QPT_RC)
-		parms.squeue.max_wr++;
-
-	if (EHCA_BMASK_GET(HCA_CAP_MINI_QP, shca->hca_cap)) {
-		if (HAS_SQ(my_qp))
-			ehca_determine_small_queue(
-				&parms.squeue, max_send_sge, is_llqp);
-		if (HAS_RQ(my_qp))
-			ehca_determine_small_queue(
-				&parms.rqueue, max_recv_sge, is_llqp);
-		parms.qp_storage =
-			(parms.squeue.is_small || parms.rqueue.is_small);
-	}
-
-	h_ret = hipz_h_alloc_resource_qp(shca->ipz_hca_handle, &parms, is_user);
-	if (h_ret != H_SUCCESS) {
-		ehca_err(pd->device, "h_alloc_resource_qp() failed h_ret=%lli",
-			 h_ret);
-		ret = ehca2ib_return_code(h_ret);
-		goto create_qp_exit1;
-	}
-
-	ib_qp_num = my_qp->real_qp_num = parms.real_qp_num;
-	my_qp->ipz_qp_handle = parms.qp_handle;
-	my_qp->galpas = parms.galpas;
-
-	swqe_size = ehca_calc_wqe_size(parms.squeue.act_nr_sges, is_llqp);
-	rwqe_size = ehca_calc_wqe_size(parms.rqueue.act_nr_sges, is_llqp);
-
-	switch (qp_type) {
-	case IB_QPT_RC:
-		if (is_llqp) {
-			parms.squeue.act_nr_sges = 1;
-			parms.rqueue.act_nr_sges = 1;
-		}
-		/* hide the extra WQE */
-		parms.squeue.act_nr_wqes--;
-		break;
-	case IB_QPT_UD:
-	case IB_QPT_GSI:
-	case IB_QPT_SMI:
-		/* UD circumvention */
-		if (is_llqp) {
-			parms.squeue.act_nr_sges = 1;
-			parms.rqueue.act_nr_sges = 1;
-		} else {
-			parms.squeue.act_nr_sges -= 2;
-			parms.rqueue.act_nr_sges -= 2;
-		}
-
-		if (IB_QPT_GSI == qp_type || IB_QPT_SMI == qp_type) {
-			parms.squeue.act_nr_wqes = init_attr->cap.max_send_wr;
-			parms.rqueue.act_nr_wqes = init_attr->cap.max_recv_wr;
-			parms.squeue.act_nr_sges = init_attr->cap.max_send_sge;
-			parms.rqueue.act_nr_sges = init_attr->cap.max_recv_sge;
-			ib_qp_num = (qp_type == IB_QPT_SMI) ? 0 : 1;
-		}
-
-		break;
-
-	default:
-		break;
-	}
-
-	/* initialize r/squeue and register queue pages */
-	if (HAS_SQ(my_qp)) {
-		ret = init_qp_queue(
-			shca, my_pd, my_qp, &my_qp->ipz_squeue, 0,
-			HAS_RQ(my_qp) ? H_PAGE_REGISTERED : H_SUCCESS,
-			&parms.squeue, swqe_size);
-		if (ret) {
-			ehca_err(pd->device, "Couldn't initialize squeue "
-				 "and pages ret=%i", ret);
-			goto create_qp_exit2;
-		}
-
-		if (!is_user) {
-			my_qp->sq_map.entries = my_qp->ipz_squeue.queue_length /
-				my_qp->ipz_squeue.qe_size;
-			my_qp->sq_map.map = vmalloc(my_qp->sq_map.entries *
-						    sizeof(struct ehca_qmap_entry));
-			if (!my_qp->sq_map.map) {
-				ehca_err(pd->device, "Couldn't allocate squeue "
-					 "map ret=%i", ret);
-				goto create_qp_exit3;
-			}
-			INIT_LIST_HEAD(&my_qp->sq_err_node);
-			/* to avoid the generation of bogus flush CQEs */
-			reset_queue_map(&my_qp->sq_map);
-		}
-	}
-
-	if (HAS_RQ(my_qp)) {
-		ret = init_qp_queue(
-			shca, my_pd, my_qp, &my_qp->ipz_rqueue, 1,
-			H_SUCCESS, &parms.rqueue, rwqe_size);
-		if (ret) {
-			ehca_err(pd->device, "Couldn't initialize rqueue "
-				 "and pages ret=%i", ret);
-			goto create_qp_exit4;
-		}
-		if (!is_user) {
-			my_qp->rq_map.entries = my_qp->ipz_rqueue.queue_length /
-				my_qp->ipz_rqueue.qe_size;
-			my_qp->rq_map.map = vmalloc(my_qp->rq_map.entries *
-						    sizeof(struct ehca_qmap_entry));
-			if (!my_qp->rq_map.map) {
-				ehca_err(pd->device, "Couldn't allocate squeue "
-					 "map ret=%i", ret);
-				goto create_qp_exit5;
-			}
-			INIT_LIST_HEAD(&my_qp->rq_err_node);
-			/* to avoid the generation of bogus flush CQEs */
-			reset_queue_map(&my_qp->rq_map);
-		}
-	} else if (init_attr->srq && !is_user) {
-		/* this is a base QP, use the queue map of the SRQ */
-		my_qp->rq_map = my_srq->rq_map;
-		INIT_LIST_HEAD(&my_qp->rq_err_node);
-
-		my_qp->ipz_rqueue = my_srq->ipz_rqueue;
-	}
-
-	if (is_srq) {
-		my_qp->ib_srq.pd = &my_pd->ib_pd;
-		my_qp->ib_srq.device = my_pd->ib_pd.device;
-
-		my_qp->ib_srq.srq_context = init_attr->qp_context;
-		my_qp->ib_srq.event_handler = init_attr->event_handler;
-	} else {
-		my_qp->ib_qp.qp_num = ib_qp_num;
-		my_qp->ib_qp.pd = &my_pd->ib_pd;
-		my_qp->ib_qp.device = my_pd->ib_pd.device;
-
-		my_qp->ib_qp.recv_cq = init_attr->recv_cq;
-		my_qp->ib_qp.send_cq = init_attr->send_cq;
-
-		my_qp->ib_qp.qp_type = qp_type;
-		my_qp->ib_qp.srq = init_attr->srq;
-
-		my_qp->ib_qp.qp_context = init_attr->qp_context;
-		my_qp->ib_qp.event_handler = init_attr->event_handler;
-	}
-
-	init_attr->cap.max_inline_data = 0; /* not supported yet */
-	init_attr->cap.max_recv_sge = parms.rqueue.act_nr_sges;
-	init_attr->cap.max_recv_wr = parms.rqueue.act_nr_wqes;
-	init_attr->cap.max_send_sge = parms.squeue.act_nr_sges;
-	init_attr->cap.max_send_wr = parms.squeue.act_nr_wqes;
-	my_qp->init_attr = *init_attr;
-
-	if (qp_type == IB_QPT_SMI || qp_type == IB_QPT_GSI) {
-		shca->sport[init_attr->port_num - 1].ibqp_sqp[qp_type] =
-			&my_qp->ib_qp;
-		if (ehca_nr_ports < 0) {
-			/* alloc array to cache subsequent modify qp parms
-			 * for autodetect mode
-			 */
-			my_qp->mod_qp_parm =
-				kzalloc(EHCA_MOD_QP_PARM_MAX *
-					sizeof(*my_qp->mod_qp_parm),
-					GFP_KERNEL);
-			if (!my_qp->mod_qp_parm) {
-				ehca_err(pd->device,
-					 "Could not alloc mod_qp_parm");
-				goto create_qp_exit5;
-			}
-		}
-	}
-
-	/* NOTE: define_apq0() not supported yet */
-	if (qp_type == IB_QPT_GSI) {
-		h_ret = ehca_define_sqp(shca, my_qp, init_attr);
-		if (h_ret != H_SUCCESS) {
-			kfree(my_qp->mod_qp_parm);
-			my_qp->mod_qp_parm = NULL;
-			/* the QP pointer is no longer valid */
-			shca->sport[init_attr->port_num - 1].ibqp_sqp[qp_type] =
-				NULL;
-			ret = ehca2ib_return_code(h_ret);
-			goto create_qp_exit6;
-		}
-	}
-
-	if (my_qp->send_cq) {
-		ret = ehca_cq_assign_qp(my_qp->send_cq, my_qp);
-		if (ret) {
-			ehca_err(pd->device,
-				 "Couldn't assign qp to send_cq ret=%i", ret);
-			goto create_qp_exit7;
-		}
-	}
-
-	/* copy queues, galpa data to user space */
-	if (context && udata) {
-		struct ehca_create_qp_resp resp;
-		memset(&resp, 0, sizeof(resp));
-
-		resp.qp_num = my_qp->real_qp_num;
-		resp.token = my_qp->token;
-		resp.qp_type = my_qp->qp_type;
-		resp.ext_type = my_qp->ext_type;
-		resp.qkey = my_qp->qkey;
-		resp.real_qp_num = my_qp->real_qp_num;
-
-		if (HAS_SQ(my_qp))
-			queue2resp(&resp.ipz_squeue, &my_qp->ipz_squeue);
-		if (HAS_RQ(my_qp))
-			queue2resp(&resp.ipz_rqueue, &my_qp->ipz_rqueue);
-		resp.fw_handle_ofs = (u32)
-			(my_qp->galpas.user.fw_handle & (PAGE_SIZE - 1));
-
-		if (ib_copy_to_udata(udata, &resp, sizeof resp)) {
-			ehca_err(pd->device, "Copy to udata failed");
-			ret = -EINVAL;
-			goto create_qp_exit8;
-		}
-	}
-
-	return my_qp;
-
-create_qp_exit8:
-	ehca_cq_unassign_qp(my_qp->send_cq, my_qp->real_qp_num);
-
-create_qp_exit7:
-	kfree(my_qp->mod_qp_parm);
-
-create_qp_exit6:
-	if (HAS_RQ(my_qp) && !is_user)
-		vfree(my_qp->rq_map.map);
-
-create_qp_exit5:
-	if (HAS_RQ(my_qp))
-		ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue);
-
-create_qp_exit4:
-	if (HAS_SQ(my_qp) && !is_user)
-		vfree(my_qp->sq_map.map);
-
-create_qp_exit3:
-	if (HAS_SQ(my_qp))
-		ipz_queue_dtor(my_pd, &my_qp->ipz_squeue);
-
-create_qp_exit2:
-	hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp);
-
-create_qp_exit1:
-	write_lock_irqsave(&ehca_qp_idr_lock, flags);
-	idr_remove(&ehca_qp_idr, my_qp->token);
-	write_unlock_irqrestore(&ehca_qp_idr_lock, flags);
-
-create_qp_exit0:
-	kmem_cache_free(qp_cache, my_qp);
-	atomic_dec(&shca->num_qps);
-	return ERR_PTR(ret);
-}
-
-struct ib_qp *ehca_create_qp(struct ib_pd *pd,
-			     struct ib_qp_init_attr *qp_init_attr,
-			     struct ib_udata *udata)
-{
-	struct ehca_qp *ret;
-
-	ret = internal_create_qp(pd, qp_init_attr, NULL, udata, 0);
-	return IS_ERR(ret) ? (struct ib_qp *)ret : &ret->ib_qp;
-}
-
-static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp,
-			       struct ib_uobject *uobject);
-
-struct ib_srq *ehca_create_srq(struct ib_pd *pd,
-			       struct ib_srq_init_attr *srq_init_attr,
-			       struct ib_udata *udata)
-{
-	struct ib_qp_init_attr qp_init_attr;
-	struct ehca_qp *my_qp;
-	struct ib_srq *ret;
-	struct ehca_shca *shca = container_of(pd->device, struct ehca_shca,
-					      ib_device);
-	struct hcp_modify_qp_control_block *mqpcb;
-	u64 hret, update_mask;
-
-	if (srq_init_attr->srq_type != IB_SRQT_BASIC)
-		return ERR_PTR(-ENOSYS);
-
-	/* For common attributes, internal_create_qp() takes its info
-	 * out of qp_init_attr, so copy all common attrs there.
-	 */
-	memset(&qp_init_attr, 0, sizeof(qp_init_attr));
-	qp_init_attr.event_handler = srq_init_attr->event_handler;
-	qp_init_attr.qp_context = srq_init_attr->srq_context;
-	qp_init_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
-	qp_init_attr.qp_type = IB_QPT_RC;
-	qp_init_attr.cap.max_recv_wr = srq_init_attr->attr.max_wr;
-	qp_init_attr.cap.max_recv_sge = srq_init_attr->attr.max_sge;
-
-	my_qp = internal_create_qp(pd, &qp_init_attr, srq_init_attr, udata, 1);
-	if (IS_ERR(my_qp))
-		return (struct ib_srq *)my_qp;
-
-	/* copy back return values */
-	srq_init_attr->attr.max_wr = qp_init_attr.cap.max_recv_wr;
-	srq_init_attr->attr.max_sge = 3;
-
-	/* drive SRQ into RTR state */
-	mqpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
-	if (!mqpcb) {
-		ehca_err(pd->device, "Could not get zeroed page for mqpcb "
-			 "ehca_qp=%p qp_num=%x ", my_qp, my_qp->real_qp_num);
-		ret = ERR_PTR(-ENOMEM);
-		goto create_srq1;
-	}
-
-	mqpcb->qp_state = EHCA_QPS_INIT;
-	mqpcb->prim_phys_port = 1;
-	update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_STATE, 1);
-	hret = hipz_h_modify_qp(shca->ipz_hca_handle,
-				my_qp->ipz_qp_handle,
-				&my_qp->pf,
-				update_mask,
-				mqpcb, my_qp->galpas.kernel);
-	if (hret != H_SUCCESS) {
-		ehca_err(pd->device, "Could not modify SRQ to INIT "
-			 "ehca_qp=%p qp_num=%x h_ret=%lli",
-			 my_qp, my_qp->real_qp_num, hret);
-		goto create_srq2;
-	}
-
-	mqpcb->qp_enable = 1;
-	update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_ENABLE, 1);
-	hret = hipz_h_modify_qp(shca->ipz_hca_handle,
-				my_qp->ipz_qp_handle,
-				&my_qp->pf,
-				update_mask,
-				mqpcb, my_qp->galpas.kernel);
-	if (hret != H_SUCCESS) {
-		ehca_err(pd->device, "Could not enable SRQ "
-			 "ehca_qp=%p qp_num=%x h_ret=%lli",
-			 my_qp, my_qp->real_qp_num, hret);
-		goto create_srq2;
-	}
-
-	mqpcb->qp_state  = EHCA_QPS_RTR;
-	update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_STATE, 1);
-	hret = hipz_h_modify_qp(shca->ipz_hca_handle,
-				my_qp->ipz_qp_handle,
-				&my_qp->pf,
-				update_mask,
-				mqpcb, my_qp->galpas.kernel);
-	if (hret != H_SUCCESS) {
-		ehca_err(pd->device, "Could not modify SRQ to RTR "
-			 "ehca_qp=%p qp_num=%x h_ret=%lli",
-			 my_qp, my_qp->real_qp_num, hret);
-		goto create_srq2;
-	}
-
-	ehca_free_fw_ctrlblock(mqpcb);
-
-	return &my_qp->ib_srq;
-
-create_srq2:
-	ret = ERR_PTR(ehca2ib_return_code(hret));
-	ehca_free_fw_ctrlblock(mqpcb);
-
-create_srq1:
-	internal_destroy_qp(pd->device, my_qp, my_qp->ib_srq.uobject);
-
-	return ret;
-}
-
-/*
- * prepare_sqe_rts called by internal_modify_qp() at trans sqe -> rts
- * set purge bit of bad wqe and subsequent wqes to avoid reentering sqe
- * returns total number of bad wqes in bad_wqe_cnt
- */
-static int prepare_sqe_rts(struct ehca_qp *my_qp, struct ehca_shca *shca,
-			   int *bad_wqe_cnt)
-{
-	u64 h_ret;
-	struct ipz_queue *squeue;
-	void *bad_send_wqe_p, *bad_send_wqe_v;
-	u64 q_ofs;
-	struct ehca_wqe *wqe;
-	int qp_num = my_qp->ib_qp.qp_num;
-
-	/* get send wqe pointer */
-	h_ret = hipz_h_disable_and_get_wqe(shca->ipz_hca_handle,
-					   my_qp->ipz_qp_handle, &my_qp->pf,
-					   &bad_send_wqe_p, NULL, 2);
-	if (h_ret != H_SUCCESS) {
-		ehca_err(&shca->ib_device, "hipz_h_disable_and_get_wqe() failed"
-			 " ehca_qp=%p qp_num=%x h_ret=%lli",
-			 my_qp, qp_num, h_ret);
-		return ehca2ib_return_code(h_ret);
-	}
-	bad_send_wqe_p = (void *)((u64)bad_send_wqe_p & (~(1L << 63)));
-	ehca_dbg(&shca->ib_device, "qp_num=%x bad_send_wqe_p=%p",
-		 qp_num, bad_send_wqe_p);
-	/* convert wqe pointer to vadr */
-	bad_send_wqe_v = __va((u64)bad_send_wqe_p);
-	if (ehca_debug_level >= 2)
-		ehca_dmp(bad_send_wqe_v, 32, "qp_num=%x bad_wqe", qp_num);
-	squeue = &my_qp->ipz_squeue;
-	if (ipz_queue_abs_to_offset(squeue, (u64)bad_send_wqe_p, &q_ofs)) {
-		ehca_err(&shca->ib_device, "failed to get wqe offset qp_num=%x"
-			 " bad_send_wqe_p=%p", qp_num, bad_send_wqe_p);
-		return -EFAULT;
-	}
-
-	/* loop sets wqe's purge bit */
-	wqe = (struct ehca_wqe *)ipz_qeit_calc(squeue, q_ofs);
-	*bad_wqe_cnt = 0;
-	while (wqe->optype != 0xff && wqe->wqef != 0xff) {
-		if (ehca_debug_level >= 2)
-			ehca_dmp(wqe, 32, "qp_num=%x wqe", qp_num);
-		wqe->nr_of_data_seg = 0; /* suppress data access */
-		wqe->wqef = WQEF_PURGE; /* WQE to be purged */
-		q_ofs = ipz_queue_advance_offset(squeue, q_ofs);
-		wqe = (struct ehca_wqe *)ipz_qeit_calc(squeue, q_ofs);
-		*bad_wqe_cnt = (*bad_wqe_cnt)+1;
-	}
-	/*
-	 * bad wqe will be reprocessed and ignored when pol_cq() is called,
-	 *  i.e. nr of wqes with flush error status is one less
-	 */
-	ehca_dbg(&shca->ib_device, "qp_num=%x flusherr_wqe_cnt=%x",
-		 qp_num, (*bad_wqe_cnt)-1);
-	wqe->wqef = 0;
-
-	return 0;
-}
-
-static int calc_left_cqes(u64 wqe_p, struct ipz_queue *ipz_queue,
-			  struct ehca_queue_map *qmap)
-{
-	void *wqe_v;
-	u64 q_ofs;
-	u32 wqe_idx;
-	unsigned int tail_idx;
-
-	/* convert real to abs address */
-	wqe_p = wqe_p & (~(1UL << 63));
-
-	wqe_v = __va(wqe_p);
-
-	if (ipz_queue_abs_to_offset(ipz_queue, wqe_p, &q_ofs)) {
-		ehca_gen_err("Invalid offset for calculating left cqes "
-				"wqe_p=%#llx wqe_v=%p\n", wqe_p, wqe_v);
-		return -EFAULT;
-	}
-
-	tail_idx = next_index(qmap->tail, qmap->entries);
-	wqe_idx = q_ofs / ipz_queue->qe_size;
-
-	/* check all processed wqes, whether a cqe is requested or not */
-	while (tail_idx != wqe_idx) {
-		if (qmap->map[tail_idx].cqe_req)
-			qmap->left_to_poll++;
-		tail_idx = next_index(tail_idx, qmap->entries);
-	}
-	/* save index in queue, where we have to start flushing */
-	qmap->next_wqe_idx = wqe_idx;
-	return 0;
-}
-
-static int check_for_left_cqes(struct ehca_qp *my_qp, struct ehca_shca *shca)
-{
-	u64 h_ret;
-	void *send_wqe_p, *recv_wqe_p;
-	int ret;
-	unsigned long flags;
-	int qp_num = my_qp->ib_qp.qp_num;
-
-	/* this hcall is not supported on base QPs */
-	if (my_qp->ext_type != EQPT_SRQBASE) {
-		/* get send and receive wqe pointer */
-		h_ret = hipz_h_disable_and_get_wqe(shca->ipz_hca_handle,
-				my_qp->ipz_qp_handle, &my_qp->pf,
-				&send_wqe_p, &recv_wqe_p, 4);
-		if (h_ret != H_SUCCESS) {
-			ehca_err(&shca->ib_device, "disable_and_get_wqe() "
-				 "failed ehca_qp=%p qp_num=%x h_ret=%lli",
-				 my_qp, qp_num, h_ret);
-			return ehca2ib_return_code(h_ret);
-		}
-
-		/*
-		 * acquire lock to ensure that nobody is polling the cq which
-		 * could mean that the qmap->tail pointer is in an
-		 * inconsistent state.
-		 */
-		spin_lock_irqsave(&my_qp->send_cq->spinlock, flags);
-		ret = calc_left_cqes((u64)send_wqe_p, &my_qp->ipz_squeue,
-				&my_qp->sq_map);
-		spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags);
-		if (ret)
-			return ret;
-
-
-		spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags);
-		ret = calc_left_cqes((u64)recv_wqe_p, &my_qp->ipz_rqueue,
-				&my_qp->rq_map);
-		spin_unlock_irqrestore(&my_qp->recv_cq->spinlock, flags);
-		if (ret)
-			return ret;
-	} else {
-		spin_lock_irqsave(&my_qp->send_cq->spinlock, flags);
-		my_qp->sq_map.left_to_poll = 0;
-		my_qp->sq_map.next_wqe_idx = next_index(my_qp->sq_map.tail,
-							my_qp->sq_map.entries);
-		spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags);
-
-		spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags);
-		my_qp->rq_map.left_to_poll = 0;
-		my_qp->rq_map.next_wqe_idx = next_index(my_qp->rq_map.tail,
-							my_qp->rq_map.entries);
-		spin_unlock_irqrestore(&my_qp->recv_cq->spinlock, flags);
-	}
-
-	/* this assures flush cqes being generated only for pending wqes */
-	if ((my_qp->sq_map.left_to_poll == 0) &&
-				(my_qp->rq_map.left_to_poll == 0)) {
-		spin_lock_irqsave(&my_qp->send_cq->spinlock, flags);
-		ehca_add_to_err_list(my_qp, 1);
-		spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags);
-
-		if (HAS_RQ(my_qp)) {
-			spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags);
-			ehca_add_to_err_list(my_qp, 0);
-			spin_unlock_irqrestore(&my_qp->recv_cq->spinlock,
-					flags);
-		}
-	}
-
-	return 0;
-}
-
-/*
- * internal_modify_qp with circumvention to handle aqp0 properly
- * smi_reset2init indicates if this is an internal reset-to-init-call for
- * smi. This flag must always be zero if called from ehca_modify_qp()!
- * This internal func was intorduced to avoid recursion of ehca_modify_qp()!
- */
-static int internal_modify_qp(struct ib_qp *ibqp,
-			      struct ib_qp_attr *attr,
-			      int attr_mask, int smi_reset2init)
-{
-	enum ib_qp_state qp_cur_state, qp_new_state;
-	int cnt, qp_attr_idx, ret = 0;
-	enum ib_qp_statetrans statetrans;
-	struct hcp_modify_qp_control_block *mqpcb;
-	struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp);
-	struct ehca_shca *shca =
-		container_of(ibqp->pd->device, struct ehca_shca, ib_device);
-	u64 update_mask;
-	u64 h_ret;
-	int bad_wqe_cnt = 0;
-	int is_user = 0;
-	int squeue_locked = 0;
-	unsigned long flags = 0;
-
-	/* do query_qp to obtain current attr values */
-	mqpcb = ehca_alloc_fw_ctrlblock(GFP_ATOMIC);
-	if (!mqpcb) {
-		ehca_err(ibqp->device, "Could not get zeroed page for mqpcb "
-			 "ehca_qp=%p qp_num=%x ", my_qp, ibqp->qp_num);
-		return -ENOMEM;
-	}
-
-	h_ret = hipz_h_query_qp(shca->ipz_hca_handle,
-				my_qp->ipz_qp_handle,
-				&my_qp->pf,
-				mqpcb, my_qp->galpas.kernel);
-	if (h_ret != H_SUCCESS) {
-		ehca_err(ibqp->device, "hipz_h_query_qp() failed "
-			 "ehca_qp=%p qp_num=%x h_ret=%lli",
-			 my_qp, ibqp->qp_num, h_ret);
-		ret = ehca2ib_return_code(h_ret);
-		goto modify_qp_exit1;
-	}
-	if (ibqp->uobject)
-		is_user = 1;
-
-	qp_cur_state = ehca2ib_qp_state(mqpcb->qp_state);
-
-	if (qp_cur_state == -EINVAL) {	/* invalid qp state */
-		ret = -EINVAL;
-		ehca_err(ibqp->device, "Invalid current ehca_qp_state=%x "
-			 "ehca_qp=%p qp_num=%x",
-			 mqpcb->qp_state, my_qp, ibqp->qp_num);
-		goto modify_qp_exit1;
-	}
-	/*
-	 * circumvention to set aqp0 initial state to init
-	 * as expected by IB spec
-	 */
-	if (smi_reset2init == 0 &&
-	    ibqp->qp_type == IB_QPT_SMI &&
-	    qp_cur_state == IB_QPS_RESET &&
-	    (attr_mask & IB_QP_STATE) &&
-	    attr->qp_state == IB_QPS_INIT) { /* RESET -> INIT */
-		struct ib_qp_attr smiqp_attr = {
-			.qp_state = IB_QPS_INIT,
-			.port_num = my_qp->init_attr.port_num,
-			.pkey_index = 0,
-			.qkey = 0
-		};
-		int smiqp_attr_mask = IB_QP_STATE | IB_QP_PORT |
-			IB_QP_PKEY_INDEX | IB_QP_QKEY;
-		int smirc = internal_modify_qp(
-			ibqp, &smiqp_attr, smiqp_attr_mask, 1);
-		if (smirc) {
-			ehca_err(ibqp->device, "SMI RESET -> INIT failed. "
-				 "ehca_modify_qp() rc=%i", smirc);
-			ret = H_PARAMETER;
-			goto modify_qp_exit1;
-		}
-		qp_cur_state = IB_QPS_INIT;
-		ehca_dbg(ibqp->device, "SMI RESET -> INIT succeeded");
-	}
-	/* is transmitted current state  equal to "real" current state */
-	if ((attr_mask & IB_QP_CUR_STATE) &&
-	    qp_cur_state != attr->cur_qp_state) {
-		ret = -EINVAL;
-		ehca_err(ibqp->device,
-			 "Invalid IB_QP_CUR_STATE attr->curr_qp_state=%x <>"
-			 " actual cur_qp_state=%x. ehca_qp=%p qp_num=%x",
-			 attr->cur_qp_state, qp_cur_state, my_qp, ibqp->qp_num);
-		goto modify_qp_exit1;
-	}
-
-	ehca_dbg(ibqp->device, "ehca_qp=%p qp_num=%x current qp_state=%x "
-		 "new qp_state=%x attribute_mask=%x",
-		 my_qp, ibqp->qp_num, qp_cur_state, attr->qp_state, attr_mask);
-
-	qp_new_state = attr_mask & IB_QP_STATE ? attr->qp_state : qp_cur_state;
-	if (!smi_reset2init &&
-	    !ib_modify_qp_is_ok(qp_cur_state, qp_new_state, ibqp->qp_type,
-				attr_mask, IB_LINK_LAYER_UNSPECIFIED)) {
-		ret = -EINVAL;
-		ehca_err(ibqp->device,
-			 "Invalid qp transition new_state=%x cur_state=%x "
-			 "ehca_qp=%p qp_num=%x attr_mask=%x", qp_new_state,
-			 qp_cur_state, my_qp, ibqp->qp_num, attr_mask);
-		goto modify_qp_exit1;
-	}
-
-	mqpcb->qp_state = ib2ehca_qp_state(qp_new_state);
-	if (mqpcb->qp_state)
-		update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_STATE, 1);
-	else {
-		ret = -EINVAL;
-		ehca_err(ibqp->device, "Invalid new qp state=%x "
-			 "ehca_qp=%p qp_num=%x",
-			 qp_new_state, my_qp, ibqp->qp_num);
-		goto modify_qp_exit1;
-	}
-
-	/* retrieve state transition struct to get req and opt attrs */
-	statetrans = get_modqp_statetrans(qp_cur_state, qp_new_state);
-	if (statetrans < 0) {
-		ret = -EINVAL;
-		ehca_err(ibqp->device, "<INVALID STATE CHANGE> qp_cur_state=%x "
-			 "new_qp_state=%x State_xsition=%x ehca_qp=%p "
-			 "qp_num=%x", qp_cur_state, qp_new_state,
-			 statetrans, my_qp, ibqp->qp_num);
-		goto modify_qp_exit1;
-	}
-
-	qp_attr_idx = ib2ehcaqptype(ibqp->qp_type);
-
-	if (qp_attr_idx < 0) {
-		ret = qp_attr_idx;
-		ehca_err(ibqp->device,
-			 "Invalid QP type=%x ehca_qp=%p qp_num=%x",
-			 ibqp->qp_type, my_qp, ibqp->qp_num);
-		goto modify_qp_exit1;
-	}
-
-	ehca_dbg(ibqp->device,
-		 "ehca_qp=%p qp_num=%x <VALID STATE CHANGE> qp_state_xsit=%x",
-		 my_qp, ibqp->qp_num, statetrans);
-
-	/* eHCA2 rev2 and higher require the SEND_GRH_FLAG to be set
-	 * in non-LL UD QPs.
-	 */
-	if ((my_qp->qp_type == IB_QPT_UD) &&
-	    (my_qp->ext_type != EQPT_LLQP) &&
-	    (statetrans == IB_QPST_INIT2RTR) &&
-	    (shca->hw_level >= 0x22)) {
-		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SEND_GRH_FLAG, 1);
-		mqpcb->send_grh_flag = 1;
-	}
-
-	/* sqe -> rts: set purge bit of bad wqe before actual trans */
-	if ((my_qp->qp_type == IB_QPT_UD ||
-	     my_qp->qp_type == IB_QPT_GSI ||
-	     my_qp->qp_type == IB_QPT_SMI) &&
-	    statetrans == IB_QPST_SQE2RTS) {
-		/* mark next free wqe if kernel */
-		if (!ibqp->uobject) {
-			struct ehca_wqe *wqe;
-			/* lock send queue */
-			spin_lock_irqsave(&my_qp->spinlock_s, flags);
-			squeue_locked = 1;
-			/* mark next free wqe */
-			wqe = (struct ehca_wqe *)
-				ipz_qeit_get(&my_qp->ipz_squeue);
-			wqe->optype = wqe->wqef = 0xff;
-			ehca_dbg(ibqp->device, "qp_num=%x next_free_wqe=%p",
-				 ibqp->qp_num, wqe);
-		}
-		ret = prepare_sqe_rts(my_qp, shca, &bad_wqe_cnt);
-		if (ret) {
-			ehca_err(ibqp->device, "prepare_sqe_rts() failed "
-				 "ehca_qp=%p qp_num=%x ret=%i",
-				 my_qp, ibqp->qp_num, ret);
-			goto modify_qp_exit2;
-		}
-	}
-
-	/*
-	 * enable RDMA_Atomic_Control if reset->init und reliable con
-	 * this is necessary since gen2 does not provide that flag,
-	 * but pHyp requires it
-	 */
-	if (statetrans == IB_QPST_RESET2INIT &&
-	    (ibqp->qp_type == IB_QPT_RC || ibqp->qp_type == IB_QPT_UC)) {
-		mqpcb->rdma_atomic_ctrl = 3;
-		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_RDMA_ATOMIC_CTRL, 1);
-	}
-	/* circ. pHyp requires #RDMA/Atomic Resp Res for UC INIT -> RTR */
-	if (statetrans == IB_QPST_INIT2RTR &&
-	    (ibqp->qp_type == IB_QPT_UC) &&
-	    !(attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)) {
-		mqpcb->rdma_nr_atomic_resp_res = 1; /* default to 1 */
-		update_mask |=
-			EHCA_BMASK_SET(MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES, 1);
-	}
-
-	if (attr_mask & IB_QP_PKEY_INDEX) {
-		if (attr->pkey_index >= 16) {
-			ret = -EINVAL;
-			ehca_err(ibqp->device, "Invalid pkey_index=%x. "
-				 "ehca_qp=%p qp_num=%x max_pkey_index=f",
-				 attr->pkey_index, my_qp, ibqp->qp_num);
-			goto modify_qp_exit2;
-		}
-		mqpcb->prim_p_key_idx = attr->pkey_index;
-		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PRIM_P_KEY_IDX, 1);
-	}
-	if (attr_mask & IB_QP_PORT) {
-		struct ehca_sport *sport;
-		struct ehca_qp *aqp1;
-		if (attr->port_num < 1 || attr->port_num > shca->num_ports) {
-			ret = -EINVAL;
-			ehca_err(ibqp->device, "Invalid port=%x. "
-				 "ehca_qp=%p qp_num=%x num_ports=%x",
-				 attr->port_num, my_qp, ibqp->qp_num,
-				 shca->num_ports);
-			goto modify_qp_exit2;
-		}
-		sport = &shca->sport[attr->port_num - 1];
-		if (!sport->ibqp_sqp[IB_QPT_GSI]) {
-			/* should not occur */
-			ret = -EFAULT;
-			ehca_err(ibqp->device, "AQP1 was not created for "
-				 "port=%x", attr->port_num);
-			goto modify_qp_exit2;
-		}
-		aqp1 = container_of(sport->ibqp_sqp[IB_QPT_GSI],
-				    struct ehca_qp, ib_qp);
-		if (ibqp->qp_type != IB_QPT_GSI &&
-		    ibqp->qp_type != IB_QPT_SMI &&
-		    aqp1->mod_qp_parm) {
-			/*
-			 * firmware will reject this modify_qp() because
-			 * port is not activated/initialized fully
-			 */
-			ret = -EFAULT;
-			ehca_warn(ibqp->device, "Couldn't modify qp port=%x: "
-				  "either port is being activated (try again) "
-				  "or cabling issue", attr->port_num);
-			goto modify_qp_exit2;
-		}
-		mqpcb->prim_phys_port = attr->port_num;
-		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PRIM_PHYS_PORT, 1);
-	}
-	if (attr_mask & IB_QP_QKEY) {
-		mqpcb->qkey = attr->qkey;
-		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_QKEY, 1);
-	}
-	if (attr_mask & IB_QP_AV) {
-		mqpcb->dlid = attr->ah_attr.dlid;
-		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_DLID, 1);
-		mqpcb->source_path_bits = attr->ah_attr.src_path_bits;
-		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SOURCE_PATH_BITS, 1);
-		mqpcb->service_level = attr->ah_attr.sl;
-		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SERVICE_LEVEL, 1);
-
-		if (ehca_calc_ipd(shca, mqpcb->prim_phys_port,
-				  attr->ah_attr.static_rate,
-				  &mqpcb->max_static_rate)) {
-			ret = -EINVAL;
-			goto modify_qp_exit2;
-		}
-		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_MAX_STATIC_RATE, 1);
-
-		/*
-		 * Always supply the GRH flag, even if it's zero, to give the
-		 * hypervisor a clear "yes" or "no" instead of a "perhaps"
-		 */
-		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SEND_GRH_FLAG, 1);
-
-		/*
-		 * only if GRH is TRUE we might consider SOURCE_GID_IDX
-		 * and DEST_GID otherwise phype will return H_ATTR_PARM!!!
-		 */
-		if (attr->ah_attr.ah_flags == IB_AH_GRH) {
-			mqpcb->send_grh_flag = 1;
-
-			mqpcb->source_gid_idx = attr->ah_attr.grh.sgid_index;
-			update_mask |=
-				EHCA_BMASK_SET(MQPCB_MASK_SOURCE_GID_IDX, 1);
-
-			for (cnt = 0; cnt < 16; cnt++)
-				mqpcb->dest_gid.byte[cnt] =
-					attr->ah_attr.grh.dgid.raw[cnt];
-
-			update_mask |= EHCA_BMASK_SET(MQPCB_MASK_DEST_GID, 1);
-			mqpcb->flow_label = attr->ah_attr.grh.flow_label;
-			update_mask |= EHCA_BMASK_SET(MQPCB_MASK_FLOW_LABEL, 1);
-			mqpcb->hop_limit = attr->ah_attr.grh.hop_limit;
-			update_mask |= EHCA_BMASK_SET(MQPCB_MASK_HOP_LIMIT, 1);
-			mqpcb->traffic_class = attr->ah_attr.grh.traffic_class;
-			update_mask |=
-				EHCA_BMASK_SET(MQPCB_MASK_TRAFFIC_CLASS, 1);
-		}
-	}
-
-	if (attr_mask & IB_QP_PATH_MTU) {
-		/* store ld(MTU) */
-		my_qp->mtu_shift = attr->path_mtu + 7;
-		mqpcb->path_mtu = attr->path_mtu;
-		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PATH_MTU, 1);
-	}
-	if (attr_mask & IB_QP_TIMEOUT) {
-		mqpcb->timeout = attr->timeout;
-		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_TIMEOUT, 1);
-	}
-	if (attr_mask & IB_QP_RETRY_CNT) {
-		mqpcb->retry_count = attr->retry_cnt;
-		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_RETRY_COUNT, 1);
-	}
-	if (attr_mask & IB_QP_RNR_RETRY) {
-		mqpcb->rnr_retry_count = attr->rnr_retry;
-		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_RNR_RETRY_COUNT, 1);
-	}
-	if (attr_mask & IB_QP_RQ_PSN) {
-		mqpcb->receive_psn = attr->rq_psn;
-		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_RECEIVE_PSN, 1);
-	}
-	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
-		mqpcb->rdma_nr_atomic_resp_res = attr->max_dest_rd_atomic < 3 ?
-			attr->max_dest_rd_atomic : 2;
-		update_mask |=
-			EHCA_BMASK_SET(MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES, 1);
-	}
-	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
-		mqpcb->rdma_atomic_outst_dest_qp = attr->max_rd_atomic < 3 ?
-			attr->max_rd_atomic : 2;
-		update_mask |=
-			EHCA_BMASK_SET
-			(MQPCB_MASK_RDMA_ATOMIC_OUTST_DEST_QP, 1);
-	}
-	if (attr_mask & IB_QP_ALT_PATH) {
-		if (attr->alt_port_num < 1
-		    || attr->alt_port_num > shca->num_ports) {
-			ret = -EINVAL;
-			ehca_err(ibqp->device, "Invalid alt_port=%x. "
-				 "ehca_qp=%p qp_num=%x num_ports=%x",
-				 attr->alt_port_num, my_qp, ibqp->qp_num,
-				 shca->num_ports);
-			goto modify_qp_exit2;
-		}
-		mqpcb->alt_phys_port = attr->alt_port_num;
-
-		if (attr->alt_pkey_index >= 16) {
-			ret = -EINVAL;
-			ehca_err(ibqp->device, "Invalid alt_pkey_index=%x. "
-				 "ehca_qp=%p qp_num=%x max_pkey_index=f",
-				 attr->pkey_index, my_qp, ibqp->qp_num);
-			goto modify_qp_exit2;
-		}
-		mqpcb->alt_p_key_idx = attr->alt_pkey_index;
-
-		mqpcb->timeout_al = attr->alt_timeout;
-		mqpcb->dlid_al = attr->alt_ah_attr.dlid;
-		mqpcb->source_path_bits_al = attr->alt_ah_attr.src_path_bits;
-		mqpcb->service_level_al = attr->alt_ah_attr.sl;
-
-		if (ehca_calc_ipd(shca, mqpcb->alt_phys_port,
-				  attr->alt_ah_attr.static_rate,
-				  &mqpcb->max_static_rate_al)) {
-			ret = -EINVAL;
-			goto modify_qp_exit2;
-		}
-
-		/* OpenIB doesn't support alternate retry counts - copy them */
-		mqpcb->retry_count_al = mqpcb->retry_count;
-		mqpcb->rnr_retry_count_al = mqpcb->rnr_retry_count;
-
-		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_ALT_PHYS_PORT, 1)
-			| EHCA_BMASK_SET(MQPCB_MASK_ALT_P_KEY_IDX, 1)
-			| EHCA_BMASK_SET(MQPCB_MASK_TIMEOUT_AL, 1)
-			| EHCA_BMASK_SET(MQPCB_MASK_DLID_AL, 1)
-			| EHCA_BMASK_SET(MQPCB_MASK_SOURCE_PATH_BITS_AL, 1)
-			| EHCA_BMASK_SET(MQPCB_MASK_SERVICE_LEVEL_AL, 1)
-			| EHCA_BMASK_SET(MQPCB_MASK_MAX_STATIC_RATE_AL, 1)
-			| EHCA_BMASK_SET(MQPCB_MASK_RETRY_COUNT_AL, 1)
-			| EHCA_BMASK_SET(MQPCB_MASK_RNR_RETRY_COUNT_AL, 1);
-
-		/*
-		 * Always supply the GRH flag, even if it's zero, to give the
-		 * hypervisor a clear "yes" or "no" instead of a "perhaps"
-		 */
-		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SEND_GRH_FLAG_AL, 1);
-
-		/*
-		 * only if GRH is TRUE we might consider SOURCE_GID_IDX
-		 * and DEST_GID otherwise phype will return H_ATTR_PARM!!!
-		 */
-		if (attr->alt_ah_attr.ah_flags == IB_AH_GRH) {
-			mqpcb->send_grh_flag_al = 1;
-
-			for (cnt = 0; cnt < 16; cnt++)
-				mqpcb->dest_gid_al.byte[cnt] =
-					attr->alt_ah_attr.grh.dgid.raw[cnt];
-			mqpcb->source_gid_idx_al =
-				attr->alt_ah_attr.grh.sgid_index;
-			mqpcb->flow_label_al = attr->alt_ah_attr.grh.flow_label;
-			mqpcb->hop_limit_al = attr->alt_ah_attr.grh.hop_limit;
-			mqpcb->traffic_class_al =
-				attr->alt_ah_attr.grh.traffic_class;
-
-			update_mask |=
-				EHCA_BMASK_SET(MQPCB_MASK_SOURCE_GID_IDX_AL, 1)
-				| EHCA_BMASK_SET(MQPCB_MASK_DEST_GID_AL, 1)
-				| EHCA_BMASK_SET(MQPCB_MASK_FLOW_LABEL_AL, 1)
-				| EHCA_BMASK_SET(MQPCB_MASK_HOP_LIMIT_AL, 1) |
-				EHCA_BMASK_SET(MQPCB_MASK_TRAFFIC_CLASS_AL, 1);
-		}
-	}
-
-	if (attr_mask & IB_QP_MIN_RNR_TIMER) {
-		mqpcb->min_rnr_nak_timer_field = attr->min_rnr_timer;
-		update_mask |=
-			EHCA_BMASK_SET(MQPCB_MASK_MIN_RNR_NAK_TIMER_FIELD, 1);
-	}
-
-	if (attr_mask & IB_QP_SQ_PSN) {
-		mqpcb->send_psn = attr->sq_psn;
-		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SEND_PSN, 1);
-	}
-
-	if (attr_mask & IB_QP_DEST_QPN) {
-		mqpcb->dest_qp_nr = attr->dest_qp_num;
-		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_DEST_QP_NR, 1);
-	}
-
-	if (attr_mask & IB_QP_PATH_MIG_STATE) {
-		if (attr->path_mig_state != IB_MIG_REARM
-		    && attr->path_mig_state != IB_MIG_MIGRATED) {
-			ret = -EINVAL;
-			ehca_err(ibqp->device, "Invalid mig_state=%x",
-				 attr->path_mig_state);
-			goto modify_qp_exit2;
-		}
-		mqpcb->path_migration_state = attr->path_mig_state + 1;
-		if (attr->path_mig_state == IB_MIG_REARM)
-			my_qp->mig_armed = 1;
-		update_mask |=
-			EHCA_BMASK_SET(MQPCB_MASK_PATH_MIGRATION_STATE, 1);
-	}
-
-	if (attr_mask & IB_QP_CAP) {
-		mqpcb->max_nr_outst_send_wr = attr->cap.max_send_wr+1;
-		update_mask |=
-			EHCA_BMASK_SET(MQPCB_MASK_MAX_NR_OUTST_SEND_WR, 1);
-		mqpcb->max_nr_outst_recv_wr = attr->cap.max_recv_wr+1;
-		update_mask |=
-			EHCA_BMASK_SET(MQPCB_MASK_MAX_NR_OUTST_RECV_WR, 1);
-		/* no support for max_send/recv_sge yet */
-	}
-
-	if (ehca_debug_level >= 2)
-		ehca_dmp(mqpcb, 4*70, "qp_num=%x", ibqp->qp_num);
-
-	h_ret = hipz_h_modify_qp(shca->ipz_hca_handle,
-				 my_qp->ipz_qp_handle,
-				 &my_qp->pf,
-				 update_mask,
-				 mqpcb, my_qp->galpas.kernel);
-
-	if (h_ret != H_SUCCESS) {
-		ret = ehca2ib_return_code(h_ret);
-		ehca_err(ibqp->device, "hipz_h_modify_qp() failed h_ret=%lli "
-			 "ehca_qp=%p qp_num=%x", h_ret, my_qp, ibqp->qp_num);
-		goto modify_qp_exit2;
-	}
-
-	if ((my_qp->qp_type == IB_QPT_UD ||
-	     my_qp->qp_type == IB_QPT_GSI ||
-	     my_qp->qp_type == IB_QPT_SMI) &&
-	    statetrans == IB_QPST_SQE2RTS) {
-		/* doorbell to reprocessing wqes */
-		iosync(); /* serialize GAL register access */
-		hipz_update_sqa(my_qp, bad_wqe_cnt-1);
-		ehca_gen_dbg("doorbell for %x wqes", bad_wqe_cnt);
-	}
-
-	if (statetrans == IB_QPST_RESET2INIT ||
-	    statetrans == IB_QPST_INIT2INIT) {
-		mqpcb->qp_enable = 1;
-		mqpcb->qp_state = EHCA_QPS_INIT;
-		update_mask = 0;
-		update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_ENABLE, 1);
-
-		h_ret = hipz_h_modify_qp(shca->ipz_hca_handle,
-					 my_qp->ipz_qp_handle,
-					 &my_qp->pf,
-					 update_mask,
-					 mqpcb,
-					 my_qp->galpas.kernel);
-
-		if (h_ret != H_SUCCESS) {
-			ret = ehca2ib_return_code(h_ret);
-			ehca_err(ibqp->device, "ENABLE in context of "
-				 "RESET_2_INIT failed! Maybe you didn't get "
-				 "a LID h_ret=%lli ehca_qp=%p qp_num=%x",
-				 h_ret, my_qp, ibqp->qp_num);
-			goto modify_qp_exit2;
-		}
-	}
-	if ((qp_new_state == IB_QPS_ERR) && (qp_cur_state != IB_QPS_ERR)
-	    && !is_user) {
-		ret = check_for_left_cqes(my_qp, shca);
-		if (ret)
-			goto modify_qp_exit2;
-	}
-
-	if (statetrans == IB_QPST_ANY2RESET) {
-		ipz_qeit_reset(&my_qp->ipz_rqueue);
-		ipz_qeit_reset(&my_qp->ipz_squeue);
-
-		if (qp_cur_state == IB_QPS_ERR && !is_user) {
-			del_from_err_list(my_qp->send_cq, &my_qp->sq_err_node);
-
-			if (HAS_RQ(my_qp))
-				del_from_err_list(my_qp->recv_cq,
-						  &my_qp->rq_err_node);
-		}
-		if (!is_user)
-			reset_queue_map(&my_qp->sq_map);
-
-		if (HAS_RQ(my_qp) && !is_user)
-			reset_queue_map(&my_qp->rq_map);
-	}
-
-	if (attr_mask & IB_QP_QKEY)
-		my_qp->qkey = attr->qkey;
-
-modify_qp_exit2:
-	if (squeue_locked) { /* this means: sqe -> rts */
-		spin_unlock_irqrestore(&my_qp->spinlock_s, flags);
-		my_qp->sqerr_purgeflag = 1;
-	}
-
-modify_qp_exit1:
-	ehca_free_fw_ctrlblock(mqpcb);
-
-	return ret;
-}
-
-int ehca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
-		   struct ib_udata *udata)
-{
-	int ret = 0;
-
-	struct ehca_shca *shca = container_of(ibqp->device, struct ehca_shca,
-					      ib_device);
-	struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp);
-
-	/* The if-block below caches qp_attr to be modified for GSI and SMI
-	 * qps during the initialization by ib_mad. When the respective port
-	 * is activated, ie we got an event PORT_ACTIVE, we'll replay the
-	 * cached modify calls sequence, see ehca_recover_sqs() below.
-	 * Why that is required:
-	 * 1) If one port is connected, older code requires that port one
-	 *    to be connected and module option nr_ports=1 to be given by
-	 *    user, which is very inconvenient for end user.
-	 * 2) Firmware accepts modify_qp() only if respective port has become
-	 *    active. Older code had a wait loop of 30sec create_qp()/
-	 *    define_aqp1(), which is not appropriate in practice. This
-	 *    code now removes that wait loop, see define_aqp1(), and always
-	 *    reports all ports to ib_mad resp. users. Only activated ports
-	 *    will then usable for the users.
-	 */
-	if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI) {
-		int port = my_qp->init_attr.port_num;
-		struct ehca_sport *sport = &shca->sport[port - 1];
-		unsigned long flags;
-		spin_lock_irqsave(&sport->mod_sqp_lock, flags);
-		/* cache qp_attr only during init */
-		if (my_qp->mod_qp_parm) {
-			struct ehca_mod_qp_parm *p;
-			if (my_qp->mod_qp_parm_idx >= EHCA_MOD_QP_PARM_MAX) {
-				ehca_err(&shca->ib_device,
-					 "mod_qp_parm overflow state=%x port=%x"
-					 " type=%x", attr->qp_state,
-					 my_qp->init_attr.port_num,
-					 ibqp->qp_type);
-				spin_unlock_irqrestore(&sport->mod_sqp_lock,
-						       flags);
-				return -EINVAL;
-			}
-			p = &my_qp->mod_qp_parm[my_qp->mod_qp_parm_idx];
-			p->mask = attr_mask;
-			p->attr = *attr;
-			my_qp->mod_qp_parm_idx++;
-			ehca_dbg(&shca->ib_device,
-				 "Saved qp_attr for state=%x port=%x type=%x",
-				 attr->qp_state, my_qp->init_attr.port_num,
-				 ibqp->qp_type);
-			spin_unlock_irqrestore(&sport->mod_sqp_lock, flags);
-			goto out;
-		}
-		spin_unlock_irqrestore(&sport->mod_sqp_lock, flags);
-	}
-
-	ret = internal_modify_qp(ibqp, attr, attr_mask, 0);
-
-out:
-	if ((ret == 0) && (attr_mask & IB_QP_STATE))
-		my_qp->state = attr->qp_state;
-
-	return ret;
-}
-
-void ehca_recover_sqp(struct ib_qp *sqp)
-{
-	struct ehca_qp *my_sqp = container_of(sqp, struct ehca_qp, ib_qp);
-	int port = my_sqp->init_attr.port_num;
-	struct ib_qp_attr attr;
-	struct ehca_mod_qp_parm *qp_parm;
-	int i, qp_parm_idx, ret;
-	unsigned long flags, wr_cnt;
-
-	if (!my_sqp->mod_qp_parm)
-		return;
-	ehca_dbg(sqp->device, "SQP port=%x qp_num=%x", port, sqp->qp_num);
-
-	qp_parm = my_sqp->mod_qp_parm;
-	qp_parm_idx = my_sqp->mod_qp_parm_idx;
-	for (i = 0; i < qp_parm_idx; i++) {
-		attr = qp_parm[i].attr;
-		ret = internal_modify_qp(sqp, &attr, qp_parm[i].mask, 0);
-		if (ret) {
-			ehca_err(sqp->device, "Could not modify SQP port=%x "
-				 "qp_num=%x ret=%x", port, sqp->qp_num, ret);
-			goto free_qp_parm;
-		}
-		ehca_dbg(sqp->device, "SQP port=%x qp_num=%x in state=%x",
-			 port, sqp->qp_num, attr.qp_state);
-	}
-
-	/* re-trigger posted recv wrs */
-	wr_cnt =  my_sqp->ipz_rqueue.current_q_offset /
-		my_sqp->ipz_rqueue.qe_size;
-	if (wr_cnt) {
-		spin_lock_irqsave(&my_sqp->spinlock_r, flags);
-		hipz_update_rqa(my_sqp, wr_cnt);
-		spin_unlock_irqrestore(&my_sqp->spinlock_r, flags);
-		ehca_dbg(sqp->device, "doorbell port=%x qp_num=%x wr_cnt=%lx",
-			 port, sqp->qp_num, wr_cnt);
-	}
-
-free_qp_parm:
-	kfree(qp_parm);
-	/* this prevents subsequent calls to modify_qp() to cache qp_attr */
-	my_sqp->mod_qp_parm = NULL;
-}
-
-int ehca_query_qp(struct ib_qp *qp,
-		  struct ib_qp_attr *qp_attr,
-		  int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr)
-{
-	struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp);
-	struct ehca_shca *shca = container_of(qp->device, struct ehca_shca,
-					      ib_device);
-	struct ipz_adapter_handle adapter_handle = shca->ipz_hca_handle;
-	struct hcp_modify_qp_control_block *qpcb;
-	int cnt, ret = 0;
-	u64 h_ret;
-
-	if (qp_attr_mask & QP_ATTR_QUERY_NOT_SUPPORTED) {
-		ehca_err(qp->device, "Invalid attribute mask "
-			 "ehca_qp=%p qp_num=%x qp_attr_mask=%x ",
-			 my_qp, qp->qp_num, qp_attr_mask);
-		return -EINVAL;
-	}
-
-	qpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
-	if (!qpcb) {
-		ehca_err(qp->device, "Out of memory for qpcb "
-			 "ehca_qp=%p qp_num=%x", my_qp, qp->qp_num);
-		return -ENOMEM;
-	}
-
-	h_ret = hipz_h_query_qp(adapter_handle,
-				my_qp->ipz_qp_handle,
-				&my_qp->pf,
-				qpcb, my_qp->galpas.kernel);
-
-	if (h_ret != H_SUCCESS) {
-		ret = ehca2ib_return_code(h_ret);
-		ehca_err(qp->device, "hipz_h_query_qp() failed "
-			 "ehca_qp=%p qp_num=%x h_ret=%lli",
-			 my_qp, qp->qp_num, h_ret);
-		goto query_qp_exit1;
-	}
-
-	qp_attr->cur_qp_state = ehca2ib_qp_state(qpcb->qp_state);
-	qp_attr->qp_state = qp_attr->cur_qp_state;
-
-	if (qp_attr->cur_qp_state == -EINVAL) {
-		ret = -EINVAL;
-		ehca_err(qp->device, "Got invalid ehca_qp_state=%x "
-			 "ehca_qp=%p qp_num=%x",
-			 qpcb->qp_state, my_qp, qp->qp_num);
-		goto query_qp_exit1;
-	}
-
-	if (qp_attr->qp_state == IB_QPS_SQD)
-		qp_attr->sq_draining = 1;
-
-	qp_attr->qkey = qpcb->qkey;
-	qp_attr->path_mtu = qpcb->path_mtu;
-	qp_attr->path_mig_state = qpcb->path_migration_state - 1;
-	qp_attr->rq_psn = qpcb->receive_psn;
-	qp_attr->sq_psn = qpcb->send_psn;
-	qp_attr->min_rnr_timer = qpcb->min_rnr_nak_timer_field;
-	qp_attr->cap.max_send_wr = qpcb->max_nr_outst_send_wr-1;
-	qp_attr->cap.max_recv_wr = qpcb->max_nr_outst_recv_wr-1;
-	/* UD_AV CIRCUMVENTION */
-	if (my_qp->qp_type == IB_QPT_UD) {
-		qp_attr->cap.max_send_sge =
-			qpcb->actual_nr_sges_in_sq_wqe - 2;
-		qp_attr->cap.max_recv_sge =
-			qpcb->actual_nr_sges_in_rq_wqe - 2;
-	} else {
-		qp_attr->cap.max_send_sge =
-			qpcb->actual_nr_sges_in_sq_wqe;
-		qp_attr->cap.max_recv_sge =
-			qpcb->actual_nr_sges_in_rq_wqe;
-	}
-
-	qp_attr->cap.max_inline_data = my_qp->sq_max_inline_data_size;
-	qp_attr->dest_qp_num = qpcb->dest_qp_nr;
-
-	qp_attr->pkey_index = qpcb->prim_p_key_idx;
-	qp_attr->port_num = qpcb->prim_phys_port;
-	qp_attr->timeout = qpcb->timeout;
-	qp_attr->retry_cnt = qpcb->retry_count;
-	qp_attr->rnr_retry = qpcb->rnr_retry_count;
-
-	qp_attr->alt_pkey_index = qpcb->alt_p_key_idx;
-	qp_attr->alt_port_num = qpcb->alt_phys_port;
-	qp_attr->alt_timeout = qpcb->timeout_al;
-
-	qp_attr->max_dest_rd_atomic = qpcb->rdma_nr_atomic_resp_res;
-	qp_attr->max_rd_atomic = qpcb->rdma_atomic_outst_dest_qp;
-
-	/* primary av */
-	qp_attr->ah_attr.sl = qpcb->service_level;
-
-	if (qpcb->send_grh_flag) {
-		qp_attr->ah_attr.ah_flags = IB_AH_GRH;
-	}
-
-	qp_attr->ah_attr.static_rate = qpcb->max_static_rate;
-	qp_attr->ah_attr.dlid = qpcb->dlid;
-	qp_attr->ah_attr.src_path_bits = qpcb->source_path_bits;
-	qp_attr->ah_attr.port_num = qp_attr->port_num;
-
-	/* primary GRH */
-	qp_attr->ah_attr.grh.traffic_class = qpcb->traffic_class;
-	qp_attr->ah_attr.grh.hop_limit = qpcb->hop_limit;
-	qp_attr->ah_attr.grh.sgid_index = qpcb->source_gid_idx;
-	qp_attr->ah_attr.grh.flow_label = qpcb->flow_label;
-
-	for (cnt = 0; cnt < 16; cnt++)
-		qp_attr->ah_attr.grh.dgid.raw[cnt] =
-			qpcb->dest_gid.byte[cnt];
-
-	/* alternate AV */
-	qp_attr->alt_ah_attr.sl = qpcb->service_level_al;
-	if (qpcb->send_grh_flag_al) {
-		qp_attr->alt_ah_attr.ah_flags = IB_AH_GRH;
-	}
-
-	qp_attr->alt_ah_attr.static_rate = qpcb->max_static_rate_al;
-	qp_attr->alt_ah_attr.dlid = qpcb->dlid_al;
-	qp_attr->alt_ah_attr.src_path_bits = qpcb->source_path_bits_al;
-
-	/* alternate GRH */
-	qp_attr->alt_ah_attr.grh.traffic_class = qpcb->traffic_class_al;
-	qp_attr->alt_ah_attr.grh.hop_limit = qpcb->hop_limit_al;
-	qp_attr->alt_ah_attr.grh.sgid_index = qpcb->source_gid_idx_al;
-	qp_attr->alt_ah_attr.grh.flow_label = qpcb->flow_label_al;
-
-	for (cnt = 0; cnt < 16; cnt++)
-		qp_attr->alt_ah_attr.grh.dgid.raw[cnt] =
-			qpcb->dest_gid_al.byte[cnt];
-
-	/* return init attributes given in ehca_create_qp */
-	if (qp_init_attr)
-		*qp_init_attr = my_qp->init_attr;
-
-	if (ehca_debug_level >= 2)
-		ehca_dmp(qpcb, 4*70, "qp_num=%x", qp->qp_num);
-
-query_qp_exit1:
-	ehca_free_fw_ctrlblock(qpcb);
-
-	return ret;
-}
-
-int ehca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
-		    enum ib_srq_attr_mask attr_mask, struct ib_udata *udata)
-{
-	struct ehca_qp *my_qp =
-		container_of(ibsrq, struct ehca_qp, ib_srq);
-	struct ehca_shca *shca =
-		container_of(ibsrq->pd->device, struct ehca_shca, ib_device);
-	struct hcp_modify_qp_control_block *mqpcb;
-	u64 update_mask;
-	u64 h_ret;
-	int ret = 0;
-
-	mqpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
-	if (!mqpcb) {
-		ehca_err(ibsrq->device, "Could not get zeroed page for mqpcb "
-			 "ehca_qp=%p qp_num=%x ", my_qp, my_qp->real_qp_num);
-		return -ENOMEM;
-	}
-
-	update_mask = 0;
-	if (attr_mask & IB_SRQ_LIMIT) {
-		attr_mask &= ~IB_SRQ_LIMIT;
-		update_mask |=
-			EHCA_BMASK_SET(MQPCB_MASK_CURR_SRQ_LIMIT, 1)
-			| EHCA_BMASK_SET(MQPCB_MASK_QP_AFF_ASYN_EV_LOG_REG, 1);
-		mqpcb->curr_srq_limit = attr->srq_limit;
-		mqpcb->qp_aff_asyn_ev_log_reg =
-			EHCA_BMASK_SET(QPX_AAELOG_RESET_SRQ_LIMIT, 1);
-	}
-
-	/* by now, all bits in attr_mask should have been cleared */
-	if (attr_mask) {
-		ehca_err(ibsrq->device, "invalid attribute mask bits set  "
-			 "attr_mask=%x", attr_mask);
-		ret = -EINVAL;
-		goto modify_srq_exit0;
-	}
-
-	if (ehca_debug_level >= 2)
-		ehca_dmp(mqpcb, 4*70, "qp_num=%x", my_qp->real_qp_num);
-
-	h_ret = hipz_h_modify_qp(shca->ipz_hca_handle, my_qp->ipz_qp_handle,
-				 NULL, update_mask, mqpcb,
-				 my_qp->galpas.kernel);
-
-	if (h_ret != H_SUCCESS) {
-		ret = ehca2ib_return_code(h_ret);
-		ehca_err(ibsrq->device, "hipz_h_modify_qp() failed h_ret=%lli "
-			 "ehca_qp=%p qp_num=%x",
-			 h_ret, my_qp, my_qp->real_qp_num);
-	}
-
-modify_srq_exit0:
-	ehca_free_fw_ctrlblock(mqpcb);
-
-	return ret;
-}
-
-int ehca_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr)
-{
-	struct ehca_qp *my_qp = container_of(srq, struct ehca_qp, ib_srq);
-	struct ehca_shca *shca = container_of(srq->device, struct ehca_shca,
-					      ib_device);
-	struct ipz_adapter_handle adapter_handle = shca->ipz_hca_handle;
-	struct hcp_modify_qp_control_block *qpcb;
-	int ret = 0;
-	u64 h_ret;
-
-	qpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
-	if (!qpcb) {
-		ehca_err(srq->device, "Out of memory for qpcb "
-			 "ehca_qp=%p qp_num=%x", my_qp, my_qp->real_qp_num);
-		return -ENOMEM;
-	}
-
-	h_ret = hipz_h_query_qp(adapter_handle, my_qp->ipz_qp_handle,
-				NULL, qpcb, my_qp->galpas.kernel);
-
-	if (h_ret != H_SUCCESS) {
-		ret = ehca2ib_return_code(h_ret);
-		ehca_err(srq->device, "hipz_h_query_qp() failed "
-			 "ehca_qp=%p qp_num=%x h_ret=%lli",
-			 my_qp, my_qp->real_qp_num, h_ret);
-		goto query_srq_exit1;
-	}
-
-	srq_attr->max_wr = qpcb->max_nr_outst_recv_wr - 1;
-	srq_attr->max_sge = 3;
-	srq_attr->srq_limit = qpcb->curr_srq_limit;
-
-	if (ehca_debug_level >= 2)
-		ehca_dmp(qpcb, 4*70, "qp_num=%x", my_qp->real_qp_num);
-
-query_srq_exit1:
-	ehca_free_fw_ctrlblock(qpcb);
-
-	return ret;
-}
-
-static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp,
-			       struct ib_uobject *uobject)
-{
-	struct ehca_shca *shca = container_of(dev, struct ehca_shca, ib_device);
-	struct ehca_pd *my_pd = container_of(my_qp->ib_qp.pd, struct ehca_pd,
-					     ib_pd);
-	struct ehca_sport *sport = &shca->sport[my_qp->init_attr.port_num - 1];
-	u32 qp_num = my_qp->real_qp_num;
-	int ret;
-	u64 h_ret;
-	u8 port_num;
-	int is_user = 0;
-	enum ib_qp_type	qp_type;
-	unsigned long flags;
-
-	if (uobject) {
-		is_user = 1;
-		if (my_qp->mm_count_galpa ||
-		    my_qp->mm_count_rqueue || my_qp->mm_count_squeue) {
-			ehca_err(dev, "Resources still referenced in "
-				 "user space qp_num=%x", qp_num);
-			return -EINVAL;
-		}
-	}
-
-	if (my_qp->send_cq) {
-		ret = ehca_cq_unassign_qp(my_qp->send_cq, qp_num);
-		if (ret) {
-			ehca_err(dev, "Couldn't unassign qp from "
-				 "send_cq ret=%i qp_num=%x cq_num=%x", ret,
-				 qp_num, my_qp->send_cq->cq_number);
-			return ret;
-		}
-	}
-
-	write_lock_irqsave(&ehca_qp_idr_lock, flags);
-	idr_remove(&ehca_qp_idr, my_qp->token);
-	write_unlock_irqrestore(&ehca_qp_idr_lock, flags);
-
-	/*
-	 * SRQs will never get into an error list and do not have a recv_cq,
-	 * so we need to skip them here.
-	 */
-	if (HAS_RQ(my_qp) && !IS_SRQ(my_qp) && !is_user)
-		del_from_err_list(my_qp->recv_cq, &my_qp->rq_err_node);
-
-	if (HAS_SQ(my_qp) && !is_user)
-		del_from_err_list(my_qp->send_cq, &my_qp->sq_err_node);
-
-	/* now wait until all pending events have completed */
-	wait_event(my_qp->wait_completion, !atomic_read(&my_qp->nr_events));
-
-	h_ret = hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp);
-	if (h_ret != H_SUCCESS) {
-		ehca_err(dev, "hipz_h_destroy_qp() failed h_ret=%lli "
-			 "ehca_qp=%p qp_num=%x", h_ret, my_qp, qp_num);
-		return ehca2ib_return_code(h_ret);
-	}
-
-	port_num = my_qp->init_attr.port_num;
-	qp_type  = my_qp->init_attr.qp_type;
-
-	if (qp_type == IB_QPT_SMI || qp_type == IB_QPT_GSI) {
-		spin_lock_irqsave(&sport->mod_sqp_lock, flags);
-		kfree(my_qp->mod_qp_parm);
-		my_qp->mod_qp_parm = NULL;
-		shca->sport[port_num - 1].ibqp_sqp[qp_type] = NULL;
-		spin_unlock_irqrestore(&sport->mod_sqp_lock, flags);
-	}
-
-	/* no support for IB_QPT_SMI yet */
-	if (qp_type == IB_QPT_GSI) {
-		struct ib_event event;
-		ehca_info(dev, "device %s: port %x is inactive.",
-				shca->ib_device.name, port_num);
-		event.device = &shca->ib_device;
-		event.event = IB_EVENT_PORT_ERR;
-		event.element.port_num = port_num;
-		shca->sport[port_num - 1].port_state = IB_PORT_DOWN;
-		ib_dispatch_event(&event);
-	}
-
-	if (HAS_RQ(my_qp)) {
-		ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue);
-		if (!is_user)
-			vfree(my_qp->rq_map.map);
-	}
-	if (HAS_SQ(my_qp)) {
-		ipz_queue_dtor(my_pd, &my_qp->ipz_squeue);
-		if (!is_user)
-			vfree(my_qp->sq_map.map);
-	}
-	kmem_cache_free(qp_cache, my_qp);
-	atomic_dec(&shca->num_qps);
-	return 0;
-}
-
-int ehca_destroy_qp(struct ib_qp *qp)
-{
-	return internal_destroy_qp(qp->device,
-				   container_of(qp, struct ehca_qp, ib_qp),
-				   qp->uobject);
-}
-
-int ehca_destroy_srq(struct ib_srq *srq)
-{
-	return internal_destroy_qp(srq->device,
-				   container_of(srq, struct ehca_qp, ib_srq),
-				   srq->uobject);
-}
-
-int ehca_init_qp_cache(void)
-{
-	qp_cache = kmem_cache_create("ehca_cache_qp",
-				     sizeof(struct ehca_qp), 0,
-				     SLAB_HWCACHE_ALIGN,
-				     NULL);
-	if (!qp_cache)
-		return -ENOMEM;
-	return 0;
-}
-
-void ehca_cleanup_qp_cache(void)
-{
-	kmem_cache_destroy(qp_cache);
-}
diff --git a/drivers/staging/rdma/ehca/ehca_reqs.c b/drivers/staging/rdma/ehca/ehca_reqs.c
deleted file mode 100644
index 11813b8..0000000
--- a/drivers/staging/rdma/ehca/ehca_reqs.c
+++ /dev/null
@@ -1,953 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  post_send/recv, poll_cq, req_notify
- *
- *  Authors: Hoang-Nam Nguyen <hnguyen@de.ibm.com>
- *           Waleri Fomin <fomin@de.ibm.com>
- *           Joachim Fenkes <fenkes@de.ibm.com>
- *           Reinhard Ernst <rernst@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-
-#include "ehca_classes.h"
-#include "ehca_tools.h"
-#include "ehca_qes.h"
-#include "ehca_iverbs.h"
-#include "hcp_if.h"
-#include "hipz_fns.h"
-
-/* in RC traffic, insert an empty RDMA READ every this many packets */
-#define ACK_CIRC_THRESHOLD 2000000
-
-static u64 replace_wr_id(u64 wr_id, u16 idx)
-{
-	u64 ret;
-
-	ret = wr_id & ~QMAP_IDX_MASK;
-	ret |= idx & QMAP_IDX_MASK;
-
-	return ret;
-}
-
-static u16 get_app_wr_id(u64 wr_id)
-{
-	return wr_id & QMAP_IDX_MASK;
-}
-
-static inline int ehca_write_rwqe(struct ipz_queue *ipz_rqueue,
-				  struct ehca_wqe *wqe_p,
-				  struct ib_recv_wr *recv_wr,
-				  u32 rq_map_idx)
-{
-	u8 cnt_ds;
-	if (unlikely((recv_wr->num_sge < 0) ||
-		     (recv_wr->num_sge > ipz_rqueue->act_nr_of_sg))) {
-		ehca_gen_err("Invalid number of WQE SGE. "
-			 "num_sqe=%x max_nr_of_sg=%x",
-			 recv_wr->num_sge, ipz_rqueue->act_nr_of_sg);
-		return -EINVAL; /* invalid SG list length */
-	}
-
-	/* clear wqe header until sglist */
-	memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list));
-
-	wqe_p->work_request_id = replace_wr_id(recv_wr->wr_id, rq_map_idx);
-	wqe_p->nr_of_data_seg = recv_wr->num_sge;
-
-	for (cnt_ds = 0; cnt_ds < recv_wr->num_sge; cnt_ds++) {
-		wqe_p->u.all_rcv.sg_list[cnt_ds].vaddr =
-			recv_wr->sg_list[cnt_ds].addr;
-		wqe_p->u.all_rcv.sg_list[cnt_ds].lkey =
-			recv_wr->sg_list[cnt_ds].lkey;
-		wqe_p->u.all_rcv.sg_list[cnt_ds].length =
-			recv_wr->sg_list[cnt_ds].length;
-	}
-
-	if (ehca_debug_level >= 3) {
-		ehca_gen_dbg("RECEIVE WQE written into ipz_rqueue=%p",
-			     ipz_rqueue);
-		ehca_dmp(wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "recv wqe");
-	}
-
-	return 0;
-}
-
-#if defined(DEBUG_GSI_SEND_WR)
-
-/* need ib_mad struct */
-#include <rdma/ib_mad.h>
-
-static void trace_ud_wr(const struct ib_ud_wr *ud_wr)
-{
-	int idx;
-	int j;
-	while (ud_wr) {
-		struct ib_mad_hdr *mad_hdr = ud_wrmad_hdr;
-		struct ib_sge *sge = ud_wr->wr.sg_list;
-		ehca_gen_dbg("ud_wr#%x wr_id=%lx num_sge=%x "
-			     "send_flags=%x opcode=%x", idx, ud_wr->wr.wr_id,
-			     ud_wr->wr.num_sge, ud_wr->wr.send_flags,
-			     ud_wr->.wr.opcode);
-		if (mad_hdr) {
-			ehca_gen_dbg("ud_wr#%x mad_hdr base_version=%x "
-				     "mgmt_class=%x class_version=%x method=%x "
-				     "status=%x class_specific=%x tid=%lx "
-				     "attr_id=%x resv=%x attr_mod=%x",
-				     idx, mad_hdr->base_version,
-				     mad_hdr->mgmt_class,
-				     mad_hdr->class_version, mad_hdr->method,
-				     mad_hdr->status, mad_hdr->class_specific,
-				     mad_hdr->tid, mad_hdr->attr_id,
-				     mad_hdr->resv,
-				     mad_hdr->attr_mod);
-		}
-		for (j = 0; j < ud_wr->wr.num_sge; j++) {
-			u8 *data = __va(sge->addr);
-			ehca_gen_dbg("ud_wr#%x sge#%x addr=%p length=%x "
-				     "lkey=%x",
-				     idx, j, data, sge->length, sge->lkey);
-			/* assume length is n*16 */
-			ehca_dmp(data, sge->length, "ud_wr#%x sge#%x",
-				 idx, j);
-			sge++;
-		} /* eof for j */
-		idx++;
-		ud_wr = ud_wr(ud_wr->wr.next);
-	} /* eof while ud_wr */
-}
-
-#endif /* DEBUG_GSI_SEND_WR */
-
-static inline int ehca_write_swqe(struct ehca_qp *qp,
-				  struct ehca_wqe *wqe_p,
-				  struct ib_send_wr *send_wr,
-				  u32 sq_map_idx,
-				  int hidden)
-{
-	u32 idx;
-	u64 dma_length;
-	struct ehca_av *my_av;
-	u32 remote_qkey;
-	struct ehca_qmap_entry *qmap_entry = &qp->sq_map.map[sq_map_idx];
-
-	if (unlikely((send_wr->num_sge < 0) ||
-		     (send_wr->num_sge > qp->ipz_squeue.act_nr_of_sg))) {
-		ehca_gen_err("Invalid number of WQE SGE. "
-			 "num_sqe=%x max_nr_of_sg=%x",
-			 send_wr->num_sge, qp->ipz_squeue.act_nr_of_sg);
-		return -EINVAL; /* invalid SG list length */
-	}
-
-	/* clear wqe header until sglist */
-	memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list));
-
-	wqe_p->work_request_id = replace_wr_id(send_wr->wr_id, sq_map_idx);
-
-	qmap_entry->app_wr_id = get_app_wr_id(send_wr->wr_id);
-	qmap_entry->reported = 0;
-	qmap_entry->cqe_req = 0;
-
-	switch (send_wr->opcode) {
-	case IB_WR_SEND:
-	case IB_WR_SEND_WITH_IMM:
-		wqe_p->optype = WQE_OPTYPE_SEND;
-		break;
-	case IB_WR_RDMA_WRITE:
-	case IB_WR_RDMA_WRITE_WITH_IMM:
-		wqe_p->optype = WQE_OPTYPE_RDMAWRITE;
-		break;
-	case IB_WR_RDMA_READ:
-		wqe_p->optype = WQE_OPTYPE_RDMAREAD;
-		break;
-	default:
-		ehca_gen_err("Invalid opcode=%x", send_wr->opcode);
-		return -EINVAL; /* invalid opcode */
-	}
-
-	wqe_p->wqef = (send_wr->opcode) & WQEF_HIGH_NIBBLE;
-
-	wqe_p->wr_flag = 0;
-
-	if ((send_wr->send_flags & IB_SEND_SIGNALED ||
-	    qp->init_attr.sq_sig_type == IB_SIGNAL_ALL_WR)
-	    && !hidden) {
-		wqe_p->wr_flag |= WQE_WRFLAG_REQ_SIGNAL_COM;
-		qmap_entry->cqe_req = 1;
-	}
-
-	if (send_wr->opcode == IB_WR_SEND_WITH_IMM ||
-	    send_wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) {
-		/* this might not work as long as HW does not support it */
-		wqe_p->immediate_data = be32_to_cpu(send_wr->ex.imm_data);
-		wqe_p->wr_flag |= WQE_WRFLAG_IMM_DATA_PRESENT;
-	}
-
-	wqe_p->nr_of_data_seg = send_wr->num_sge;
-
-	switch (qp->qp_type) {
-	case IB_QPT_SMI:
-	case IB_QPT_GSI:
-		/* no break is intential here */
-	case IB_QPT_UD:
-		/* IB 1.2 spec C10-15 compliance */
-		remote_qkey = ud_wr(send_wr)->remote_qkey;
-		if (remote_qkey & 0x80000000)
-			remote_qkey = qp->qkey;
-
-		wqe_p->destination_qp_number = ud_wr(send_wr)->remote_qpn << 8;
-		wqe_p->local_ee_context_qkey = remote_qkey;
-		if (unlikely(!ud_wr(send_wr)->ah)) {
-			ehca_gen_err("ud_wr(send_wr) is NULL. qp=%p", qp);
-			return -EINVAL;
-		}
-		if (unlikely(ud_wr(send_wr)->remote_qpn == 0)) {
-			ehca_gen_err("dest QP# is 0. qp=%x", qp->real_qp_num);
-			return -EINVAL;
-		}
-		my_av = container_of(ud_wr(send_wr)->ah, struct ehca_av, ib_ah);
-		wqe_p->u.ud_av.ud_av = my_av->av;
-
-		/*
-		 * omitted check of IB_SEND_INLINE
-		 * since HW does not support it
-		 */
-		for (idx = 0; idx < send_wr->num_sge; idx++) {
-			wqe_p->u.ud_av.sg_list[idx].vaddr =
-				send_wr->sg_list[idx].addr;
-			wqe_p->u.ud_av.sg_list[idx].lkey =
-				send_wr->sg_list[idx].lkey;
-			wqe_p->u.ud_av.sg_list[idx].length =
-				send_wr->sg_list[idx].length;
-		} /* eof for idx */
-		if (qp->qp_type == IB_QPT_SMI ||
-		    qp->qp_type == IB_QPT_GSI)
-			wqe_p->u.ud_av.ud_av.pmtu = 1;
-		if (qp->qp_type == IB_QPT_GSI) {
-			wqe_p->pkeyi = ud_wr(send_wr)->pkey_index;
-#ifdef DEBUG_GSI_SEND_WR
-			trace_ud_wr(ud_wr(send_wr));
-#endif /* DEBUG_GSI_SEND_WR */
-		}
-		break;
-
-	case IB_QPT_UC:
-		if (send_wr->send_flags & IB_SEND_FENCE)
-			wqe_p->wr_flag |= WQE_WRFLAG_FENCE;
-		/* no break is intentional here */
-	case IB_QPT_RC:
-		/* TODO: atomic not implemented */
-		wqe_p->u.nud.remote_virtual_address =
-			rdma_wr(send_wr)->remote_addr;
-		wqe_p->u.nud.rkey = rdma_wr(send_wr)->rkey;
-
-		/*
-		 * omitted checking of IB_SEND_INLINE
-		 * since HW does not support it
-		 */
-		dma_length = 0;
-		for (idx = 0; idx < send_wr->num_sge; idx++) {
-			wqe_p->u.nud.sg_list[idx].vaddr =
-				send_wr->sg_list[idx].addr;
-			wqe_p->u.nud.sg_list[idx].lkey =
-				send_wr->sg_list[idx].lkey;
-			wqe_p->u.nud.sg_list[idx].length =
-				send_wr->sg_list[idx].length;
-			dma_length += send_wr->sg_list[idx].length;
-		} /* eof idx */
-		wqe_p->u.nud.atomic_1st_op_dma_len = dma_length;
-
-		/* unsolicited ack circumvention */
-		if (send_wr->opcode == IB_WR_RDMA_READ) {
-			/* on RDMA read, switch on and reset counters */
-			qp->message_count = qp->packet_count = 0;
-			qp->unsol_ack_circ = 1;
-		} else
-			/* else estimate #packets */
-			qp->packet_count += (dma_length >> qp->mtu_shift) + 1;
-
-		break;
-
-	default:
-		ehca_gen_err("Invalid qptype=%x", qp->qp_type);
-		return -EINVAL;
-	}
-
-	if (ehca_debug_level >= 3) {
-		ehca_gen_dbg("SEND WQE written into queue qp=%p ", qp);
-		ehca_dmp( wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "send wqe");
-	}
-	return 0;
-}
-
-/* map_ib_wc_status converts raw cqe_status to ib_wc_status */
-static inline void map_ib_wc_status(u32 cqe_status,
-				    enum ib_wc_status *wc_status)
-{
-	if (unlikely(cqe_status & WC_STATUS_ERROR_BIT)) {
-		switch (cqe_status & 0x3F) {
-		case 0x01:
-		case 0x21:
-			*wc_status = IB_WC_LOC_LEN_ERR;
-			break;
-		case 0x02:
-		case 0x22:
-			*wc_status = IB_WC_LOC_QP_OP_ERR;
-			break;
-		case 0x03:
-		case 0x23:
-			*wc_status = IB_WC_LOC_EEC_OP_ERR;
-			break;
-		case 0x04:
-		case 0x24:
-			*wc_status = IB_WC_LOC_PROT_ERR;
-			break;
-		case 0x05:
-		case 0x25:
-			*wc_status = IB_WC_WR_FLUSH_ERR;
-			break;
-		case 0x06:
-			*wc_status = IB_WC_MW_BIND_ERR;
-			break;
-		case 0x07: /* remote error - look into bits 20:24 */
-			switch ((cqe_status
-				 & WC_STATUS_REMOTE_ERROR_FLAGS) >> 11) {
-			case 0x0:
-				/*
-				 * PSN Sequence Error!
-				 * couldn't find a matching status!
-				 */
-				*wc_status = IB_WC_GENERAL_ERR;
-				break;
-			case 0x1:
-				*wc_status = IB_WC_REM_INV_REQ_ERR;
-				break;
-			case 0x2:
-				*wc_status = IB_WC_REM_ACCESS_ERR;
-				break;
-			case 0x3:
-				*wc_status = IB_WC_REM_OP_ERR;
-				break;
-			case 0x4:
-				*wc_status = IB_WC_REM_INV_RD_REQ_ERR;
-				break;
-			}
-			break;
-		case 0x08:
-			*wc_status = IB_WC_RETRY_EXC_ERR;
-			break;
-		case 0x09:
-			*wc_status = IB_WC_RNR_RETRY_EXC_ERR;
-			break;
-		case 0x0A:
-		case 0x2D:
-			*wc_status = IB_WC_REM_ABORT_ERR;
-			break;
-		case 0x0B:
-		case 0x2E:
-			*wc_status = IB_WC_INV_EECN_ERR;
-			break;
-		case 0x0C:
-		case 0x2F:
-			*wc_status = IB_WC_INV_EEC_STATE_ERR;
-			break;
-		case 0x0D:
-			*wc_status = IB_WC_BAD_RESP_ERR;
-			break;
-		case 0x10:
-			/* WQE purged */
-			*wc_status = IB_WC_WR_FLUSH_ERR;
-			break;
-		default:
-			*wc_status = IB_WC_FATAL_ERR;
-
-		}
-	} else
-		*wc_status = IB_WC_SUCCESS;
-}
-
-static inline int post_one_send(struct ehca_qp *my_qp,
-			 struct ib_send_wr *cur_send_wr,
-			 int hidden)
-{
-	struct ehca_wqe *wqe_p;
-	int ret;
-	u32 sq_map_idx;
-	u64 start_offset = my_qp->ipz_squeue.current_q_offset;
-
-	/* get pointer next to free WQE */
-	wqe_p = ipz_qeit_get_inc(&my_qp->ipz_squeue);
-	if (unlikely(!wqe_p)) {
-		/* too many posted work requests: queue overflow */
-		ehca_err(my_qp->ib_qp.device, "Too many posted WQEs "
-			 "qp_num=%x", my_qp->ib_qp.qp_num);
-		return -ENOMEM;
-	}
-
-	/*
-	 * Get the index of the WQE in the send queue. The same index is used
-	 * for writing into the sq_map.
-	 */
-	sq_map_idx = start_offset / my_qp->ipz_squeue.qe_size;
-
-	/* write a SEND WQE into the QUEUE */
-	ret = ehca_write_swqe(my_qp, wqe_p, cur_send_wr, sq_map_idx, hidden);
-	/*
-	 * if something failed,
-	 * reset the free entry pointer to the start value
-	 */
-	if (unlikely(ret)) {
-		my_qp->ipz_squeue.current_q_offset = start_offset;
-		ehca_err(my_qp->ib_qp.device, "Could not write WQE "
-			 "qp_num=%x", my_qp->ib_qp.qp_num);
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
-int ehca_post_send(struct ib_qp *qp,
-		   struct ib_send_wr *send_wr,
-		   struct ib_send_wr **bad_send_wr)
-{
-	struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp);
-	int wqe_cnt = 0;
-	int ret = 0;
-	unsigned long flags;
-
-	/* Reject WR if QP is in RESET, INIT or RTR state */
-	if (unlikely(my_qp->state < IB_QPS_RTS)) {
-		ehca_err(qp->device, "Invalid QP state  qp_state=%d qpn=%x",
-			 my_qp->state, qp->qp_num);
-		ret = -EINVAL;
-		goto out;
-	}
-
-	/* LOCK the QUEUE */
-	spin_lock_irqsave(&my_qp->spinlock_s, flags);
-
-	/* Send an empty extra RDMA read if:
-	 *  1) there has been an RDMA read on this connection before
-	 *  2) no RDMA read occurred for ACK_CIRC_THRESHOLD link packets
-	 *  3) we can be sure that any previous extra RDMA read has been
-	 *     processed so we don't overflow the SQ
-	 */
-	if (unlikely(my_qp->unsol_ack_circ &&
-		     my_qp->packet_count > ACK_CIRC_THRESHOLD &&
-		     my_qp->message_count > my_qp->init_attr.cap.max_send_wr)) {
-		/* insert an empty RDMA READ to fix up the remote QP state */
-		struct ib_send_wr circ_wr;
-		memset(&circ_wr, 0, sizeof(circ_wr));
-		circ_wr.opcode = IB_WR_RDMA_READ;
-		post_one_send(my_qp, &circ_wr, 1); /* ignore retcode */
-		wqe_cnt++;
-		ehca_dbg(qp->device, "posted circ wr  qp_num=%x", qp->qp_num);
-		my_qp->message_count = my_qp->packet_count = 0;
-	}
-
-	/* loop processes list of send reqs */
-	while (send_wr) {
-		ret = post_one_send(my_qp, send_wr, 0);
-		if (unlikely(ret)) {
-			goto post_send_exit0;
-		}
-		wqe_cnt++;
-		send_wr = send_wr->next;
-	}
-
-post_send_exit0:
-	iosync(); /* serialize GAL register access */
-	hipz_update_sqa(my_qp, wqe_cnt);
-	if (unlikely(ret || ehca_debug_level >= 2))
-		ehca_dbg(qp->device, "ehca_qp=%p qp_num=%x wqe_cnt=%d ret=%i",
-			 my_qp, qp->qp_num, wqe_cnt, ret);
-	my_qp->message_count += wqe_cnt;
-	spin_unlock_irqrestore(&my_qp->spinlock_s, flags);
-
-out:
-	if (ret)
-		*bad_send_wr = send_wr;
-	return ret;
-}
-
-static int internal_post_recv(struct ehca_qp *my_qp,
-			      struct ib_device *dev,
-			      struct ib_recv_wr *recv_wr,
-			      struct ib_recv_wr **bad_recv_wr)
-{
-	struct ehca_wqe *wqe_p;
-	int wqe_cnt = 0;
-	int ret = 0;
-	u32 rq_map_idx;
-	unsigned long flags;
-	struct ehca_qmap_entry *qmap_entry;
-
-	if (unlikely(!HAS_RQ(my_qp))) {
-		ehca_err(dev, "QP has no RQ  ehca_qp=%p qp_num=%x ext_type=%d",
-			 my_qp, my_qp->real_qp_num, my_qp->ext_type);
-		ret = -ENODEV;
-		goto out;
-	}
-
-	/* LOCK the QUEUE */
-	spin_lock_irqsave(&my_qp->spinlock_r, flags);
-
-	/* loop processes list of recv reqs */
-	while (recv_wr) {
-		u64 start_offset = my_qp->ipz_rqueue.current_q_offset;
-		/* get pointer next to free WQE */
-		wqe_p = ipz_qeit_get_inc(&my_qp->ipz_rqueue);
-		if (unlikely(!wqe_p)) {
-			/* too many posted work requests: queue overflow */
-			ret = -ENOMEM;
-			ehca_err(dev, "Too many posted WQEs "
-				"qp_num=%x", my_qp->real_qp_num);
-			goto post_recv_exit0;
-		}
-		/*
-		 * Get the index of the WQE in the recv queue. The same index
-		 * is used for writing into the rq_map.
-		 */
-		rq_map_idx = start_offset / my_qp->ipz_rqueue.qe_size;
-
-		/* write a RECV WQE into the QUEUE */
-		ret = ehca_write_rwqe(&my_qp->ipz_rqueue, wqe_p, recv_wr,
-				rq_map_idx);
-		/*
-		 * if something failed,
-		 * reset the free entry pointer to the start value
-		 */
-		if (unlikely(ret)) {
-			my_qp->ipz_rqueue.current_q_offset = start_offset;
-			ret = -EINVAL;
-			ehca_err(dev, "Could not write WQE "
-				"qp_num=%x", my_qp->real_qp_num);
-			goto post_recv_exit0;
-		}
-
-		qmap_entry = &my_qp->rq_map.map[rq_map_idx];
-		qmap_entry->app_wr_id = get_app_wr_id(recv_wr->wr_id);
-		qmap_entry->reported = 0;
-		qmap_entry->cqe_req = 1;
-
-		wqe_cnt++;
-		recv_wr = recv_wr->next;
-	} /* eof for recv_wr */
-
-post_recv_exit0:
-	iosync(); /* serialize GAL register access */
-	hipz_update_rqa(my_qp, wqe_cnt);
-	if (unlikely(ret || ehca_debug_level >= 2))
-	    ehca_dbg(dev, "ehca_qp=%p qp_num=%x wqe_cnt=%d ret=%i",
-		     my_qp, my_qp->real_qp_num, wqe_cnt, ret);
-	spin_unlock_irqrestore(&my_qp->spinlock_r, flags);
-
-out:
-	if (ret)
-		*bad_recv_wr = recv_wr;
-
-	return ret;
-}
-
-int ehca_post_recv(struct ib_qp *qp,
-		   struct ib_recv_wr *recv_wr,
-		   struct ib_recv_wr **bad_recv_wr)
-{
-	struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp);
-
-	/* Reject WR if QP is in RESET state */
-	if (unlikely(my_qp->state == IB_QPS_RESET)) {
-		ehca_err(qp->device, "Invalid QP state  qp_state=%d qpn=%x",
-			 my_qp->state, qp->qp_num);
-		*bad_recv_wr = recv_wr;
-		return -EINVAL;
-	}
-
-	return internal_post_recv(my_qp, qp->device, recv_wr, bad_recv_wr);
-}
-
-int ehca_post_srq_recv(struct ib_srq *srq,
-		       struct ib_recv_wr *recv_wr,
-		       struct ib_recv_wr **bad_recv_wr)
-{
-	return internal_post_recv(container_of(srq, struct ehca_qp, ib_srq),
-				  srq->device, recv_wr, bad_recv_wr);
-}
-
-/*
- * ib_wc_opcode table converts ehca wc opcode to ib
- * Since we use zero to indicate invalid opcode, the actual ib opcode must
- * be decremented!!!
- */
-static const u8 ib_wc_opcode[255] = {
-	[0x01] = IB_WC_RECV+1,
-	[0x02] = IB_WC_RECV_RDMA_WITH_IMM+1,
-	[0x08] = IB_WC_FETCH_ADD+1,
-	[0x10] = IB_WC_COMP_SWAP+1,
-	[0x20] = IB_WC_RDMA_WRITE+1,
-	[0x40] = IB_WC_RDMA_READ+1,
-	[0x80] = IB_WC_SEND+1
-};
-
-/* internal function to poll one entry of cq */
-static inline int ehca_poll_cq_one(struct ib_cq *cq, struct ib_wc *wc)
-{
-	int ret = 0, qmap_tail_idx;
-	struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
-	struct ehca_cqe *cqe;
-	struct ehca_qp *my_qp;
-	struct ehca_qmap_entry *qmap_entry;
-	struct ehca_queue_map *qmap;
-	int cqe_count = 0, is_error;
-
-repoll:
-	cqe = (struct ehca_cqe *)
-		ipz_qeit_get_inc_valid(&my_cq->ipz_queue);
-	if (!cqe) {
-		ret = -EAGAIN;
-		if (ehca_debug_level >= 3)
-			ehca_dbg(cq->device, "Completion queue is empty  "
-				 "my_cq=%p cq_num=%x", my_cq, my_cq->cq_number);
-		goto poll_cq_one_exit0;
-	}
-
-	/* prevents loads being reordered across this point */
-	rmb();
-
-	cqe_count++;
-	if (unlikely(cqe->status & WC_STATUS_PURGE_BIT)) {
-		struct ehca_qp *qp;
-		int purgeflag;
-		unsigned long flags;
-
-		qp = ehca_cq_get_qp(my_cq, cqe->local_qp_number);
-		if (!qp) {
-			ehca_err(cq->device, "cq_num=%x qp_num=%x "
-				 "could not find qp -> ignore cqe",
-				 my_cq->cq_number, cqe->local_qp_number);
-			ehca_dmp(cqe, 64, "cq_num=%x qp_num=%x",
-				 my_cq->cq_number, cqe->local_qp_number);
-			/* ignore this purged cqe */
-			goto repoll;
-		}
-		spin_lock_irqsave(&qp->spinlock_s, flags);
-		purgeflag = qp->sqerr_purgeflag;
-		spin_unlock_irqrestore(&qp->spinlock_s, flags);
-
-		if (purgeflag) {
-			ehca_dbg(cq->device,
-				 "Got CQE with purged bit qp_num=%x src_qp=%x",
-				 cqe->local_qp_number, cqe->remote_qp_number);
-			if (ehca_debug_level >= 2)
-				ehca_dmp(cqe, 64, "qp_num=%x src_qp=%x",
-					 cqe->local_qp_number,
-					 cqe->remote_qp_number);
-			/*
-			 * ignore this to avoid double cqes of bad wqe
-			 * that caused sqe and turn off purge flag
-			 */
-			qp->sqerr_purgeflag = 0;
-			goto repoll;
-		}
-	}
-
-	is_error = cqe->status & WC_STATUS_ERROR_BIT;
-
-	/* trace error CQEs if debug_level >= 1, trace all CQEs if >= 3 */
-	if (unlikely(ehca_debug_level >= 3 || (ehca_debug_level && is_error))) {
-		ehca_dbg(cq->device,
-			 "Received %sCOMPLETION ehca_cq=%p cq_num=%x -----",
-			 is_error ? "ERROR " : "", my_cq, my_cq->cq_number);
-		ehca_dmp(cqe, 64, "ehca_cq=%p cq_num=%x",
-			 my_cq, my_cq->cq_number);
-		ehca_dbg(cq->device,
-			 "ehca_cq=%p cq_num=%x -------------------------",
-			 my_cq, my_cq->cq_number);
-	}
-
-	read_lock(&ehca_qp_idr_lock);
-	my_qp = idr_find(&ehca_qp_idr, cqe->qp_token);
-	read_unlock(&ehca_qp_idr_lock);
-	if (!my_qp)
-		goto repoll;
-	wc->qp = &my_qp->ib_qp;
-
-	qmap_tail_idx = get_app_wr_id(cqe->work_request_id);
-	if (!(cqe->w_completion_flags & WC_SEND_RECEIVE_BIT))
-		/* We got a send completion. */
-		qmap = &my_qp->sq_map;
-	else
-		/* We got a receive completion. */
-		qmap = &my_qp->rq_map;
-
-	/* advance the tail pointer */
-	qmap->tail = qmap_tail_idx;
-
-	if (is_error) {
-		/*
-		 * set left_to_poll to 0 because in error state, we will not
-		 * get any additional CQEs
-		 */
-		my_qp->sq_map.next_wqe_idx = next_index(my_qp->sq_map.tail,
-							my_qp->sq_map.entries);
-		my_qp->sq_map.left_to_poll = 0;
-		ehca_add_to_err_list(my_qp, 1);
-
-		my_qp->rq_map.next_wqe_idx = next_index(my_qp->rq_map.tail,
-							my_qp->rq_map.entries);
-		my_qp->rq_map.left_to_poll = 0;
-		if (HAS_RQ(my_qp))
-			ehca_add_to_err_list(my_qp, 0);
-	}
-
-	qmap_entry = &qmap->map[qmap_tail_idx];
-	if (qmap_entry->reported) {
-		ehca_warn(cq->device, "Double cqe on qp_num=%#x",
-				my_qp->real_qp_num);
-		/* found a double cqe, discard it and read next one */
-		goto repoll;
-	}
-
-	wc->wr_id = replace_wr_id(cqe->work_request_id, qmap_entry->app_wr_id);
-	qmap_entry->reported = 1;
-
-	/* if left_to_poll is decremented to 0, add the QP to the error list */
-	if (qmap->left_to_poll > 0) {
-		qmap->left_to_poll--;
-		if ((my_qp->sq_map.left_to_poll == 0) &&
-				(my_qp->rq_map.left_to_poll == 0)) {
-			ehca_add_to_err_list(my_qp, 1);
-			if (HAS_RQ(my_qp))
-				ehca_add_to_err_list(my_qp, 0);
-		}
-	}
-
-	/* eval ib_wc_opcode */
-	wc->opcode = ib_wc_opcode[cqe->optype]-1;
-	if (unlikely(wc->opcode == -1)) {
-		ehca_err(cq->device, "Invalid cqe->OPType=%x cqe->status=%x "
-			 "ehca_cq=%p cq_num=%x",
-			 cqe->optype, cqe->status, my_cq, my_cq->cq_number);
-		/* dump cqe for other infos */
-		ehca_dmp(cqe, 64, "ehca_cq=%p cq_num=%x",
-			 my_cq, my_cq->cq_number);
-		/* update also queue adder to throw away this entry!!! */
-		goto repoll;
-	}
-
-	/* eval ib_wc_status */
-	if (unlikely(is_error)) {
-		/* complete with errors */
-		map_ib_wc_status(cqe->status, &wc->status);
-		wc->vendor_err = wc->status;
-	} else
-		wc->status = IB_WC_SUCCESS;
-
-	wc->byte_len = cqe->nr_bytes_transferred;
-	wc->pkey_index = cqe->pkey_index;
-	wc->slid = cqe->rlid;
-	wc->dlid_path_bits = cqe->dlid;
-	wc->src_qp = cqe->remote_qp_number;
-	/*
-	 * HW has "Immed data present" and "GRH present" in bits 6 and 5.
-	 * SW defines those in bits 1 and 0, so we can just shift and mask.
-	 */
-	wc->wc_flags = (cqe->w_completion_flags >> 5) & 3;
-	wc->ex.imm_data = cpu_to_be32(cqe->immediate_data);
-	wc->sl = cqe->service_level;
-
-poll_cq_one_exit0:
-	if (cqe_count > 0)
-		hipz_update_feca(my_cq, cqe_count);
-
-	return ret;
-}
-
-static int generate_flush_cqes(struct ehca_qp *my_qp, struct ib_cq *cq,
-			       struct ib_wc *wc, int num_entries,
-			       struct ipz_queue *ipz_queue, int on_sq)
-{
-	int nr = 0;
-	struct ehca_wqe *wqe;
-	u64 offset;
-	struct ehca_queue_map *qmap;
-	struct ehca_qmap_entry *qmap_entry;
-
-	if (on_sq)
-		qmap = &my_qp->sq_map;
-	else
-		qmap = &my_qp->rq_map;
-
-	qmap_entry = &qmap->map[qmap->next_wqe_idx];
-
-	while ((nr < num_entries) && (qmap_entry->reported == 0)) {
-		/* generate flush CQE */
-
-		memset(wc, 0, sizeof(*wc));
-
-		offset = qmap->next_wqe_idx * ipz_queue->qe_size;
-		wqe = (struct ehca_wqe *)ipz_qeit_calc(ipz_queue, offset);
-		if (!wqe) {
-			ehca_err(cq->device, "Invalid wqe offset=%#llx on "
-				 "qp_num=%#x", offset, my_qp->real_qp_num);
-			return nr;
-		}
-
-		wc->wr_id = replace_wr_id(wqe->work_request_id,
-					  qmap_entry->app_wr_id);
-
-		if (on_sq) {
-			switch (wqe->optype) {
-			case WQE_OPTYPE_SEND:
-				wc->opcode = IB_WC_SEND;
-				break;
-			case WQE_OPTYPE_RDMAWRITE:
-				wc->opcode = IB_WC_RDMA_WRITE;
-				break;
-			case WQE_OPTYPE_RDMAREAD:
-				wc->opcode = IB_WC_RDMA_READ;
-				break;
-			default:
-				ehca_err(cq->device, "Invalid optype=%x",
-						wqe->optype);
-				return nr;
-			}
-		} else
-			wc->opcode = IB_WC_RECV;
-
-		if (wqe->wr_flag & WQE_WRFLAG_IMM_DATA_PRESENT) {
-			wc->ex.imm_data = wqe->immediate_data;
-			wc->wc_flags |= IB_WC_WITH_IMM;
-		}
-
-		wc->status = IB_WC_WR_FLUSH_ERR;
-
-		wc->qp = &my_qp->ib_qp;
-
-		/* mark as reported and advance next_wqe pointer */
-		qmap_entry->reported = 1;
-		qmap->next_wqe_idx = next_index(qmap->next_wqe_idx,
-						qmap->entries);
-		qmap_entry = &qmap->map[qmap->next_wqe_idx];
-
-		wc++; nr++;
-	}
-
-	return nr;
-
-}
-
-int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc)
-{
-	struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
-	int nr;
-	struct ehca_qp *err_qp;
-	struct ib_wc *current_wc = wc;
-	int ret = 0;
-	unsigned long flags;
-	int entries_left = num_entries;
-
-	if (num_entries < 1) {
-		ehca_err(cq->device, "Invalid num_entries=%d ehca_cq=%p "
-			 "cq_num=%x", num_entries, my_cq, my_cq->cq_number);
-		ret = -EINVAL;
-		goto poll_cq_exit0;
-	}
-
-	spin_lock_irqsave(&my_cq->spinlock, flags);
-
-	/* generate flush cqes for send queues */
-	list_for_each_entry(err_qp, &my_cq->sqp_err_list, sq_err_node) {
-		nr = generate_flush_cqes(err_qp, cq, current_wc, entries_left,
-				&err_qp->ipz_squeue, 1);
-		entries_left -= nr;
-		current_wc += nr;
-
-		if (entries_left == 0)
-			break;
-	}
-
-	/* generate flush cqes for receive queues */
-	list_for_each_entry(err_qp, &my_cq->rqp_err_list, rq_err_node) {
-		nr = generate_flush_cqes(err_qp, cq, current_wc, entries_left,
-				&err_qp->ipz_rqueue, 0);
-		entries_left -= nr;
-		current_wc += nr;
-
-		if (entries_left == 0)
-			break;
-	}
-
-	for (nr = 0; nr < entries_left; nr++) {
-		ret = ehca_poll_cq_one(cq, current_wc);
-		if (ret)
-			break;
-		current_wc++;
-	} /* eof for nr */
-	entries_left -= nr;
-
-	spin_unlock_irqrestore(&my_cq->spinlock, flags);
-	if (ret == -EAGAIN  || !ret)
-		ret = num_entries - entries_left;
-
-poll_cq_exit0:
-	return ret;
-}
-
-int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags notify_flags)
-{
-	struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
-	int ret = 0;
-
-	switch (notify_flags & IB_CQ_SOLICITED_MASK) {
-	case IB_CQ_SOLICITED:
-		hipz_set_cqx_n0(my_cq, 1);
-		break;
-	case IB_CQ_NEXT_COMP:
-		hipz_set_cqx_n1(my_cq, 1);
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	if (notify_flags & IB_CQ_REPORT_MISSED_EVENTS) {
-		unsigned long spl_flags;
-		spin_lock_irqsave(&my_cq->spinlock, spl_flags);
-		ret = ipz_qeit_is_valid(&my_cq->ipz_queue);
-		spin_unlock_irqrestore(&my_cq->spinlock, spl_flags);
-	}
-
-	return ret;
-}
diff --git a/drivers/staging/rdma/ehca/ehca_sqp.c b/drivers/staging/rdma/ehca/ehca_sqp.c
deleted file mode 100644
index 376b031..0000000
--- a/drivers/staging/rdma/ehca/ehca_sqp.c
+++ /dev/null
@@ -1,245 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  SQP functions
- *
- *  Authors: Khadija Souissi <souissi@de.ibm.com>
- *           Heiko J Schick <schickhj@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <rdma/ib_mad.h>
-
-#include "ehca_classes.h"
-#include "ehca_tools.h"
-#include "ehca_iverbs.h"
-#include "hcp_if.h"
-
-#define IB_MAD_STATUS_REDIRECT		cpu_to_be16(0x0002)
-#define IB_MAD_STATUS_UNSUP_VERSION	cpu_to_be16(0x0004)
-#define IB_MAD_STATUS_UNSUP_METHOD	cpu_to_be16(0x0008)
-
-#define IB_PMA_CLASS_PORT_INFO		cpu_to_be16(0x0001)
-
-/**
- * ehca_define_sqp - Defines special queue pair 1 (GSI QP). When special queue
- * pair is created successfully, the corresponding port gets active.
- *
- * Define Special Queue pair 0 (SMI QP) is still not supported.
- *
- * @qp_init_attr: Queue pair init attributes with port and queue pair type
- */
-
-u64 ehca_define_sqp(struct ehca_shca *shca,
-		    struct ehca_qp *ehca_qp,
-		    struct ib_qp_init_attr *qp_init_attr)
-{
-	u32 pma_qp_nr, bma_qp_nr;
-	u64 ret;
-	u8 port = qp_init_attr->port_num;
-	int counter;
-
-	shca->sport[port - 1].port_state = IB_PORT_DOWN;
-
-	switch (qp_init_attr->qp_type) {
-	case IB_QPT_SMI:
-		/* function not supported yet */
-		break;
-	case IB_QPT_GSI:
-		ret = hipz_h_define_aqp1(shca->ipz_hca_handle,
-					 ehca_qp->ipz_qp_handle,
-					 ehca_qp->galpas.kernel,
-					 (u32) qp_init_attr->port_num,
-					 &pma_qp_nr, &bma_qp_nr);
-
-		if (ret != H_SUCCESS) {
-			ehca_err(&shca->ib_device,
-				 "Can't define AQP1 for port %x. h_ret=%lli",
-				 port, ret);
-			return ret;
-		}
-		shca->sport[port - 1].pma_qp_nr = pma_qp_nr;
-		ehca_dbg(&shca->ib_device, "port=%x pma_qp_nr=%x",
-			 port, pma_qp_nr);
-		break;
-	default:
-		ehca_err(&shca->ib_device, "invalid qp_type=%x",
-			 qp_init_attr->qp_type);
-		return H_PARAMETER;
-	}
-
-	if (ehca_nr_ports < 0) /* autodetect mode */
-		return H_SUCCESS;
-
-	for (counter = 0;
-	     shca->sport[port - 1].port_state != IB_PORT_ACTIVE &&
-		     counter < ehca_port_act_time;
-	     counter++) {
-		ehca_dbg(&shca->ib_device, "... wait until port %x is active",
-			 port);
-		msleep_interruptible(1000);
-	}
-
-	if (counter == ehca_port_act_time) {
-		ehca_err(&shca->ib_device, "Port %x is not active.", port);
-		return H_HARDWARE;
-	}
-
-	return H_SUCCESS;
-}
-
-struct ib_perf {
-	struct ib_mad_hdr mad_hdr;
-	u8 reserved[40];
-	u8 data[192];
-} __attribute__ ((packed));
-
-/* TC/SL/FL packed into 32 bits, as in ClassPortInfo */
-struct tcslfl {
-	u32 tc:8;
-	u32 sl:4;
-	u32 fl:20;
-} __attribute__ ((packed));
-
-/* IP Version/TC/FL packed into 32 bits, as in GRH */
-struct vertcfl {
-	u32 ver:4;
-	u32 tc:8;
-	u32 fl:20;
-} __attribute__ ((packed));
-
-static int ehca_process_perf(struct ib_device *ibdev, u8 port_num,
-			     const struct ib_wc *in_wc, const struct ib_grh *in_grh,
-			     const struct ib_mad *in_mad, struct ib_mad *out_mad)
-{
-	const struct ib_perf *in_perf = (const struct ib_perf *)in_mad;
-	struct ib_perf *out_perf = (struct ib_perf *)out_mad;
-	struct ib_class_port_info *poi =
-		(struct ib_class_port_info *)out_perf->data;
-	struct tcslfl *tcslfl =
-		(struct tcslfl *)&poi->redirect_tcslfl;
-	struct ehca_shca *shca =
-		container_of(ibdev, struct ehca_shca, ib_device);
-	struct ehca_sport *sport = &shca->sport[port_num - 1];
-
-	ehca_dbg(ibdev, "method=%x", in_perf->mad_hdr.method);
-
-	*out_mad = *in_mad;
-
-	if (in_perf->mad_hdr.class_version != 1) {
-		ehca_warn(ibdev, "Unsupported class_version=%x",
-			  in_perf->mad_hdr.class_version);
-		out_perf->mad_hdr.status = IB_MAD_STATUS_UNSUP_VERSION;
-		goto perf_reply;
-	}
-
-	switch (in_perf->mad_hdr.method) {
-	case IB_MGMT_METHOD_GET:
-	case IB_MGMT_METHOD_SET:
-		/* set class port info for redirection */
-		out_perf->mad_hdr.attr_id = IB_PMA_CLASS_PORT_INFO;
-		out_perf->mad_hdr.status = IB_MAD_STATUS_REDIRECT;
-		memset(poi, 0, sizeof(*poi));
-		poi->base_version = 1;
-		poi->class_version = 1;
-		poi->resp_time_value = 18;
-
-		/* copy local routing information from WC where applicable */
-		tcslfl->sl         = in_wc->sl;
-		poi->redirect_lid  =
-			sport->saved_attr.lid | in_wc->dlid_path_bits;
-		poi->redirect_qp   = sport->pma_qp_nr;
-		poi->redirect_qkey = IB_QP1_QKEY;
-
-		ehca_query_pkey(ibdev, port_num, in_wc->pkey_index,
-				&poi->redirect_pkey);
-
-		/* if request was globally routed, copy route info */
-		if (in_grh) {
-			const struct vertcfl *vertcfl =
-				(const struct vertcfl *)&in_grh->version_tclass_flow;
-			memcpy(poi->redirect_gid, in_grh->dgid.raw,
-			       sizeof(poi->redirect_gid));
-			tcslfl->tc        = vertcfl->tc;
-			tcslfl->fl        = vertcfl->fl;
-		} else
-			/* else only fill in default GID */
-			ehca_query_gid(ibdev, port_num, 0,
-				       (union ib_gid *)&poi->redirect_gid);
-
-		ehca_dbg(ibdev, "ehca_pma_lid=%x ehca_pma_qp=%x",
-			 sport->saved_attr.lid, sport->pma_qp_nr);
-		break;
-
-	case IB_MGMT_METHOD_GET_RESP:
-		return IB_MAD_RESULT_FAILURE;
-
-	default:
-		out_perf->mad_hdr.status = IB_MAD_STATUS_UNSUP_METHOD;
-		break;
-	}
-
-perf_reply:
-	out_perf->mad_hdr.method = IB_MGMT_METHOD_GET_RESP;
-
-	return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
-}
-
-int ehca_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
-		     const struct ib_wc *in_wc, const struct ib_grh *in_grh,
-		     const struct ib_mad_hdr *in, size_t in_mad_size,
-		     struct ib_mad_hdr *out, size_t *out_mad_size,
-		     u16 *out_mad_pkey_index)
-{
-	int ret;
-	const struct ib_mad *in_mad = (const struct ib_mad *)in;
-	struct ib_mad *out_mad = (struct ib_mad *)out;
-
-	if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) ||
-			 *out_mad_size != sizeof(*out_mad)))
-		return IB_MAD_RESULT_FAILURE;
-
-	if (!port_num || port_num > ibdev->phys_port_cnt || !in_wc)
-		return IB_MAD_RESULT_FAILURE;
-
-	/* accept only pma request */
-	if (in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_PERF_MGMT)
-		return IB_MAD_RESULT_SUCCESS;
-
-	ehca_dbg(ibdev, "port_num=%x src_qp=%x", port_num, in_wc->src_qp);
-	ret = ehca_process_perf(ibdev, port_num, in_wc, in_grh,
-				in_mad, out_mad);
-
-	return ret;
-}
diff --git a/drivers/staging/rdma/ehca/ehca_tools.h b/drivers/staging/rdma/ehca/ehca_tools.h
deleted file mode 100644
index d280b12..0000000
--- a/drivers/staging/rdma/ehca/ehca_tools.h
+++ /dev/null
@@ -1,155 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  auxiliary functions
- *
- *  Authors: Christoph Raisch <raisch@de.ibm.com>
- *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
- *           Khadija Souissi <souissik@de.ibm.com>
- *           Waleri Fomin <fomin@de.ibm.com>
- *           Heiko J Schick <schickhj@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-
-#ifndef EHCA_TOOLS_H
-#define EHCA_TOOLS_H
-
-#include <linux/kernel.h>
-#include <linux/spinlock.h>
-#include <linux/delay.h>
-#include <linux/idr.h>
-#include <linux/kthread.h>
-#include <linux/mm.h>
-#include <linux/mman.h>
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/vmalloc.h>
-#include <linux/notifier.h>
-#include <linux/cpu.h>
-#include <linux/device.h>
-
-#include <linux/atomic.h>
-#include <asm/ibmebus.h>
-#include <asm/io.h>
-#include <asm/pgtable.h>
-#include <asm/hvcall.h>
-
-extern int ehca_debug_level;
-
-#define ehca_dbg(ib_dev, format, arg...) \
-	do { \
-		if (unlikely(ehca_debug_level)) \
-			dev_printk(KERN_DEBUG, (ib_dev)->dma_device, \
-				   "PU%04x EHCA_DBG:%s " format "\n", \
-				   raw_smp_processor_id(), __func__, \
-				   ## arg); \
-	} while (0)
-
-#define ehca_info(ib_dev, format, arg...) \
-	dev_info((ib_dev)->dma_device, "PU%04x EHCA_INFO:%s " format "\n", \
-		 raw_smp_processor_id(), __func__, ## arg)
-
-#define ehca_warn(ib_dev, format, arg...) \
-	dev_warn((ib_dev)->dma_device, "PU%04x EHCA_WARN:%s " format "\n", \
-		 raw_smp_processor_id(), __func__, ## arg)
-
-#define ehca_err(ib_dev, format, arg...) \
-	dev_err((ib_dev)->dma_device, "PU%04x EHCA_ERR:%s " format "\n", \
-		raw_smp_processor_id(), __func__, ## arg)
-
-/* use this one only if no ib_dev available */
-#define ehca_gen_dbg(format, arg...) \
-	do { \
-		if (unlikely(ehca_debug_level)) \
-			printk(KERN_DEBUG "PU%04x EHCA_DBG:%s " format "\n", \
-			       raw_smp_processor_id(), __func__, ## arg); \
-	} while (0)
-
-#define ehca_gen_warn(format, arg...) \
-	printk(KERN_INFO "PU%04x EHCA_WARN:%s " format "\n", \
-	       raw_smp_processor_id(), __func__, ## arg)
-
-#define ehca_gen_err(format, arg...) \
-	printk(KERN_ERR "PU%04x EHCA_ERR:%s " format "\n", \
-	       raw_smp_processor_id(), __func__, ## arg)
-
-/**
- * ehca_dmp - printk a memory block, whose length is n*8 bytes.
- * Each line has the following layout:
- * <format string> adr=X ofs=Y <8 bytes hex> <8 bytes hex>
- */
-#define ehca_dmp(adr, len, format, args...) \
-	do { \
-		unsigned int x; \
-		unsigned int l = (unsigned int)(len); \
-		unsigned char *deb = (unsigned char *)(adr); \
-		for (x = 0; x < l; x += 16) { \
-			printk(KERN_INFO "EHCA_DMP:%s " format \
-			       " adr=%p ofs=%04x %016llx %016llx\n", \
-			       __func__, ##args, deb, x, \
-			       *((u64 *)&deb[0]), *((u64 *)&deb[8])); \
-			deb += 16; \
-		} \
-	} while (0)
-
-/* define a bitmask, little endian version */
-#define EHCA_BMASK(pos, length) (((pos) << 16) + (length))
-
-/* define a bitmask, the ibm way... */
-#define EHCA_BMASK_IBM(from, to) (((63 - to) << 16) + ((to) - (from) + 1))
-
-/* internal function, don't use */
-#define EHCA_BMASK_SHIFTPOS(mask) (((mask) >> 16) & 0xffff)
-
-/* internal function, don't use */
-#define EHCA_BMASK_MASK(mask) (~0ULL >> ((64 - (mask)) & 0xffff))
-
-/**
- * EHCA_BMASK_SET - return value shifted and masked by mask
- * variable|=EHCA_BMASK_SET(MY_MASK,0x4711) ORs the bits in variable
- * variable&=~EHCA_BMASK_SET(MY_MASK,-1) clears the bits from the mask
- * in variable
- */
-#define EHCA_BMASK_SET(mask, value) \
-	((EHCA_BMASK_MASK(mask) & ((u64)(value))) << EHCA_BMASK_SHIFTPOS(mask))
-
-/**
- * EHCA_BMASK_GET - extract a parameter from value by mask
- */
-#define EHCA_BMASK_GET(mask, value) \
-	(EHCA_BMASK_MASK(mask) & (((u64)(value)) >> EHCA_BMASK_SHIFTPOS(mask)))
-
-/* Converts ehca to ib return code */
-int ehca2ib_return_code(u64 ehca_rc);
-
-#endif /* EHCA_TOOLS_H */
diff --git a/drivers/staging/rdma/ehca/ehca_uverbs.c b/drivers/staging/rdma/ehca/ehca_uverbs.c
deleted file mode 100644
index 1a1d5d9..0000000
--- a/drivers/staging/rdma/ehca/ehca_uverbs.c
+++ /dev/null
@@ -1,309 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  userspace support verbs
- *
- *  Authors: Christoph Raisch <raisch@de.ibm.com>
- *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
- *           Heiko J Schick <schickhj@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/slab.h>
-
-#include "ehca_classes.h"
-#include "ehca_iverbs.h"
-#include "ehca_mrmw.h"
-#include "ehca_tools.h"
-#include "hcp_if.h"
-
-struct ib_ucontext *ehca_alloc_ucontext(struct ib_device *device,
-					struct ib_udata *udata)
-{
-	struct ehca_ucontext *my_context;
-
-	my_context = kzalloc(sizeof *my_context, GFP_KERNEL);
-	if (!my_context) {
-		ehca_err(device, "Out of memory device=%p", device);
-		return ERR_PTR(-ENOMEM);
-	}
-
-	return &my_context->ib_ucontext;
-}
-
-int ehca_dealloc_ucontext(struct ib_ucontext *context)
-{
-	kfree(container_of(context, struct ehca_ucontext, ib_ucontext));
-	return 0;
-}
-
-static void ehca_mm_open(struct vm_area_struct *vma)
-{
-	u32 *count = (u32 *)vma->vm_private_data;
-	if (!count) {
-		ehca_gen_err("Invalid vma struct vm_start=%lx vm_end=%lx",
-			     vma->vm_start, vma->vm_end);
-		return;
-	}
-	(*count)++;
-	if (!(*count))
-		ehca_gen_err("Use count overflow vm_start=%lx vm_end=%lx",
-			     vma->vm_start, vma->vm_end);
-	ehca_gen_dbg("vm_start=%lx vm_end=%lx count=%x",
-		     vma->vm_start, vma->vm_end, *count);
-}
-
-static void ehca_mm_close(struct vm_area_struct *vma)
-{
-	u32 *count = (u32 *)vma->vm_private_data;
-	if (!count) {
-		ehca_gen_err("Invalid vma struct vm_start=%lx vm_end=%lx",
-			     vma->vm_start, vma->vm_end);
-		return;
-	}
-	(*count)--;
-	ehca_gen_dbg("vm_start=%lx vm_end=%lx count=%x",
-		     vma->vm_start, vma->vm_end, *count);
-}
-
-static const struct vm_operations_struct vm_ops = {
-	.open =	ehca_mm_open,
-	.close = ehca_mm_close,
-};
-
-static int ehca_mmap_fw(struct vm_area_struct *vma, struct h_galpas *galpas,
-			u32 *mm_count)
-{
-	int ret;
-	u64 vsize, physical;
-
-	vsize = vma->vm_end - vma->vm_start;
-	if (vsize < EHCA_PAGESIZE) {
-		ehca_gen_err("invalid vsize=%lx", vma->vm_end - vma->vm_start);
-		return -EINVAL;
-	}
-
-	physical = galpas->user.fw_handle;
-	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-	ehca_gen_dbg("vsize=%llx physical=%llx", vsize, physical);
-	/* VM_IO | VM_DONTEXPAND | VM_DONTDUMP are set by remap_pfn_range() */
-	ret = remap_4k_pfn(vma, vma->vm_start, physical >> EHCA_PAGESHIFT,
-			   vma->vm_page_prot);
-	if (unlikely(ret)) {
-		ehca_gen_err("remap_pfn_range() failed ret=%i", ret);
-		return -ENOMEM;
-	}
-
-	vma->vm_private_data = mm_count;
-	(*mm_count)++;
-	vma->vm_ops = &vm_ops;
-
-	return 0;
-}
-
-static int ehca_mmap_queue(struct vm_area_struct *vma, struct ipz_queue *queue,
-			   u32 *mm_count)
-{
-	int ret;
-	u64 start, ofs;
-	struct page *page;
-
-	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
-	start = vma->vm_start;
-	for (ofs = 0; ofs < queue->queue_length; ofs += PAGE_SIZE) {
-		u64 virt_addr = (u64)ipz_qeit_calc(queue, ofs);
-		page = virt_to_page(virt_addr);
-		ret = vm_insert_page(vma, start, page);
-		if (unlikely(ret)) {
-			ehca_gen_err("vm_insert_page() failed rc=%i", ret);
-			return ret;
-		}
-		start += PAGE_SIZE;
-	}
-	vma->vm_private_data = mm_count;
-	(*mm_count)++;
-	vma->vm_ops = &vm_ops;
-
-	return 0;
-}
-
-static int ehca_mmap_cq(struct vm_area_struct *vma, struct ehca_cq *cq,
-			u32 rsrc_type)
-{
-	int ret;
-
-	switch (rsrc_type) {
-	case 0: /* galpa fw handle */
-		ehca_dbg(cq->ib_cq.device, "cq_num=%x fw", cq->cq_number);
-		ret = ehca_mmap_fw(vma, &cq->galpas, &cq->mm_count_galpa);
-		if (unlikely(ret)) {
-			ehca_err(cq->ib_cq.device,
-				 "ehca_mmap_fw() failed rc=%i cq_num=%x",
-				 ret, cq->cq_number);
-			return ret;
-		}
-		break;
-
-	case 1: /* cq queue_addr */
-		ehca_dbg(cq->ib_cq.device, "cq_num=%x queue", cq->cq_number);
-		ret = ehca_mmap_queue(vma, &cq->ipz_queue, &cq->mm_count_queue);
-		if (unlikely(ret)) {
-			ehca_err(cq->ib_cq.device,
-				 "ehca_mmap_queue() failed rc=%i cq_num=%x",
-				 ret, cq->cq_number);
-			return ret;
-		}
-		break;
-
-	default:
-		ehca_err(cq->ib_cq.device, "bad resource type=%x cq_num=%x",
-			 rsrc_type, cq->cq_number);
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
-static int ehca_mmap_qp(struct vm_area_struct *vma, struct ehca_qp *qp,
-			u32 rsrc_type)
-{
-	int ret;
-
-	switch (rsrc_type) {
-	case 0: /* galpa fw handle */
-		ehca_dbg(qp->ib_qp.device, "qp_num=%x fw", qp->ib_qp.qp_num);
-		ret = ehca_mmap_fw(vma, &qp->galpas, &qp->mm_count_galpa);
-		if (unlikely(ret)) {
-			ehca_err(qp->ib_qp.device,
-				 "remap_pfn_range() failed ret=%i qp_num=%x",
-				 ret, qp->ib_qp.qp_num);
-			return -ENOMEM;
-		}
-		break;
-
-	case 1: /* qp rqueue_addr */
-		ehca_dbg(qp->ib_qp.device, "qp_num=%x rq", qp->ib_qp.qp_num);
-		ret = ehca_mmap_queue(vma, &qp->ipz_rqueue,
-				      &qp->mm_count_rqueue);
-		if (unlikely(ret)) {
-			ehca_err(qp->ib_qp.device,
-				 "ehca_mmap_queue(rq) failed rc=%i qp_num=%x",
-				 ret, qp->ib_qp.qp_num);
-			return ret;
-		}
-		break;
-
-	case 2: /* qp squeue_addr */
-		ehca_dbg(qp->ib_qp.device, "qp_num=%x sq", qp->ib_qp.qp_num);
-		ret = ehca_mmap_queue(vma, &qp->ipz_squeue,
-				      &qp->mm_count_squeue);
-		if (unlikely(ret)) {
-			ehca_err(qp->ib_qp.device,
-				 "ehca_mmap_queue(sq) failed rc=%i qp_num=%x",
-				 ret, qp->ib_qp.qp_num);
-			return ret;
-		}
-		break;
-
-	default:
-		ehca_err(qp->ib_qp.device, "bad resource type=%x qp=num=%x",
-			 rsrc_type, qp->ib_qp.qp_num);
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
-int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
-{
-	u64 fileoffset = vma->vm_pgoff;
-	u32 idr_handle = fileoffset & 0x1FFFFFF;
-	u32 q_type = (fileoffset >> 27) & 0x1;	  /* CQ, QP,...        */
-	u32 rsrc_type = (fileoffset >> 25) & 0x3; /* sq,rq,cmnd_window */
-	u32 ret;
-	struct ehca_cq *cq;
-	struct ehca_qp *qp;
-	struct ib_uobject *uobject;
-
-	switch (q_type) {
-	case  0: /* CQ */
-		read_lock(&ehca_cq_idr_lock);
-		cq = idr_find(&ehca_cq_idr, idr_handle);
-		read_unlock(&ehca_cq_idr_lock);
-
-		/* make sure this mmap really belongs to the authorized user */
-		if (!cq)
-			return -EINVAL;
-
-		if (!cq->ib_cq.uobject || cq->ib_cq.uobject->context != context)
-			return -EINVAL;
-
-		ret = ehca_mmap_cq(vma, cq, rsrc_type);
-		if (unlikely(ret)) {
-			ehca_err(cq->ib_cq.device,
-				 "ehca_mmap_cq() failed rc=%i cq_num=%x",
-				 ret, cq->cq_number);
-			return ret;
-		}
-		break;
-
-	case 1: /* QP */
-		read_lock(&ehca_qp_idr_lock);
-		qp = idr_find(&ehca_qp_idr, idr_handle);
-		read_unlock(&ehca_qp_idr_lock);
-
-		/* make sure this mmap really belongs to the authorized user */
-		if (!qp)
-			return -EINVAL;
-
-		uobject = IS_SRQ(qp) ? qp->ib_srq.uobject : qp->ib_qp.uobject;
-		if (!uobject || uobject->context != context)
-			return -EINVAL;
-
-		ret = ehca_mmap_qp(vma, qp, rsrc_type);
-		if (unlikely(ret)) {
-			ehca_err(qp->ib_qp.device,
-				 "ehca_mmap_qp() failed rc=%i qp_num=%x",
-				 ret, qp->ib_qp.qp_num);
-			return ret;
-		}
-		break;
-
-	default:
-		ehca_gen_err("bad queue type %x", q_type);
-		return -EINVAL;
-	}
-
-	return 0;
-}
diff --git a/drivers/staging/rdma/ehca/hcp_if.c b/drivers/staging/rdma/ehca/hcp_if.c
deleted file mode 100644
index 89517ff..0000000
--- a/drivers/staging/rdma/ehca/hcp_if.c
+++ /dev/null
@@ -1,949 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  Firmware Infiniband Interface code for POWER
- *
- *  Authors: Christoph Raisch <raisch@de.ibm.com>
- *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
- *           Joachim Fenkes <fenkes@de.ibm.com>
- *           Gerd Bayer <gerd.bayer@de.ibm.com>
- *           Waleri Fomin <fomin@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <asm/hvcall.h>
-#include "ehca_tools.h"
-#include "hcp_if.h"
-#include "hcp_phyp.h"
-#include "hipz_fns.h"
-#include "ipz_pt_fn.h"
-
-#define H_ALL_RES_QP_ENHANCED_OPS       EHCA_BMASK_IBM(9, 11)
-#define H_ALL_RES_QP_PTE_PIN            EHCA_BMASK_IBM(12, 12)
-#define H_ALL_RES_QP_SERVICE_TYPE       EHCA_BMASK_IBM(13, 15)
-#define H_ALL_RES_QP_STORAGE            EHCA_BMASK_IBM(16, 17)
-#define H_ALL_RES_QP_LL_RQ_CQE_POSTING  EHCA_BMASK_IBM(18, 18)
-#define H_ALL_RES_QP_LL_SQ_CQE_POSTING  EHCA_BMASK_IBM(19, 21)
-#define H_ALL_RES_QP_SIGNALING_TYPE     EHCA_BMASK_IBM(22, 23)
-#define H_ALL_RES_QP_UD_AV_LKEY_CTRL    EHCA_BMASK_IBM(31, 31)
-#define H_ALL_RES_QP_SMALL_SQ_PAGE_SIZE EHCA_BMASK_IBM(32, 35)
-#define H_ALL_RES_QP_SMALL_RQ_PAGE_SIZE EHCA_BMASK_IBM(36, 39)
-#define H_ALL_RES_QP_RESOURCE_TYPE      EHCA_BMASK_IBM(56, 63)
-
-#define H_ALL_RES_QP_MAX_OUTST_SEND_WR  EHCA_BMASK_IBM(0, 15)
-#define H_ALL_RES_QP_MAX_OUTST_RECV_WR  EHCA_BMASK_IBM(16, 31)
-#define H_ALL_RES_QP_MAX_SEND_SGE       EHCA_BMASK_IBM(32, 39)
-#define H_ALL_RES_QP_MAX_RECV_SGE       EHCA_BMASK_IBM(40, 47)
-
-#define H_ALL_RES_QP_UD_AV_LKEY         EHCA_BMASK_IBM(32, 63)
-#define H_ALL_RES_QP_SRQ_QP_TOKEN       EHCA_BMASK_IBM(0, 31)
-#define H_ALL_RES_QP_SRQ_QP_HANDLE      EHCA_BMASK_IBM(0, 64)
-#define H_ALL_RES_QP_SRQ_LIMIT          EHCA_BMASK_IBM(48, 63)
-#define H_ALL_RES_QP_SRQ_QPN            EHCA_BMASK_IBM(40, 63)
-
-#define H_ALL_RES_QP_ACT_OUTST_SEND_WR  EHCA_BMASK_IBM(16, 31)
-#define H_ALL_RES_QP_ACT_OUTST_RECV_WR  EHCA_BMASK_IBM(48, 63)
-#define H_ALL_RES_QP_ACT_SEND_SGE       EHCA_BMASK_IBM(8, 15)
-#define H_ALL_RES_QP_ACT_RECV_SGE       EHCA_BMASK_IBM(24, 31)
-
-#define H_ALL_RES_QP_SQUEUE_SIZE_PAGES  EHCA_BMASK_IBM(0, 31)
-#define H_ALL_RES_QP_RQUEUE_SIZE_PAGES  EHCA_BMASK_IBM(32, 63)
-
-#define H_MP_INIT_TYPE                  EHCA_BMASK_IBM(44, 47)
-#define H_MP_SHUTDOWN                   EHCA_BMASK_IBM(48, 48)
-#define H_MP_RESET_QKEY_CTR             EHCA_BMASK_IBM(49, 49)
-
-#define HCALL4_REGS_FORMAT "r4=%lx r5=%lx r6=%lx r7=%lx"
-#define HCALL7_REGS_FORMAT HCALL4_REGS_FORMAT " r8=%lx r9=%lx r10=%lx"
-#define HCALL9_REGS_FORMAT HCALL7_REGS_FORMAT " r11=%lx r12=%lx"
-
-static DEFINE_SPINLOCK(hcall_lock);
-
-static long ehca_plpar_hcall_norets(unsigned long opcode,
-				    unsigned long arg1,
-				    unsigned long arg2,
-				    unsigned long arg3,
-				    unsigned long arg4,
-				    unsigned long arg5,
-				    unsigned long arg6,
-				    unsigned long arg7)
-{
-	long ret;
-	int i, sleep_msecs;
-	unsigned long flags = 0;
-
-	if (unlikely(ehca_debug_level >= 2))
-		ehca_gen_dbg("opcode=%lx " HCALL7_REGS_FORMAT,
-			     opcode, arg1, arg2, arg3, arg4, arg5, arg6, arg7);
-
-	for (i = 0; i < 5; i++) {
-		/* serialize hCalls to work around firmware issue */
-		if (ehca_lock_hcalls)
-			spin_lock_irqsave(&hcall_lock, flags);
-
-		ret = plpar_hcall_norets(opcode, arg1, arg2, arg3, arg4,
-					 arg5, arg6, arg7);
-
-		if (ehca_lock_hcalls)
-			spin_unlock_irqrestore(&hcall_lock, flags);
-
-		if (H_IS_LONG_BUSY(ret)) {
-			sleep_msecs = get_longbusy_msecs(ret);
-			msleep_interruptible(sleep_msecs);
-			continue;
-		}
-
-		if (ret < H_SUCCESS)
-			ehca_gen_err("opcode=%lx ret=%li " HCALL7_REGS_FORMAT,
-				     opcode, ret, arg1, arg2, arg3,
-				     arg4, arg5, arg6, arg7);
-		else
-			if (unlikely(ehca_debug_level >= 2))
-				ehca_gen_dbg("opcode=%lx ret=%li", opcode, ret);
-
-		return ret;
-	}
-
-	return H_BUSY;
-}
-
-static long ehca_plpar_hcall9(unsigned long opcode,
-			      unsigned long *outs, /* array of 9 outputs */
-			      unsigned long arg1,
-			      unsigned long arg2,
-			      unsigned long arg3,
-			      unsigned long arg4,
-			      unsigned long arg5,
-			      unsigned long arg6,
-			      unsigned long arg7,
-			      unsigned long arg8,
-			      unsigned long arg9)
-{
-	long ret;
-	int i, sleep_msecs;
-	unsigned long flags = 0;
-
-	if (unlikely(ehca_debug_level >= 2))
-		ehca_gen_dbg("INPUT -- opcode=%lx " HCALL9_REGS_FORMAT, opcode,
-			     arg1, arg2, arg3, arg4, arg5,
-			     arg6, arg7, arg8, arg9);
-
-	for (i = 0; i < 5; i++) {
-		/* serialize hCalls to work around firmware issue */
-		if (ehca_lock_hcalls)
-			spin_lock_irqsave(&hcall_lock, flags);
-
-		ret = plpar_hcall9(opcode, outs,
-				   arg1, arg2, arg3, arg4, arg5,
-				   arg6, arg7, arg8, arg9);
-
-		if (ehca_lock_hcalls)
-			spin_unlock_irqrestore(&hcall_lock, flags);
-
-		if (H_IS_LONG_BUSY(ret)) {
-			sleep_msecs = get_longbusy_msecs(ret);
-			msleep_interruptible(sleep_msecs);
-			continue;
-		}
-
-		if (ret < H_SUCCESS) {
-			ehca_gen_err("INPUT -- opcode=%lx " HCALL9_REGS_FORMAT,
-				     opcode, arg1, arg2, arg3, arg4, arg5,
-				     arg6, arg7, arg8, arg9);
-			ehca_gen_err("OUTPUT -- ret=%li " HCALL9_REGS_FORMAT,
-				     ret, outs[0], outs[1], outs[2], outs[3],
-				     outs[4], outs[5], outs[6], outs[7],
-				     outs[8]);
-		} else if (unlikely(ehca_debug_level >= 2))
-			ehca_gen_dbg("OUTPUT -- ret=%li " HCALL9_REGS_FORMAT,
-				     ret, outs[0], outs[1], outs[2], outs[3],
-				     outs[4], outs[5], outs[6], outs[7],
-				     outs[8]);
-		return ret;
-	}
-
-	return H_BUSY;
-}
-
-u64 hipz_h_alloc_resource_eq(const struct ipz_adapter_handle adapter_handle,
-			     struct ehca_pfeq *pfeq,
-			     const u32 neq_control,
-			     const u32 number_of_entries,
-			     struct ipz_eq_handle *eq_handle,
-			     u32 *act_nr_of_entries,
-			     u32 *act_pages,
-			     u32 *eq_ist)
-{
-	u64 ret;
-	unsigned long outs[PLPAR_HCALL9_BUFSIZE];
-	u64 allocate_controls;
-
-	/* resource type */
-	allocate_controls = 3ULL;
-
-	/* ISN is associated */
-	if (neq_control != 1)
-		allocate_controls = (1ULL << (63 - 7)) | allocate_controls;
-	else /* notification event queue */
-		allocate_controls = (1ULL << 63) | allocate_controls;
-
-	ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs,
-				adapter_handle.handle,  /* r4 */
-				allocate_controls,      /* r5 */
-				number_of_entries,      /* r6 */
-				0, 0, 0, 0, 0, 0);
-	eq_handle->handle = outs[0];
-	*act_nr_of_entries = (u32)outs[3];
-	*act_pages = (u32)outs[4];
-	*eq_ist = (u32)outs[5];
-
-	if (ret == H_NOT_ENOUGH_RESOURCES)
-		ehca_gen_err("Not enough resource - ret=%lli ", ret);
-
-	return ret;
-}
-
-u64 hipz_h_reset_event(const struct ipz_adapter_handle adapter_handle,
-		       struct ipz_eq_handle eq_handle,
-		       const u64 event_mask)
-{
-	return ehca_plpar_hcall_norets(H_RESET_EVENTS,
-				       adapter_handle.handle, /* r4 */
-				       eq_handle.handle,      /* r5 */
-				       event_mask,	      /* r6 */
-				       0, 0, 0, 0);
-}
-
-u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle,
-			     struct ehca_cq *cq,
-			     struct ehca_alloc_cq_parms *param)
-{
-	int rc;
-	u64 ret;
-	unsigned long outs[PLPAR_HCALL9_BUFSIZE];
-
-	ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs,
-				adapter_handle.handle,   /* r4  */
-				2,	                 /* r5  */
-				param->eq_handle.handle, /* r6  */
-				cq->token,	         /* r7  */
-				param->nr_cqe,           /* r8  */
-				0, 0, 0, 0);
-	cq->ipz_cq_handle.handle = outs[0];
-	param->act_nr_of_entries = (u32)outs[3];
-	param->act_pages = (u32)outs[4];
-
-	if (ret == H_SUCCESS) {
-		rc = hcp_galpas_ctor(&cq->galpas, 0, outs[5], outs[6]);
-		if (rc) {
-			ehca_gen_err("Could not establish HW access. rc=%d paddr=%#lx",
-				     rc, outs[5]);
-
-			ehca_plpar_hcall_norets(H_FREE_RESOURCE,
-						adapter_handle.handle,     /* r4 */
-						cq->ipz_cq_handle.handle,  /* r5 */
-						0, 0, 0, 0, 0);
-			ret = H_NO_MEM;
-		}
-	}
-
-	if (ret == H_NOT_ENOUGH_RESOURCES)
-		ehca_gen_err("Not enough resources. ret=%lli", ret);
-
-	return ret;
-}
-
-u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle,
-			     struct ehca_alloc_qp_parms *parms, int is_user)
-{
-	int rc;
-	u64 ret;
-	u64 allocate_controls, max_r10_reg, r11, r12;
-	unsigned long outs[PLPAR_HCALL9_BUFSIZE];
-
-	allocate_controls =
-		EHCA_BMASK_SET(H_ALL_RES_QP_ENHANCED_OPS, parms->ext_type)
-		| EHCA_BMASK_SET(H_ALL_RES_QP_PTE_PIN, 0)
-		| EHCA_BMASK_SET(H_ALL_RES_QP_SERVICE_TYPE, parms->servicetype)
-		| EHCA_BMASK_SET(H_ALL_RES_QP_SIGNALING_TYPE, parms->sigtype)
-		| EHCA_BMASK_SET(H_ALL_RES_QP_STORAGE, parms->qp_storage)
-		| EHCA_BMASK_SET(H_ALL_RES_QP_SMALL_SQ_PAGE_SIZE,
-				 parms->squeue.page_size)
-		| EHCA_BMASK_SET(H_ALL_RES_QP_SMALL_RQ_PAGE_SIZE,
-				 parms->rqueue.page_size)
-		| EHCA_BMASK_SET(H_ALL_RES_QP_LL_RQ_CQE_POSTING,
-				 !!(parms->ll_comp_flags & LLQP_RECV_COMP))
-		| EHCA_BMASK_SET(H_ALL_RES_QP_LL_SQ_CQE_POSTING,
-				 !!(parms->ll_comp_flags & LLQP_SEND_COMP))
-		| EHCA_BMASK_SET(H_ALL_RES_QP_UD_AV_LKEY_CTRL,
-				 parms->ud_av_l_key_ctl)
-		| EHCA_BMASK_SET(H_ALL_RES_QP_RESOURCE_TYPE, 1);
-
-	max_r10_reg =
-		EHCA_BMASK_SET(H_ALL_RES_QP_MAX_OUTST_SEND_WR,
-			       parms->squeue.max_wr + 1)
-		| EHCA_BMASK_SET(H_ALL_RES_QP_MAX_OUTST_RECV_WR,
-				 parms->rqueue.max_wr + 1)
-		| EHCA_BMASK_SET(H_ALL_RES_QP_MAX_SEND_SGE,
-				 parms->squeue.max_sge)
-		| EHCA_BMASK_SET(H_ALL_RES_QP_MAX_RECV_SGE,
-				 parms->rqueue.max_sge);
-
-	r11 = EHCA_BMASK_SET(H_ALL_RES_QP_SRQ_QP_TOKEN, parms->srq_token);
-
-	if (parms->ext_type == EQPT_SRQ)
-		r12 = EHCA_BMASK_SET(H_ALL_RES_QP_SRQ_LIMIT, parms->srq_limit);
-	else
-		r12 = EHCA_BMASK_SET(H_ALL_RES_QP_SRQ_QPN, parms->srq_qpn);
-
-	ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs,
-				adapter_handle.handle,	           /* r4  */
-				allocate_controls,	           /* r5  */
-				parms->send_cq_handle.handle,
-				parms->recv_cq_handle.handle,
-				parms->eq_handle.handle,
-				((u64)parms->token << 32) | parms->pd.value,
-				max_r10_reg, r11, r12);
-
-	parms->qp_handle.handle = outs[0];
-	parms->real_qp_num = (u32)outs[1];
-	parms->squeue.act_nr_wqes =
-		(u16)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_OUTST_SEND_WR, outs[2]);
-	parms->rqueue.act_nr_wqes =
-		(u16)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_OUTST_RECV_WR, outs[2]);
-	parms->squeue.act_nr_sges =
-		(u8)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_SEND_SGE, outs[3]);
-	parms->rqueue.act_nr_sges =
-		(u8)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_RECV_SGE, outs[3]);
-	parms->squeue.queue_size =
-		(u32)EHCA_BMASK_GET(H_ALL_RES_QP_SQUEUE_SIZE_PAGES, outs[4]);
-	parms->rqueue.queue_size =
-		(u32)EHCA_BMASK_GET(H_ALL_RES_QP_RQUEUE_SIZE_PAGES, outs[4]);
-
-	if (ret == H_SUCCESS) {
-		rc = hcp_galpas_ctor(&parms->galpas, is_user, outs[6], outs[6]);
-		if (rc) {
-			ehca_gen_err("Could not establish HW access. rc=%d paddr=%#lx",
-				     rc, outs[6]);
-
-			ehca_plpar_hcall_norets(H_FREE_RESOURCE,
-						adapter_handle.handle,     /* r4 */
-						parms->qp_handle.handle,  /* r5 */
-						0, 0, 0, 0, 0);
-			ret = H_NO_MEM;
-		}
-	}
-
-	if (ret == H_NOT_ENOUGH_RESOURCES)
-		ehca_gen_err("Not enough resources. ret=%lli", ret);
-
-	return ret;
-}
-
-u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle,
-		      const u8 port_id,
-		      struct hipz_query_port *query_port_response_block)
-{
-	u64 ret;
-	u64 r_cb = __pa(query_port_response_block);
-
-	if (r_cb & (EHCA_PAGESIZE-1)) {
-		ehca_gen_err("response block not page aligned");
-		return H_PARAMETER;
-	}
-
-	ret = ehca_plpar_hcall_norets(H_QUERY_PORT,
-				      adapter_handle.handle, /* r4 */
-				      port_id,	             /* r5 */
-				      r_cb,	             /* r6 */
-				      0, 0, 0, 0);
-
-	if (ehca_debug_level >= 2)
-		ehca_dmp(query_port_response_block, 64, "response_block");
-
-	return ret;
-}
-
-u64 hipz_h_modify_port(const struct ipz_adapter_handle adapter_handle,
-		       const u8 port_id, const u32 port_cap,
-		       const u8 init_type, const int modify_mask)
-{
-	u64 port_attributes = port_cap;
-
-	if (modify_mask & IB_PORT_SHUTDOWN)
-		port_attributes |= EHCA_BMASK_SET(H_MP_SHUTDOWN, 1);
-	if (modify_mask & IB_PORT_INIT_TYPE)
-		port_attributes |= EHCA_BMASK_SET(H_MP_INIT_TYPE, init_type);
-	if (modify_mask & IB_PORT_RESET_QKEY_CNTR)
-		port_attributes |= EHCA_BMASK_SET(H_MP_RESET_QKEY_CTR, 1);
-
-	return ehca_plpar_hcall_norets(H_MODIFY_PORT,
-				       adapter_handle.handle, /* r4 */
-				       port_id,               /* r5 */
-				       port_attributes,       /* r6 */
-				       0, 0, 0, 0);
-}
-
-u64 hipz_h_query_hca(const struct ipz_adapter_handle adapter_handle,
-		     struct hipz_query_hca *query_hca_rblock)
-{
-	u64 r_cb = __pa(query_hca_rblock);
-
-	if (r_cb & (EHCA_PAGESIZE-1)) {
-		ehca_gen_err("response_block=%p not page aligned",
-			     query_hca_rblock);
-		return H_PARAMETER;
-	}
-
-	return ehca_plpar_hcall_norets(H_QUERY_HCA,
-				       adapter_handle.handle, /* r4 */
-				       r_cb,                  /* r5 */
-				       0, 0, 0, 0, 0);
-}
-
-u64 hipz_h_register_rpage(const struct ipz_adapter_handle adapter_handle,
-			  const u8 pagesize,
-			  const u8 queue_type,
-			  const u64 resource_handle,
-			  const u64 logical_address_of_page,
-			  u64 count)
-{
-	return ehca_plpar_hcall_norets(H_REGISTER_RPAGES,
-				       adapter_handle.handle,      /* r4  */
-				       (u64)queue_type | ((u64)pagesize) << 8,
-				       /* r5  */
-				       resource_handle,	           /* r6  */
-				       logical_address_of_page,    /* r7  */
-				       count,	                   /* r8  */
-				       0, 0);
-}
-
-u64 hipz_h_register_rpage_eq(const struct ipz_adapter_handle adapter_handle,
-			     const struct ipz_eq_handle eq_handle,
-			     struct ehca_pfeq *pfeq,
-			     const u8 pagesize,
-			     const u8 queue_type,
-			     const u64 logical_address_of_page,
-			     const u64 count)
-{
-	if (count != 1) {
-		ehca_gen_err("Ppage counter=%llx", count);
-		return H_PARAMETER;
-	}
-	return hipz_h_register_rpage(adapter_handle,
-				     pagesize,
-				     queue_type,
-				     eq_handle.handle,
-				     logical_address_of_page, count);
-}
-
-u64 hipz_h_query_int_state(const struct ipz_adapter_handle adapter_handle,
-			   u32 ist)
-{
-	u64 ret;
-	ret = ehca_plpar_hcall_norets(H_QUERY_INT_STATE,
-				      adapter_handle.handle, /* r4 */
-				      ist,                   /* r5 */
-				      0, 0, 0, 0, 0);
-
-	if (ret != H_SUCCESS && ret != H_BUSY)
-		ehca_gen_err("Could not query interrupt state.");
-
-	return ret;
-}
-
-u64 hipz_h_register_rpage_cq(const struct ipz_adapter_handle adapter_handle,
-			     const struct ipz_cq_handle cq_handle,
-			     struct ehca_pfcq *pfcq,
-			     const u8 pagesize,
-			     const u8 queue_type,
-			     const u64 logical_address_of_page,
-			     const u64 count,
-			     const struct h_galpa gal)
-{
-	if (count != 1) {
-		ehca_gen_err("Page counter=%llx", count);
-		return H_PARAMETER;
-	}
-
-	return hipz_h_register_rpage(adapter_handle, pagesize, queue_type,
-				     cq_handle.handle, logical_address_of_page,
-				     count);
-}
-
-u64 hipz_h_register_rpage_qp(const struct ipz_adapter_handle adapter_handle,
-			     const struct ipz_qp_handle qp_handle,
-			     struct ehca_pfqp *pfqp,
-			     const u8 pagesize,
-			     const u8 queue_type,
-			     const u64 logical_address_of_page,
-			     const u64 count,
-			     const struct h_galpa galpa)
-{
-	if (count > 1) {
-		ehca_gen_err("Page counter=%llx", count);
-		return H_PARAMETER;
-	}
-
-	return hipz_h_register_rpage(adapter_handle, pagesize, queue_type,
-				     qp_handle.handle, logical_address_of_page,
-				     count);
-}
-
-u64 hipz_h_disable_and_get_wqe(const struct ipz_adapter_handle adapter_handle,
-			       const struct ipz_qp_handle qp_handle,
-			       struct ehca_pfqp *pfqp,
-			       void **log_addr_next_sq_wqe2processed,
-			       void **log_addr_next_rq_wqe2processed,
-			       int dis_and_get_function_code)
-{
-	u64 ret;
-	unsigned long outs[PLPAR_HCALL9_BUFSIZE];
-
-	ret = ehca_plpar_hcall9(H_DISABLE_AND_GETC, outs,
-				adapter_handle.handle,     /* r4 */
-				dis_and_get_function_code, /* r5 */
-				qp_handle.handle,	   /* r6 */
-				0, 0, 0, 0, 0, 0);
-	if (log_addr_next_sq_wqe2processed)
-		*log_addr_next_sq_wqe2processed = (void *)outs[0];
-	if (log_addr_next_rq_wqe2processed)
-		*log_addr_next_rq_wqe2processed = (void *)outs[1];
-
-	return ret;
-}
-
-u64 hipz_h_modify_qp(const struct ipz_adapter_handle adapter_handle,
-		     const struct ipz_qp_handle qp_handle,
-		     struct ehca_pfqp *pfqp,
-		     const u64 update_mask,
-		     struct hcp_modify_qp_control_block *mqpcb,
-		     struct h_galpa gal)
-{
-	u64 ret;
-	unsigned long outs[PLPAR_HCALL9_BUFSIZE];
-	ret = ehca_plpar_hcall9(H_MODIFY_QP, outs,
-				adapter_handle.handle, /* r4 */
-				qp_handle.handle,      /* r5 */
-				update_mask,	       /* r6 */
-				__pa(mqpcb),	       /* r7 */
-				0, 0, 0, 0, 0);
-
-	if (ret == H_NOT_ENOUGH_RESOURCES)
-		ehca_gen_err("Insufficient resources ret=%lli", ret);
-
-	return ret;
-}
-
-u64 hipz_h_query_qp(const struct ipz_adapter_handle adapter_handle,
-		    const struct ipz_qp_handle qp_handle,
-		    struct ehca_pfqp *pfqp,
-		    struct hcp_modify_qp_control_block *qqpcb,
-		    struct h_galpa gal)
-{
-	return ehca_plpar_hcall_norets(H_QUERY_QP,
-				       adapter_handle.handle, /* r4 */
-				       qp_handle.handle,      /* r5 */
-				       __pa(qqpcb),	      /* r6 */
-				       0, 0, 0, 0);
-}
-
-u64 hipz_h_destroy_qp(const struct ipz_adapter_handle adapter_handle,
-		      struct ehca_qp *qp)
-{
-	u64 ret;
-	unsigned long outs[PLPAR_HCALL9_BUFSIZE];
-
-	ret = hcp_galpas_dtor(&qp->galpas);
-	if (ret) {
-		ehca_gen_err("Could not destruct qp->galpas");
-		return H_RESOURCE;
-	}
-	ret = ehca_plpar_hcall9(H_DISABLE_AND_GETC, outs,
-				adapter_handle.handle,     /* r4 */
-				/* function code */
-				1,	                   /* r5 */
-				qp->ipz_qp_handle.handle,  /* r6 */
-				0, 0, 0, 0, 0, 0);
-	if (ret == H_HARDWARE)
-		ehca_gen_err("HCA not operational. ret=%lli", ret);
-
-	ret = ehca_plpar_hcall_norets(H_FREE_RESOURCE,
-				      adapter_handle.handle,     /* r4 */
-				      qp->ipz_qp_handle.handle,  /* r5 */
-				      0, 0, 0, 0, 0);
-
-	if (ret == H_RESOURCE)
-		ehca_gen_err("Resource still in use. ret=%lli", ret);
-
-	return ret;
-}
-
-u64 hipz_h_define_aqp0(const struct ipz_adapter_handle adapter_handle,
-		       const struct ipz_qp_handle qp_handle,
-		       struct h_galpa gal,
-		       u32 port)
-{
-	return ehca_plpar_hcall_norets(H_DEFINE_AQP0,
-				       adapter_handle.handle, /* r4 */
-				       qp_handle.handle,      /* r5 */
-				       port,                  /* r6 */
-				       0, 0, 0, 0);
-}
-
-u64 hipz_h_define_aqp1(const struct ipz_adapter_handle adapter_handle,
-		       const struct ipz_qp_handle qp_handle,
-		       struct h_galpa gal,
-		       u32 port, u32 * pma_qp_nr,
-		       u32 * bma_qp_nr)
-{
-	u64 ret;
-	unsigned long outs[PLPAR_HCALL9_BUFSIZE];
-
-	ret = ehca_plpar_hcall9(H_DEFINE_AQP1, outs,
-				adapter_handle.handle, /* r4 */
-				qp_handle.handle,      /* r5 */
-				port,	               /* r6 */
-				0, 0, 0, 0, 0, 0);
-	*pma_qp_nr = (u32)outs[0];
-	*bma_qp_nr = (u32)outs[1];
-
-	if (ret == H_ALIAS_EXIST)
-		ehca_gen_err("AQP1 already exists. ret=%lli", ret);
-
-	return ret;
-}
-
-u64 hipz_h_attach_mcqp(const struct ipz_adapter_handle adapter_handle,
-		       const struct ipz_qp_handle qp_handle,
-		       struct h_galpa gal,
-		       u16 mcg_dlid,
-		       u64 subnet_prefix, u64 interface_id)
-{
-	u64 ret;
-
-	ret = ehca_plpar_hcall_norets(H_ATTACH_MCQP,
-				      adapter_handle.handle,  /* r4 */
-				      qp_handle.handle,       /* r5 */
-				      mcg_dlid,               /* r6 */
-				      interface_id,           /* r7 */
-				      subnet_prefix,          /* r8 */
-				      0, 0);
-
-	if (ret == H_NOT_ENOUGH_RESOURCES)
-		ehca_gen_err("Not enough resources. ret=%lli", ret);
-
-	return ret;
-}
-
-u64 hipz_h_detach_mcqp(const struct ipz_adapter_handle adapter_handle,
-		       const struct ipz_qp_handle qp_handle,
-		       struct h_galpa gal,
-		       u16 mcg_dlid,
-		       u64 subnet_prefix, u64 interface_id)
-{
-	return ehca_plpar_hcall_norets(H_DETACH_MCQP,
-				       adapter_handle.handle, /* r4 */
-				       qp_handle.handle,      /* r5 */
-				       mcg_dlid,              /* r6 */
-				       interface_id,          /* r7 */
-				       subnet_prefix,         /* r8 */
-				       0, 0);
-}
-
-u64 hipz_h_destroy_cq(const struct ipz_adapter_handle adapter_handle,
-		      struct ehca_cq *cq,
-		      u8 force_flag)
-{
-	u64 ret;
-
-	ret = hcp_galpas_dtor(&cq->galpas);
-	if (ret) {
-		ehca_gen_err("Could not destruct cp->galpas");
-		return H_RESOURCE;
-	}
-
-	ret = ehca_plpar_hcall_norets(H_FREE_RESOURCE,
-				      adapter_handle.handle,     /* r4 */
-				      cq->ipz_cq_handle.handle,  /* r5 */
-				      force_flag != 0 ? 1L : 0L, /* r6 */
-				      0, 0, 0, 0);
-
-	if (ret == H_RESOURCE)
-		ehca_gen_err("H_FREE_RESOURCE failed ret=%lli ", ret);
-
-	return ret;
-}
-
-u64 hipz_h_destroy_eq(const struct ipz_adapter_handle adapter_handle,
-		      struct ehca_eq *eq)
-{
-	u64 ret;
-
-	ret = hcp_galpas_dtor(&eq->galpas);
-	if (ret) {
-		ehca_gen_err("Could not destruct eq->galpas");
-		return H_RESOURCE;
-	}
-
-	ret = ehca_plpar_hcall_norets(H_FREE_RESOURCE,
-				      adapter_handle.handle,     /* r4 */
-				      eq->ipz_eq_handle.handle,  /* r5 */
-				      0, 0, 0, 0, 0);
-
-	if (ret == H_RESOURCE)
-		ehca_gen_err("Resource in use. ret=%lli ", ret);
-
-	return ret;
-}
-
-u64 hipz_h_alloc_resource_mr(const struct ipz_adapter_handle adapter_handle,
-			     const struct ehca_mr *mr,
-			     const u64 vaddr,
-			     const u64 length,
-			     const u32 access_ctrl,
-			     const struct ipz_pd pd,
-			     struct ehca_mr_hipzout_parms *outparms)
-{
-	u64 ret;
-	unsigned long outs[PLPAR_HCALL9_BUFSIZE];
-
-	ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs,
-				adapter_handle.handle,            /* r4 */
-				5,                                /* r5 */
-				vaddr,                            /* r6 */
-				length,                           /* r7 */
-				(((u64)access_ctrl) << 32ULL),    /* r8 */
-				pd.value,                         /* r9 */
-				0, 0, 0);
-	outparms->handle.handle = outs[0];
-	outparms->lkey = (u32)outs[2];
-	outparms->rkey = (u32)outs[3];
-
-	return ret;
-}
-
-u64 hipz_h_register_rpage_mr(const struct ipz_adapter_handle adapter_handle,
-			     const struct ehca_mr *mr,
-			     const u8 pagesize,
-			     const u8 queue_type,
-			     const u64 logical_address_of_page,
-			     const u64 count)
-{
-	u64 ret;
-
-	if (unlikely(ehca_debug_level >= 3)) {
-		if (count > 1) {
-			u64 *kpage;
-			int i;
-			kpage = __va(logical_address_of_page);
-			for (i = 0; i < count; i++)
-				ehca_gen_dbg("kpage[%d]=%p",
-					     i, (void *)kpage[i]);
-		} else
-			ehca_gen_dbg("kpage=%p",
-				     (void *)logical_address_of_page);
-	}
-
-	if ((count > 1) && (logical_address_of_page & (EHCA_PAGESIZE-1))) {
-		ehca_gen_err("logical_address_of_page not on a 4k boundary "
-			     "adapter_handle=%llx mr=%p mr_handle=%llx "
-			     "pagesize=%x queue_type=%x "
-			     "logical_address_of_page=%llx count=%llx",
-			     adapter_handle.handle, mr,
-			     mr->ipz_mr_handle.handle, pagesize, queue_type,
-			     logical_address_of_page, count);
-		ret = H_PARAMETER;
-	} else
-		ret = hipz_h_register_rpage(adapter_handle, pagesize,
-					    queue_type,
-					    mr->ipz_mr_handle.handle,
-					    logical_address_of_page, count);
-	return ret;
-}
-
-u64 hipz_h_query_mr(const struct ipz_adapter_handle adapter_handle,
-		    const struct ehca_mr *mr,
-		    struct ehca_mr_hipzout_parms *outparms)
-{
-	u64 ret;
-	unsigned long outs[PLPAR_HCALL9_BUFSIZE];
-
-	ret = ehca_plpar_hcall9(H_QUERY_MR, outs,
-				adapter_handle.handle,     /* r4 */
-				mr->ipz_mr_handle.handle,  /* r5 */
-				0, 0, 0, 0, 0, 0, 0);
-	outparms->len = outs[0];
-	outparms->vaddr = outs[1];
-	outparms->acl  = outs[4] >> 32;
-	outparms->lkey = (u32)(outs[5] >> 32);
-	outparms->rkey = (u32)(outs[5] & (0xffffffff));
-
-	return ret;
-}
-
-u64 hipz_h_free_resource_mr(const struct ipz_adapter_handle adapter_handle,
-			    const struct ehca_mr *mr)
-{
-	return ehca_plpar_hcall_norets(H_FREE_RESOURCE,
-				       adapter_handle.handle,    /* r4 */
-				       mr->ipz_mr_handle.handle, /* r5 */
-				       0, 0, 0, 0, 0);
-}
-
-u64 hipz_h_reregister_pmr(const struct ipz_adapter_handle adapter_handle,
-			  const struct ehca_mr *mr,
-			  const u64 vaddr_in,
-			  const u64 length,
-			  const u32 access_ctrl,
-			  const struct ipz_pd pd,
-			  const u64 mr_addr_cb,
-			  struct ehca_mr_hipzout_parms *outparms)
-{
-	u64 ret;
-	unsigned long outs[PLPAR_HCALL9_BUFSIZE];
-
-	ret = ehca_plpar_hcall9(H_REREGISTER_PMR, outs,
-				adapter_handle.handle,    /* r4 */
-				mr->ipz_mr_handle.handle, /* r5 */
-				vaddr_in,	          /* r6 */
-				length,                   /* r7 */
-				/* r8 */
-				((((u64)access_ctrl) << 32ULL) | pd.value),
-				mr_addr_cb,               /* r9 */
-				0, 0, 0);
-	outparms->vaddr = outs[1];
-	outparms->lkey = (u32)outs[2];
-	outparms->rkey = (u32)outs[3];
-
-	return ret;
-}
-
-u64 hipz_h_register_smr(const struct ipz_adapter_handle adapter_handle,
-			const struct ehca_mr *mr,
-			const struct ehca_mr *orig_mr,
-			const u64 vaddr_in,
-			const u32 access_ctrl,
-			const struct ipz_pd pd,
-			struct ehca_mr_hipzout_parms *outparms)
-{
-	u64 ret;
-	unsigned long outs[PLPAR_HCALL9_BUFSIZE];
-
-	ret = ehca_plpar_hcall9(H_REGISTER_SMR, outs,
-				adapter_handle.handle,            /* r4 */
-				orig_mr->ipz_mr_handle.handle,    /* r5 */
-				vaddr_in,                         /* r6 */
-				(((u64)access_ctrl) << 32ULL),    /* r7 */
-				pd.value,                         /* r8 */
-				0, 0, 0, 0);
-	outparms->handle.handle = outs[0];
-	outparms->lkey = (u32)outs[2];
-	outparms->rkey = (u32)outs[3];
-
-	return ret;
-}
-
-u64 hipz_h_alloc_resource_mw(const struct ipz_adapter_handle adapter_handle,
-			     const struct ehca_mw *mw,
-			     const struct ipz_pd pd,
-			     struct ehca_mw_hipzout_parms *outparms)
-{
-	u64 ret;
-	unsigned long outs[PLPAR_HCALL9_BUFSIZE];
-
-	ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs,
-				adapter_handle.handle,      /* r4 */
-				6,                          /* r5 */
-				pd.value,                   /* r6 */
-				0, 0, 0, 0, 0, 0);
-	outparms->handle.handle = outs[0];
-	outparms->rkey = (u32)outs[3];
-
-	return ret;
-}
-
-u64 hipz_h_query_mw(const struct ipz_adapter_handle adapter_handle,
-		    const struct ehca_mw *mw,
-		    struct ehca_mw_hipzout_parms *outparms)
-{
-	u64 ret;
-	unsigned long outs[PLPAR_HCALL9_BUFSIZE];
-
-	ret = ehca_plpar_hcall9(H_QUERY_MW, outs,
-				adapter_handle.handle,    /* r4 */
-				mw->ipz_mw_handle.handle, /* r5 */
-				0, 0, 0, 0, 0, 0, 0);
-	outparms->rkey = (u32)outs[3];
-
-	return ret;
-}
-
-u64 hipz_h_free_resource_mw(const struct ipz_adapter_handle adapter_handle,
-			    const struct ehca_mw *mw)
-{
-	return ehca_plpar_hcall_norets(H_FREE_RESOURCE,
-				       adapter_handle.handle,    /* r4 */
-				       mw->ipz_mw_handle.handle, /* r5 */
-				       0, 0, 0, 0, 0);
-}
-
-u64 hipz_h_error_data(const struct ipz_adapter_handle adapter_handle,
-		      const u64 ressource_handle,
-		      void *rblock,
-		      unsigned long *byte_count)
-{
-	u64 r_cb = __pa(rblock);
-
-	if (r_cb & (EHCA_PAGESIZE-1)) {
-		ehca_gen_err("rblock not page aligned.");
-		return H_PARAMETER;
-	}
-
-	return ehca_plpar_hcall_norets(H_ERROR_DATA,
-				       adapter_handle.handle,
-				       ressource_handle,
-				       r_cb,
-				       0, 0, 0, 0);
-}
-
-u64 hipz_h_eoi(int irq)
-{
-	unsigned long xirr;
-
-	iosync();
-	xirr = (0xffULL << 24) | irq;
-
-	return plpar_hcall_norets(H_EOI, xirr);
-}
diff --git a/drivers/staging/rdma/ehca/hcp_if.h b/drivers/staging/rdma/ehca/hcp_if.h
deleted file mode 100644
index a46e514..0000000
--- a/drivers/staging/rdma/ehca/hcp_if.h
+++ /dev/null
@@ -1,265 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  Firmware Infiniband Interface code for POWER
- *
- *  Authors: Christoph Raisch <raisch@de.ibm.com>
- *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
- *           Gerd Bayer <gerd.bayer@de.ibm.com>
- *           Waleri Fomin <fomin@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __HCP_IF_H__
-#define __HCP_IF_H__
-
-#include "ehca_classes.h"
-#include "ehca_tools.h"
-#include "hipz_hw.h"
-
-/*
- * hipz_h_alloc_resource_eq allocates EQ resources in HW and FW, initialize
- * resources, create the empty EQPT (ring).
- */
-u64 hipz_h_alloc_resource_eq(const struct ipz_adapter_handle adapter_handle,
-			     struct ehca_pfeq *pfeq,
-			     const u32 neq_control,
-			     const u32 number_of_entries,
-			     struct ipz_eq_handle *eq_handle,
-			     u32 * act_nr_of_entries,
-			     u32 * act_pages,
-			     u32 * eq_ist);
-
-u64 hipz_h_reset_event(const struct ipz_adapter_handle adapter_handle,
-		       struct ipz_eq_handle eq_handle,
-		       const u64 event_mask);
-/*
- * hipz_h_allocate_resource_cq allocates CQ resources in HW and FW, initialize
- * resources, create the empty CQPT (ring).
- */
-u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle,
-			     struct ehca_cq *cq,
-			     struct ehca_alloc_cq_parms *param);
-
-
-/*
- * hipz_h_alloc_resource_qp allocates QP resources in HW and FW,
- * initialize resources, create empty QPPTs (2 rings).
- */
-u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle,
-			     struct ehca_alloc_qp_parms *parms, int is_user);
-
-u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle,
-		      const u8 port_id,
-		      struct hipz_query_port *query_port_response_block);
-
-u64 hipz_h_modify_port(const struct ipz_adapter_handle adapter_handle,
-		       const u8 port_id, const u32 port_cap,
-		       const u8 init_type, const int modify_mask);
-
-u64 hipz_h_query_hca(const struct ipz_adapter_handle adapter_handle,
-		     struct hipz_query_hca *query_hca_rblock);
-
-/*
- * hipz_h_register_rpage internal function in hcp_if.h for all
- * hcp_H_REGISTER_RPAGE calls.
- */
-u64 hipz_h_register_rpage(const struct ipz_adapter_handle adapter_handle,
-			  const u8 pagesize,
-			  const u8 queue_type,
-			  const u64 resource_handle,
-			  const u64 logical_address_of_page,
-			  u64 count);
-
-u64 hipz_h_register_rpage_eq(const struct ipz_adapter_handle adapter_handle,
-			     const struct ipz_eq_handle eq_handle,
-			     struct ehca_pfeq *pfeq,
-			     const u8 pagesize,
-			     const u8 queue_type,
-			     const u64 logical_address_of_page,
-			     const u64 count);
-
-u64 hipz_h_query_int_state(const struct ipz_adapter_handle
-			   hcp_adapter_handle,
-			   u32 ist);
-
-u64 hipz_h_register_rpage_cq(const struct ipz_adapter_handle adapter_handle,
-			     const struct ipz_cq_handle cq_handle,
-			     struct ehca_pfcq *pfcq,
-			     const u8 pagesize,
-			     const u8 queue_type,
-			     const u64 logical_address_of_page,
-			     const u64 count,
-			     const struct h_galpa gal);
-
-u64 hipz_h_register_rpage_qp(const struct ipz_adapter_handle adapter_handle,
-			     const struct ipz_qp_handle qp_handle,
-			     struct ehca_pfqp *pfqp,
-			     const u8 pagesize,
-			     const u8 queue_type,
-			     const u64 logical_address_of_page,
-			     const u64 count,
-			     const struct h_galpa galpa);
-
-u64 hipz_h_disable_and_get_wqe(const struct ipz_adapter_handle adapter_handle,
-			       const struct ipz_qp_handle qp_handle,
-			       struct ehca_pfqp *pfqp,
-			       void **log_addr_next_sq_wqe_tb_processed,
-			       void **log_addr_next_rq_wqe_tb_processed,
-			       int dis_and_get_function_code);
-enum hcall_sigt {
-	HCALL_SIGT_NO_CQE = 0,
-	HCALL_SIGT_BY_WQE = 1,
-	HCALL_SIGT_EVERY = 2
-};
-
-u64 hipz_h_modify_qp(const struct ipz_adapter_handle adapter_handle,
-		     const struct ipz_qp_handle qp_handle,
-		     struct ehca_pfqp *pfqp,
-		     const u64 update_mask,
-		     struct hcp_modify_qp_control_block *mqpcb,
-		     struct h_galpa gal);
-
-u64 hipz_h_query_qp(const struct ipz_adapter_handle adapter_handle,
-		    const struct ipz_qp_handle qp_handle,
-		    struct ehca_pfqp *pfqp,
-		    struct hcp_modify_qp_control_block *qqpcb,
-		    struct h_galpa gal);
-
-u64 hipz_h_destroy_qp(const struct ipz_adapter_handle adapter_handle,
-		      struct ehca_qp *qp);
-
-u64 hipz_h_define_aqp0(const struct ipz_adapter_handle adapter_handle,
-		       const struct ipz_qp_handle qp_handle,
-		       struct h_galpa gal,
-		       u32 port);
-
-u64 hipz_h_define_aqp1(const struct ipz_adapter_handle adapter_handle,
-		       const struct ipz_qp_handle qp_handle,
-		       struct h_galpa gal,
-		       u32 port, u32 * pma_qp_nr,
-		       u32 * bma_qp_nr);
-
-u64 hipz_h_attach_mcqp(const struct ipz_adapter_handle adapter_handle,
-		       const struct ipz_qp_handle qp_handle,
-		       struct h_galpa gal,
-		       u16 mcg_dlid,
-		       u64 subnet_prefix, u64 interface_id);
-
-u64 hipz_h_detach_mcqp(const struct ipz_adapter_handle adapter_handle,
-		       const struct ipz_qp_handle qp_handle,
-		       struct h_galpa gal,
-		       u16 mcg_dlid,
-		       u64 subnet_prefix, u64 interface_id);
-
-u64 hipz_h_destroy_cq(const struct ipz_adapter_handle adapter_handle,
-		      struct ehca_cq *cq,
-		      u8 force_flag);
-
-u64 hipz_h_destroy_eq(const struct ipz_adapter_handle adapter_handle,
-		      struct ehca_eq *eq);
-
-/*
- * hipz_h_alloc_resource_mr allocates MR resources in HW and FW, initialize
- * resources.
- */
-u64 hipz_h_alloc_resource_mr(const struct ipz_adapter_handle adapter_handle,
-			     const struct ehca_mr *mr,
-			     const u64 vaddr,
-			     const u64 length,
-			     const u32 access_ctrl,
-			     const struct ipz_pd pd,
-			     struct ehca_mr_hipzout_parms *outparms);
-
-/* hipz_h_register_rpage_mr registers MR resource pages in HW and FW */
-u64 hipz_h_register_rpage_mr(const struct ipz_adapter_handle adapter_handle,
-			     const struct ehca_mr *mr,
-			     const u8 pagesize,
-			     const u8 queue_type,
-			     const u64 logical_address_of_page,
-			     const u64 count);
-
-/* hipz_h_query_mr queries MR in HW and FW */
-u64 hipz_h_query_mr(const struct ipz_adapter_handle adapter_handle,
-		    const struct ehca_mr *mr,
-		    struct ehca_mr_hipzout_parms *outparms);
-
-/* hipz_h_free_resource_mr frees MR resources in HW and FW */
-u64 hipz_h_free_resource_mr(const struct ipz_adapter_handle adapter_handle,
-			    const struct ehca_mr *mr);
-
-/* hipz_h_reregister_pmr reregisters MR in HW and FW */
-u64 hipz_h_reregister_pmr(const struct ipz_adapter_handle adapter_handle,
-			  const struct ehca_mr *mr,
-			  const u64 vaddr_in,
-			  const u64 length,
-			  const u32 access_ctrl,
-			  const struct ipz_pd pd,
-			  const u64 mr_addr_cb,
-			  struct ehca_mr_hipzout_parms *outparms);
-
-/* hipz_h_register_smr register shared MR in HW and FW */
-u64 hipz_h_register_smr(const struct ipz_adapter_handle adapter_handle,
-			const struct ehca_mr *mr,
-			const struct ehca_mr *orig_mr,
-			const u64 vaddr_in,
-			const u32 access_ctrl,
-			const struct ipz_pd pd,
-			struct ehca_mr_hipzout_parms *outparms);
-
-/*
- * hipz_h_alloc_resource_mw allocates MW resources in HW and FW, initialize
- * resources.
- */
-u64 hipz_h_alloc_resource_mw(const struct ipz_adapter_handle adapter_handle,
-			     const struct ehca_mw *mw,
-			     const struct ipz_pd pd,
-			     struct ehca_mw_hipzout_parms *outparms);
-
-/* hipz_h_query_mw queries MW in HW and FW */
-u64 hipz_h_query_mw(const struct ipz_adapter_handle adapter_handle,
-		    const struct ehca_mw *mw,
-		    struct ehca_mw_hipzout_parms *outparms);
-
-/* hipz_h_free_resource_mw frees MW resources in HW and FW */
-u64 hipz_h_free_resource_mw(const struct ipz_adapter_handle adapter_handle,
-			    const struct ehca_mw *mw);
-
-u64 hipz_h_error_data(const struct ipz_adapter_handle adapter_handle,
-		      const u64 ressource_handle,
-		      void *rblock,
-		      unsigned long *byte_count);
-u64 hipz_h_eoi(int irq);
-
-#endif /* __HCP_IF_H__ */
diff --git a/drivers/staging/rdma/ehca/hcp_phyp.c b/drivers/staging/rdma/ehca/hcp_phyp.c
deleted file mode 100644
index 077376f..0000000
--- a/drivers/staging/rdma/ehca/hcp_phyp.c
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *   load store abstraction for ehca register access with tracing
- *
- *  Authors: Christoph Raisch <raisch@de.ibm.com>
- *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "ehca_classes.h"
-#include "hipz_hw.h"
-
-u64 hcall_map_page(u64 physaddr)
-{
-	return (u64)ioremap(physaddr, EHCA_PAGESIZE);
-}
-
-int hcall_unmap_page(u64 mapaddr)
-{
-	iounmap((volatile void __iomem *) mapaddr);
-	return 0;
-}
-
-int hcp_galpas_ctor(struct h_galpas *galpas, int is_user,
-		    u64 paddr_kernel, u64 paddr_user)
-{
-	if (!is_user) {
-		galpas->kernel.fw_handle = hcall_map_page(paddr_kernel);
-		if (!galpas->kernel.fw_handle)
-			return -ENOMEM;
-	} else
-		galpas->kernel.fw_handle = 0;
-
-	galpas->user.fw_handle = paddr_user;
-
-	return 0;
-}
-
-int hcp_galpas_dtor(struct h_galpas *galpas)
-{
-	if (galpas->kernel.fw_handle) {
-		int ret = hcall_unmap_page(galpas->kernel.fw_handle);
-		if (ret)
-			return ret;
-	}
-
-	galpas->user.fw_handle = galpas->kernel.fw_handle = 0;
-
-	return 0;
-}
diff --git a/drivers/staging/rdma/ehca/hcp_phyp.h b/drivers/staging/rdma/ehca/hcp_phyp.h
deleted file mode 100644
index d1b0299..0000000
--- a/drivers/staging/rdma/ehca/hcp_phyp.h
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  Firmware calls
- *
- *  Authors: Christoph Raisch <raisch@de.ibm.com>
- *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
- *           Waleri Fomin <fomin@de.ibm.com>
- *           Gerd Bayer <gerd.bayer@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __HCP_PHYP_H__
-#define __HCP_PHYP_H__
-
-
-/*
- * eHCA page (mapped into memory)
- * resource to access eHCA register pages in CPU address space
-*/
-struct h_galpa {
-	u64 fw_handle;
-	/* for pSeries this is a 64bit memory address where
-	   I/O memory is mapped into CPU address space (kv) */
-};
-
-/*
- * resource to access eHCA address space registers, all types
- */
-struct h_galpas {
-	u32 pid;		/*PID of userspace galpa checking */
-	struct h_galpa user;	/* user space accessible resource,
-				   set to 0 if unused */
-	struct h_galpa kernel;	/* kernel space accessible resource,
-				   set to 0 if unused */
-};
-
-static inline u64 hipz_galpa_load(struct h_galpa galpa, u32 offset)
-{
-	u64 addr = galpa.fw_handle + offset;
-	return *(volatile u64 __force *)addr;
-}
-
-static inline void hipz_galpa_store(struct h_galpa galpa, u32 offset, u64 value)
-{
-	u64 addr = galpa.fw_handle + offset;
-	*(volatile u64 __force *)addr = value;
-}
-
-int hcp_galpas_ctor(struct h_galpas *galpas, int is_user,
-		    u64 paddr_kernel, u64 paddr_user);
-
-int hcp_galpas_dtor(struct h_galpas *galpas);
-
-u64 hcall_map_page(u64 physaddr);
-
-int hcall_unmap_page(u64 mapaddr);
-
-#endif
diff --git a/drivers/staging/rdma/ehca/hipz_fns.h b/drivers/staging/rdma/ehca/hipz_fns.h
deleted file mode 100644
index 9dac93d..0000000
--- a/drivers/staging/rdma/ehca/hipz_fns.h
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  HW abstraction register functions
- *
- *  Authors: Christoph Raisch <raisch@de.ibm.com>
- *           Reinhard Ernst <rernst@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __HIPZ_FNS_H__
-#define __HIPZ_FNS_H__
-
-#include "ehca_classes.h"
-#include "hipz_hw.h"
-
-#include "hipz_fns_core.h"
-
-#define hipz_galpa_store_eq(gal, offset, value) \
-	hipz_galpa_store(gal, EQTEMM_OFFSET(offset), value)
-
-#define hipz_galpa_load_eq(gal, offset) \
-	hipz_galpa_load(gal, EQTEMM_OFFSET(offset))
-
-#define hipz_galpa_store_qped(gal, offset, value) \
-	hipz_galpa_store(gal, QPEDMM_OFFSET(offset), value)
-
-#define hipz_galpa_load_qped(gal, offset) \
-	hipz_galpa_load(gal, QPEDMM_OFFSET(offset))
-
-#define hipz_galpa_store_mrmw(gal, offset, value) \
-	hipz_galpa_store(gal, MRMWMM_OFFSET(offset), value)
-
-#define hipz_galpa_load_mrmw(gal, offset) \
-	hipz_galpa_load(gal, MRMWMM_OFFSET(offset))
-
-#endif
diff --git a/drivers/staging/rdma/ehca/hipz_fns_core.h b/drivers/staging/rdma/ehca/hipz_fns_core.h
deleted file mode 100644
index 868735f..0000000
--- a/drivers/staging/rdma/ehca/hipz_fns_core.h
+++ /dev/null
@@ -1,100 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  HW abstraction register functions
- *
- *  Authors: Christoph Raisch <raisch@de.ibm.com>
- *           Heiko J Schick <schickhj@de.ibm.com>
- *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
- *           Reinhard Ernst <rernst@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __HIPZ_FNS_CORE_H__
-#define __HIPZ_FNS_CORE_H__
-
-#include "hcp_phyp.h"
-#include "hipz_hw.h"
-
-#define hipz_galpa_store_cq(gal, offset, value) \
-	hipz_galpa_store(gal, CQTEMM_OFFSET(offset), value)
-
-#define hipz_galpa_load_cq(gal, offset) \
-	hipz_galpa_load(gal, CQTEMM_OFFSET(offset))
-
-#define hipz_galpa_store_qp(gal, offset, value) \
-	hipz_galpa_store(gal, QPTEMM_OFFSET(offset), value)
-#define hipz_galpa_load_qp(gal, offset) \
-	hipz_galpa_load(gal, QPTEMM_OFFSET(offset))
-
-static inline void hipz_update_sqa(struct ehca_qp *qp, u16 nr_wqes)
-{
-	/*  ringing doorbell :-) */
-	hipz_galpa_store_qp(qp->galpas.kernel, qpx_sqa,
-			    EHCA_BMASK_SET(QPX_SQADDER, nr_wqes));
-}
-
-static inline void hipz_update_rqa(struct ehca_qp *qp, u16 nr_wqes)
-{
-	/*  ringing doorbell :-) */
-	hipz_galpa_store_qp(qp->galpas.kernel, qpx_rqa,
-			    EHCA_BMASK_SET(QPX_RQADDER, nr_wqes));
-}
-
-static inline void hipz_update_feca(struct ehca_cq *cq, u32 nr_cqes)
-{
-	hipz_galpa_store_cq(cq->galpas.kernel, cqx_feca,
-			    EHCA_BMASK_SET(CQX_FECADDER, nr_cqes));
-}
-
-static inline void hipz_set_cqx_n0(struct ehca_cq *cq, u32 value)
-{
-	u64 cqx_n0_reg;
-
-	hipz_galpa_store_cq(cq->galpas.kernel, cqx_n0,
-			    EHCA_BMASK_SET(CQX_N0_GENERATE_SOLICITED_COMP_EVENT,
-					   value));
-	cqx_n0_reg = hipz_galpa_load_cq(cq->galpas.kernel, cqx_n0);
-}
-
-static inline void hipz_set_cqx_n1(struct ehca_cq *cq, u32 value)
-{
-	u64 cqx_n1_reg;
-
-	hipz_galpa_store_cq(cq->galpas.kernel, cqx_n1,
-			    EHCA_BMASK_SET(CQX_N1_GENERATE_COMP_EVENT, value));
-	cqx_n1_reg = hipz_galpa_load_cq(cq->galpas.kernel, cqx_n1);
-}
-
-#endif /* __HIPZ_FNC_CORE_H__ */
diff --git a/drivers/staging/rdma/ehca/hipz_hw.h b/drivers/staging/rdma/ehca/hipz_hw.h
deleted file mode 100644
index bf996c7..0000000
--- a/drivers/staging/rdma/ehca/hipz_hw.h
+++ /dev/null
@@ -1,414 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  eHCA register definitions
- *
- *  Authors: Waleri Fomin <fomin@de.ibm.com>
- *           Christoph Raisch <raisch@de.ibm.com>
- *           Reinhard Ernst <rernst@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __HIPZ_HW_H__
-#define __HIPZ_HW_H__
-
-#include "ehca_tools.h"
-
-#define EHCA_MAX_MTU 4
-
-/* QP Table Entry Memory Map */
-struct hipz_qptemm {
-	u64 qpx_hcr;
-	u64 qpx_c;
-	u64 qpx_herr;
-	u64 qpx_aer;
-/* 0x20*/
-	u64 qpx_sqa;
-	u64 qpx_sqc;
-	u64 qpx_rqa;
-	u64 qpx_rqc;
-/* 0x40*/
-	u64 qpx_st;
-	u64 qpx_pmstate;
-	u64 qpx_pmfa;
-	u64 qpx_pkey;
-/* 0x60*/
-	u64 qpx_pkeya;
-	u64 qpx_pkeyb;
-	u64 qpx_pkeyc;
-	u64 qpx_pkeyd;
-/* 0x80*/
-	u64 qpx_qkey;
-	u64 qpx_dqp;
-	u64 qpx_dlidp;
-	u64 qpx_portp;
-/* 0xa0*/
-	u64 qpx_slidp;
-	u64 qpx_slidpp;
-	u64 qpx_dlida;
-	u64 qpx_porta;
-/* 0xc0*/
-	u64 qpx_slida;
-	u64 qpx_slidpa;
-	u64 qpx_slvl;
-	u64 qpx_ipd;
-/* 0xe0*/
-	u64 qpx_mtu;
-	u64 qpx_lato;
-	u64 qpx_rlimit;
-	u64 qpx_rnrlimit;
-/* 0x100*/
-	u64 qpx_t;
-	u64 qpx_sqhp;
-	u64 qpx_sqptp;
-	u64 qpx_nspsn;
-/* 0x120*/
-	u64 qpx_nspsnhwm;
-	u64 reserved1;
-	u64 qpx_sdsi;
-	u64 qpx_sdsbc;
-/* 0x140*/
-	u64 qpx_sqwsize;
-	u64 qpx_sqwts;
-	u64 qpx_lsn;
-	u64 qpx_nssn;
-/* 0x160 */
-	u64 qpx_mor;
-	u64 qpx_cor;
-	u64 qpx_sqsize;
-	u64 qpx_erc;
-/* 0x180*/
-	u64 qpx_rnrrc;
-	u64 qpx_ernrwt;
-	u64 qpx_rnrresp;
-	u64 qpx_lmsna;
-/* 0x1a0 */
-	u64 qpx_sqhpc;
-	u64 qpx_sqcptp;
-	u64 qpx_sigt;
-	u64 qpx_wqecnt;
-/* 0x1c0*/
-	u64 qpx_rqhp;
-	u64 qpx_rqptp;
-	u64 qpx_rqsize;
-	u64 qpx_nrr;
-/* 0x1e0*/
-	u64 qpx_rdmac;
-	u64 qpx_nrpsn;
-	u64 qpx_lapsn;
-	u64 qpx_lcr;
-/* 0x200*/
-	u64 qpx_rwc;
-	u64 qpx_rwva;
-	u64 qpx_rdsi;
-	u64 qpx_rdsbc;
-/* 0x220*/
-	u64 qpx_rqwsize;
-	u64 qpx_crmsn;
-	u64 qpx_rdd;
-	u64 qpx_larpsn;
-/* 0x240*/
-	u64 qpx_pd;
-	u64 qpx_scqn;
-	u64 qpx_rcqn;
-	u64 qpx_aeqn;
-/* 0x260*/
-	u64 qpx_aaelog;
-	u64 qpx_ram;
-	u64 qpx_rdmaqe0;
-	u64 qpx_rdmaqe1;
-/* 0x280*/
-	u64 qpx_rdmaqe2;
-	u64 qpx_rdmaqe3;
-	u64 qpx_nrpsnhwm;
-/* 0x298*/
-	u64 reserved[(0x400 - 0x298) / 8];
-/* 0x400 extended data */
-	u64 reserved_ext[(0x500 - 0x400) / 8];
-/* 0x500 */
-	u64 reserved2[(0x1000 - 0x500) / 8];
-/* 0x1000      */
-};
-
-#define QPX_SQADDER EHCA_BMASK_IBM(48, 63)
-#define QPX_RQADDER EHCA_BMASK_IBM(48, 63)
-#define QPX_AAELOG_RESET_SRQ_LIMIT EHCA_BMASK_IBM(3, 3)
-
-#define QPTEMM_OFFSET(x) offsetof(struct hipz_qptemm, x)
-
-/* MRMWPT Entry Memory Map */
-struct hipz_mrmwmm {
-	/* 0x00 */
-	u64 mrx_hcr;
-
-	u64 mrx_c;
-	u64 mrx_herr;
-	u64 mrx_aer;
-	/* 0x20 */
-	u64 mrx_pp;
-	u64 reserved1;
-	u64 reserved2;
-	u64 reserved3;
-	/* 0x40 */
-	u64 reserved4[(0x200 - 0x40) / 8];
-	/* 0x200 */
-	u64 mrx_ctl[64];
-
-};
-
-#define MRMWMM_OFFSET(x) offsetof(struct hipz_mrmwmm, x)
-
-struct hipz_qpedmm {
-	/* 0x00 */
-	u64 reserved0[(0x400) / 8];
-	/* 0x400 */
-	u64 qpedx_phh;
-	u64 qpedx_ppsgp;
-	/* 0x410 */
-	u64 qpedx_ppsgu;
-	u64 qpedx_ppdgp;
-	/* 0x420 */
-	u64 qpedx_ppdgu;
-	u64 qpedx_aph;
-	/* 0x430 */
-	u64 qpedx_apsgp;
-	u64 qpedx_apsgu;
-	/* 0x440 */
-	u64 qpedx_apdgp;
-	u64 qpedx_apdgu;
-	/* 0x450 */
-	u64 qpedx_apav;
-	u64 qpedx_apsav;
-	/* 0x460  */
-	u64 qpedx_hcr;
-	u64 reserved1[4];
-	/* 0x488 */
-	u64 qpedx_rrl0;
-	/* 0x490 */
-	u64 qpedx_rrrkey0;
-	u64 qpedx_rrva0;
-	/* 0x4a0 */
-	u64 reserved2;
-	u64 qpedx_rrl1;
-	/* 0x4b0 */
-	u64 qpedx_rrrkey1;
-	u64 qpedx_rrva1;
-	/* 0x4c0 */
-	u64 reserved3;
-	u64 qpedx_rrl2;
-	/* 0x4d0 */
-	u64 qpedx_rrrkey2;
-	u64 qpedx_rrva2;
-	/* 0x4e0 */
-	u64 reserved4;
-	u64 qpedx_rrl3;
-	/* 0x4f0 */
-	u64 qpedx_rrrkey3;
-	u64 qpedx_rrva3;
-};
-
-#define QPEDMM_OFFSET(x) offsetof(struct hipz_qpedmm, x)
-
-/* CQ Table Entry Memory Map */
-struct hipz_cqtemm {
-	u64 cqx_hcr;
-	u64 cqx_c;
-	u64 cqx_herr;
-	u64 cqx_aer;
-/* 0x20  */
-	u64 cqx_ptp;
-	u64 cqx_tp;
-	u64 cqx_fec;
-	u64 cqx_feca;
-/* 0x40  */
-	u64 cqx_ep;
-	u64 cqx_eq;
-/* 0x50  */
-	u64 reserved1;
-	u64 cqx_n0;
-/* 0x60  */
-	u64 cqx_n1;
-	u64 reserved2[(0x1000 - 0x60) / 8];
-/* 0x1000 */
-};
-
-#define CQX_FEC_CQE_CNT           EHCA_BMASK_IBM(32, 63)
-#define CQX_FECADDER              EHCA_BMASK_IBM(32, 63)
-#define CQX_N0_GENERATE_SOLICITED_COMP_EVENT EHCA_BMASK_IBM(0, 0)
-#define CQX_N1_GENERATE_COMP_EVENT EHCA_BMASK_IBM(0, 0)
-
-#define CQTEMM_OFFSET(x) offsetof(struct hipz_cqtemm, x)
-
-/* EQ Table Entry Memory Map */
-struct hipz_eqtemm {
-	u64 eqx_hcr;
-	u64 eqx_c;
-
-	u64 eqx_herr;
-	u64 eqx_aer;
-/* 0x20 */
-	u64 eqx_ptp;
-	u64 eqx_tp;
-	u64 eqx_ssba;
-	u64 eqx_psba;
-
-/* 0x40 */
-	u64 eqx_cec;
-	u64 eqx_meql;
-	u64 eqx_xisbi;
-	u64 eqx_xisc;
-/* 0x60 */
-	u64 eqx_it;
-
-};
-
-#define EQTEMM_OFFSET(x) offsetof(struct hipz_eqtemm, x)
-
-/* access control defines for MR/MW */
-#define HIPZ_ACCESSCTRL_L_WRITE  0x00800000
-#define HIPZ_ACCESSCTRL_R_WRITE  0x00400000
-#define HIPZ_ACCESSCTRL_R_READ   0x00200000
-#define HIPZ_ACCESSCTRL_R_ATOMIC 0x00100000
-#define HIPZ_ACCESSCTRL_MW_BIND  0x00080000
-
-/* query hca response block */
-struct hipz_query_hca {
-	u32 cur_reliable_dg;
-	u32 cur_qp;
-	u32 cur_cq;
-	u32 cur_eq;
-	u32 cur_mr;
-	u32 cur_mw;
-	u32 cur_ee_context;
-	u32 cur_mcast_grp;
-	u32 cur_qp_attached_mcast_grp;
-	u32 reserved1;
-	u32 cur_ipv6_qp;
-	u32 cur_eth_qp;
-	u32 cur_hp_mr;
-	u32 reserved2[3];
-	u32 max_rd_domain;
-	u32 max_qp;
-	u32 max_cq;
-	u32 max_eq;
-	u32 max_mr;
-	u32 max_hp_mr;
-	u32 max_mw;
-	u32 max_mrwpte;
-	u32 max_special_mrwpte;
-	u32 max_rd_ee_context;
-	u32 max_mcast_grp;
-	u32 max_total_mcast_qp_attach;
-	u32 max_mcast_qp_attach;
-	u32 max_raw_ipv6_qp;
-	u32 max_raw_ethy_qp;
-	u32 internal_clock_frequency;
-	u32 max_pd;
-	u32 max_ah;
-	u32 max_cqe;
-	u32 max_wqes_wq;
-	u32 max_partitions;
-	u32 max_rr_ee_context;
-	u32 max_rr_qp;
-	u32 max_rr_hca;
-	u32 max_act_wqs_ee_context;
-	u32 max_act_wqs_qp;
-	u32 max_sge;
-	u32 max_sge_rd;
-	u32 memory_page_size_supported;
-	u64 max_mr_size;
-	u32 local_ca_ack_delay;
-	u32 num_ports;
-	u32 vendor_id;
-	u32 vendor_part_id;
-	u32 hw_ver;
-	u64 node_guid;
-	u64 hca_cap_indicators;
-	u32 data_counter_register_size;
-	u32 max_shared_rq;
-	u32 max_isns_eq;
-	u32 max_neq;
-} __attribute__ ((packed));
-
-#define HCA_CAP_AH_PORT_NR_CHECK      EHCA_BMASK_IBM( 0,  0)
-#define HCA_CAP_ATOMIC                EHCA_BMASK_IBM( 1,  1)
-#define HCA_CAP_AUTO_PATH_MIG         EHCA_BMASK_IBM( 2,  2)
-#define HCA_CAP_BAD_P_KEY_CTR         EHCA_BMASK_IBM( 3,  3)
-#define HCA_CAP_SQD_RTS_PORT_CHANGE   EHCA_BMASK_IBM( 4,  4)
-#define HCA_CAP_CUR_QP_STATE_MOD      EHCA_BMASK_IBM( 5,  5)
-#define HCA_CAP_INIT_TYPE             EHCA_BMASK_IBM( 6,  6)
-#define HCA_CAP_PORT_ACTIVE_EVENT     EHCA_BMASK_IBM( 7,  7)
-#define HCA_CAP_Q_KEY_VIOL_CTR        EHCA_BMASK_IBM( 8,  8)
-#define HCA_CAP_WQE_RESIZE            EHCA_BMASK_IBM( 9,  9)
-#define HCA_CAP_RAW_PACKET_MCAST      EHCA_BMASK_IBM(10, 10)
-#define HCA_CAP_SHUTDOWN_PORT         EHCA_BMASK_IBM(11, 11)
-#define HCA_CAP_RC_LL_QP              EHCA_BMASK_IBM(12, 12)
-#define HCA_CAP_SRQ                   EHCA_BMASK_IBM(13, 13)
-#define HCA_CAP_UD_LL_QP              EHCA_BMASK_IBM(16, 16)
-#define HCA_CAP_RESIZE_MR             EHCA_BMASK_IBM(17, 17)
-#define HCA_CAP_MINI_QP               EHCA_BMASK_IBM(18, 18)
-#define HCA_CAP_H_ALLOC_RES_SYNC      EHCA_BMASK_IBM(19, 19)
-
-/* query port response block */
-struct hipz_query_port {
-	u32 state;
-	u32 bad_pkey_cntr;
-	u32 lmc;
-	u32 lid;
-	u32 subnet_timeout;
-	u32 qkey_viol_cntr;
-	u32 sm_sl;
-	u32 sm_lid;
-	u32 capability_mask;
-	u32 init_type_reply;
-	u32 pkey_tbl_len;
-	u32 gid_tbl_len;
-	u64 gid_prefix;
-	u32 port_nr;
-	u16 pkey_entries[16];
-	u8  reserved1[32];
-	u32 trent_size;
-	u32 trbuf_size;
-	u64 max_msg_sz;
-	u32 max_mtu;
-	u32 vl_cap;
-	u32 phys_pstate;
-	u32 phys_state;
-	u32 phys_speed;
-	u32 phys_width;
-	u8  reserved2[1884];
-	u64 guid_entries[255];
-} __attribute__ ((packed));
-
-#endif
diff --git a/drivers/staging/rdma/ehca/ipz_pt_fn.c b/drivers/staging/rdma/ehca/ipz_pt_fn.c
deleted file mode 100644
index 7ffc748..0000000
--- a/drivers/staging/rdma/ehca/ipz_pt_fn.c
+++ /dev/null
@@ -1,289 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  internal queue handling
- *
- *  Authors: Waleri Fomin <fomin@de.ibm.com>
- *           Reinhard Ernst <rernst@de.ibm.com>
- *           Christoph Raisch <raisch@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/slab.h>
-
-#include "ehca_tools.h"
-#include "ipz_pt_fn.h"
-#include "ehca_classes.h"
-
-#define PAGES_PER_KPAGE (PAGE_SIZE >> EHCA_PAGESHIFT)
-
-struct kmem_cache *small_qp_cache;
-
-void *ipz_qpageit_get_inc(struct ipz_queue *queue)
-{
-	void *ret = ipz_qeit_get(queue);
-	queue->current_q_offset += queue->pagesize;
-	if (queue->current_q_offset > queue->queue_length) {
-		queue->current_q_offset -= queue->pagesize;
-		ret = NULL;
-	}
-	if (((u64)ret) % queue->pagesize) {
-		ehca_gen_err("ERROR!! not at PAGE-Boundary");
-		return NULL;
-	}
-	return ret;
-}
-
-void *ipz_qeit_eq_get_inc(struct ipz_queue *queue)
-{
-	void *ret = ipz_qeit_get(queue);
-	u64 last_entry_in_q = queue->queue_length - queue->qe_size;
-
-	queue->current_q_offset += queue->qe_size;
-	if (queue->current_q_offset > last_entry_in_q) {
-		queue->current_q_offset = 0;
-		queue->toggle_state = (~queue->toggle_state) & 1;
-	}
-
-	return ret;
-}
-
-int ipz_queue_abs_to_offset(struct ipz_queue *queue, u64 addr, u64 *q_offset)
-{
-	int i;
-	for (i = 0; i < queue->queue_length / queue->pagesize; i++) {
-		u64 page = __pa(queue->queue_pages[i]);
-		if (addr >= page && addr < page + queue->pagesize) {
-			*q_offset = addr - page + i * queue->pagesize;
-			return 0;
-		}
-	}
-	return -EINVAL;
-}
-
-#if PAGE_SHIFT < EHCA_PAGESHIFT
-#error Kernel pages must be at least as large than eHCA pages (4K) !
-#endif
-
-/*
- * allocate pages for queue:
- * outer loop allocates whole kernel pages (page aligned) and
- * inner loop divides a kernel page into smaller hca queue pages
- */
-static int alloc_queue_pages(struct ipz_queue *queue, const u32 nr_of_pages)
-{
-	int k, f = 0;
-	u8 *kpage;
-
-	while (f < nr_of_pages) {
-		kpage = (u8 *)get_zeroed_page(GFP_KERNEL);
-		if (!kpage)
-			goto out;
-
-		for (k = 0; k < PAGES_PER_KPAGE && f < nr_of_pages; k++) {
-			queue->queue_pages[f] = (struct ipz_page *)kpage;
-			kpage += EHCA_PAGESIZE;
-			f++;
-		}
-	}
-	return 1;
-
-out:
-	for (f = 0; f < nr_of_pages && queue->queue_pages[f];
-	     f += PAGES_PER_KPAGE)
-		free_page((unsigned long)(queue->queue_pages)[f]);
-	return 0;
-}
-
-static int alloc_small_queue_page(struct ipz_queue *queue, struct ehca_pd *pd)
-{
-	int order = ilog2(queue->pagesize) - 9;
-	struct ipz_small_queue_page *page;
-	unsigned long bit;
-
-	mutex_lock(&pd->lock);
-
-	if (!list_empty(&pd->free[order]))
-		page = list_entry(pd->free[order].next,
-				  struct ipz_small_queue_page, list);
-	else {
-		page = kmem_cache_zalloc(small_qp_cache, GFP_KERNEL);
-		if (!page)
-			goto out;
-
-		page->page = get_zeroed_page(GFP_KERNEL);
-		if (!page->page) {
-			kmem_cache_free(small_qp_cache, page);
-			goto out;
-		}
-
-		list_add(&page->list, &pd->free[order]);
-	}
-
-	bit = find_first_zero_bit(page->bitmap, IPZ_SPAGE_PER_KPAGE >> order);
-	__set_bit(bit, page->bitmap);
-	page->fill++;
-
-	if (page->fill == IPZ_SPAGE_PER_KPAGE >> order)
-		list_move(&page->list, &pd->full[order]);
-
-	mutex_unlock(&pd->lock);
-
-	queue->queue_pages[0] = (void *)(page->page | (bit << (order + 9)));
-	queue->small_page = page;
-	queue->offset = bit << (order + 9);
-	return 1;
-
-out:
-	ehca_err(pd->ib_pd.device, "failed to allocate small queue page");
-	mutex_unlock(&pd->lock);
-	return 0;
-}
-
-static void free_small_queue_page(struct ipz_queue *queue, struct ehca_pd *pd)
-{
-	int order = ilog2(queue->pagesize) - 9;
-	struct ipz_small_queue_page *page = queue->small_page;
-	unsigned long bit;
-	int free_page = 0;
-
-	bit = ((unsigned long)queue->queue_pages[0] & ~PAGE_MASK)
-		>> (order + 9);
-
-	mutex_lock(&pd->lock);
-
-	__clear_bit(bit, page->bitmap);
-	page->fill--;
-
-	if (page->fill == 0) {
-		list_del(&page->list);
-		free_page = 1;
-	}
-
-	if (page->fill == (IPZ_SPAGE_PER_KPAGE >> order) - 1)
-		/* the page was full until we freed the chunk */
-		list_move_tail(&page->list, &pd->free[order]);
-
-	mutex_unlock(&pd->lock);
-
-	if (free_page) {
-		free_page(page->page);
-		kmem_cache_free(small_qp_cache, page);
-	}
-}
-
-int ipz_queue_ctor(struct ehca_pd *pd, struct ipz_queue *queue,
-		   const u32 nr_of_pages, const u32 pagesize,
-		   const u32 qe_size, const u32 nr_of_sg,
-		   int is_small)
-{
-	if (pagesize > PAGE_SIZE) {
-		ehca_gen_err("FATAL ERROR: pagesize=%x "
-			     "is greater than kernel page size", pagesize);
-		return 0;
-	}
-
-	/* init queue fields */
-	queue->queue_length = nr_of_pages * pagesize;
-	queue->pagesize = pagesize;
-	queue->qe_size = qe_size;
-	queue->act_nr_of_sg = nr_of_sg;
-	queue->current_q_offset = 0;
-	queue->toggle_state = 1;
-	queue->small_page = NULL;
-
-	/* allocate queue page pointers */
-	queue->queue_pages = kzalloc(nr_of_pages * sizeof(void *),
-				     GFP_KERNEL | __GFP_NOWARN);
-	if (!queue->queue_pages) {
-		queue->queue_pages = vzalloc(nr_of_pages * sizeof(void *));
-		if (!queue->queue_pages) {
-			ehca_gen_err("Couldn't allocate queue page list");
-			return 0;
-		}
-	}
-
-	/* allocate actual queue pages */
-	if (is_small) {
-		if (!alloc_small_queue_page(queue, pd))
-			goto ipz_queue_ctor_exit0;
-	} else
-		if (!alloc_queue_pages(queue, nr_of_pages))
-			goto ipz_queue_ctor_exit0;
-
-	return 1;
-
-ipz_queue_ctor_exit0:
-	ehca_gen_err("Couldn't alloc pages queue=%p "
-		 "nr_of_pages=%x",  queue, nr_of_pages);
-	kvfree(queue->queue_pages);
-
-	return 0;
-}
-
-int ipz_queue_dtor(struct ehca_pd *pd, struct ipz_queue *queue)
-{
-	int i, nr_pages;
-
-	if (!queue || !queue->queue_pages) {
-		ehca_gen_dbg("queue or queue_pages is NULL");
-		return 0;
-	}
-
-	if (queue->small_page)
-		free_small_queue_page(queue, pd);
-	else {
-		nr_pages = queue->queue_length / queue->pagesize;
-		for (i = 0; i < nr_pages; i += PAGES_PER_KPAGE)
-			free_page((unsigned long)queue->queue_pages[i]);
-	}
-
-	kvfree(queue->queue_pages);
-
-	return 1;
-}
-
-int ehca_init_small_qp_cache(void)
-{
-	small_qp_cache = kmem_cache_create("ehca_cache_small_qp",
-					   sizeof(struct ipz_small_queue_page),
-					   0, SLAB_HWCACHE_ALIGN, NULL);
-	if (!small_qp_cache)
-		return -ENOMEM;
-
-	return 0;
-}
-
-void ehca_cleanup_small_qp_cache(void)
-{
-	kmem_cache_destroy(small_qp_cache);
-}
diff --git a/drivers/staging/rdma/ehca/ipz_pt_fn.h b/drivers/staging/rdma/ehca/ipz_pt_fn.h
deleted file mode 100644
index a801274..0000000
--- a/drivers/staging/rdma/ehca/ipz_pt_fn.h
+++ /dev/null
@@ -1,289 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  internal queue handling
- *
- *  Authors: Waleri Fomin <fomin@de.ibm.com>
- *           Reinhard Ernst <rernst@de.ibm.com>
- *           Christoph Raisch <raisch@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __IPZ_PT_FN_H__
-#define __IPZ_PT_FN_H__
-
-#define EHCA_PAGESHIFT   12
-#define EHCA_PAGESIZE   4096UL
-#define EHCA_PAGEMASK   (~(EHCA_PAGESIZE-1))
-#define EHCA_PT_ENTRIES 512UL
-
-#include "ehca_tools.h"
-#include "ehca_qes.h"
-
-struct ehca_pd;
-struct ipz_small_queue_page;
-
-extern struct kmem_cache *small_qp_cache;
-
-/* struct generic ehca page */
-struct ipz_page {
-	u8 entries[EHCA_PAGESIZE];
-};
-
-#define IPZ_SPAGE_PER_KPAGE (PAGE_SIZE / 512)
-
-struct ipz_small_queue_page {
-	unsigned long page;
-	unsigned long bitmap[IPZ_SPAGE_PER_KPAGE / BITS_PER_LONG];
-	int fill;
-	void *mapped_addr;
-	u32 mmap_count;
-	struct list_head list;
-};
-
-/* struct generic queue in linux kernel virtual memory (kv) */
-struct ipz_queue {
-	u64 current_q_offset;	/* current queue entry */
-
-	struct ipz_page **queue_pages;	/* array of pages belonging to queue */
-	u32 qe_size;		/* queue entry size */
-	u32 act_nr_of_sg;
-	u32 queue_length;	/* queue length allocated in bytes */
-	u32 pagesize;
-	u32 toggle_state;	/* toggle flag - per page */
-	u32 offset; /* save offset within page for small_qp */
-	struct ipz_small_queue_page *small_page;
-};
-
-/*
- * return current Queue Entry for a certain q_offset
- * returns address (kv) of Queue Entry
- */
-static inline void *ipz_qeit_calc(struct ipz_queue *queue, u64 q_offset)
-{
-	struct ipz_page *current_page;
-	if (q_offset >= queue->queue_length)
-		return NULL;
-	current_page = (queue->queue_pages)[q_offset >> EHCA_PAGESHIFT];
-	return &current_page->entries[q_offset & (EHCA_PAGESIZE - 1)];
-}
-
-/*
- * return current Queue Entry
- * returns address (kv) of Queue Entry
- */
-static inline void *ipz_qeit_get(struct ipz_queue *queue)
-{
-	return ipz_qeit_calc(queue, queue->current_q_offset);
-}
-
-/*
- * return current Queue Page , increment Queue Page iterator from
- * page to page in struct ipz_queue, last increment will return 0! and
- * NOT wrap
- * returns address (kv) of Queue Page
- * warning don't use in parallel with ipz_QE_get_inc()
- */
-void *ipz_qpageit_get_inc(struct ipz_queue *queue);
-
-/*
- * return current Queue Entry, increment Queue Entry iterator by one
- * step in struct ipz_queue, will wrap in ringbuffer
- * returns address (kv) of Queue Entry BEFORE increment
- * warning don't use in parallel with ipz_qpageit_get_inc()
- */
-static inline void *ipz_qeit_get_inc(struct ipz_queue *queue)
-{
-	void *ret = ipz_qeit_get(queue);
-	queue->current_q_offset += queue->qe_size;
-	if (queue->current_q_offset >= queue->queue_length) {
-		queue->current_q_offset = 0;
-		/* toggle the valid flag */
-		queue->toggle_state = (~queue->toggle_state) & 1;
-	}
-
-	return ret;
-}
-
-/*
- * return a bool indicating whether current Queue Entry is valid
- */
-static inline int ipz_qeit_is_valid(struct ipz_queue *queue)
-{
-	struct ehca_cqe *cqe = ipz_qeit_get(queue);
-	return ((cqe->cqe_flags >> 7) == (queue->toggle_state & 1));
-}
-
-/*
- * return current Queue Entry, increment Queue Entry iterator by one
- * step in struct ipz_queue, will wrap in ringbuffer
- * returns address (kv) of Queue Entry BEFORE increment
- * returns 0 and does not increment, if wrong valid state
- * warning don't use in parallel with ipz_qpageit_get_inc()
- */
-static inline void *ipz_qeit_get_inc_valid(struct ipz_queue *queue)
-{
-	return ipz_qeit_is_valid(queue) ? ipz_qeit_get_inc(queue) : NULL;
-}
-
-/*
- * returns and resets Queue Entry iterator
- * returns address (kv) of first Queue Entry
- */
-static inline void *ipz_qeit_reset(struct ipz_queue *queue)
-{
-	queue->current_q_offset = 0;
-	return ipz_qeit_get(queue);
-}
-
-/*
- * return the q_offset corresponding to an absolute address
- */
-int ipz_queue_abs_to_offset(struct ipz_queue *queue, u64 addr, u64 *q_offset);
-
-/*
- * return the next queue offset. don't modify the queue.
- */
-static inline u64 ipz_queue_advance_offset(struct ipz_queue *queue, u64 offset)
-{
-	offset += queue->qe_size;
-	if (offset >= queue->queue_length) offset = 0;
-	return offset;
-}
-
-/* struct generic page table */
-struct ipz_pt {
-	u64 entries[EHCA_PT_ENTRIES];
-};
-
-/* struct page table for a queue, only to be used in pf */
-struct ipz_qpt {
-	/* queue page tables (kv), use u64 because we know the element length */
-	u64 *qpts;
-	u32 n_qpts;
-	u32 n_ptes;       /*  number of page table entries */
-	u64 *current_pte_addr;
-};
-
-/*
- * constructor for a ipz_queue_t, placement new for ipz_queue_t,
- * new for all dependent datastructors
- * all QP Tables are the same
- * flow:
- *    allocate+pin queue
- * see ipz_qpt_ctor()
- * returns true if ok, false if out of memory
- */
-int ipz_queue_ctor(struct ehca_pd *pd, struct ipz_queue *queue,
-		   const u32 nr_of_pages, const u32 pagesize,
-		   const u32 qe_size, const u32 nr_of_sg,
-		   int is_small);
-
-/*
- * destructor for a ipz_queue_t
- *  -# free queue
- *  see ipz_queue_ctor()
- *  returns true if ok, false if queue was NULL-ptr of free failed
- */
-int ipz_queue_dtor(struct ehca_pd *pd, struct ipz_queue *queue);
-
-/*
- * constructor for a ipz_qpt_t,
- * placement new for struct ipz_queue, new for all dependent datastructors
- * all QP Tables are the same,
- * flow:
- * -# allocate+pin queue
- * -# initialise ptcb
- * -# allocate+pin PTs
- * -# link PTs to a ring, according to HCA Arch, set bit62 id needed
- * -# the ring must have room for exactly nr_of_PTEs
- * see ipz_qpt_ctor()
- */
-void ipz_qpt_ctor(struct ipz_qpt *qpt,
-		  const u32 nr_of_qes,
-		  const u32 pagesize,
-		  const u32 qe_size,
-		  const u8 lowbyte, const u8 toggle,
-		  u32 * act_nr_of_QEs, u32 * act_nr_of_pages);
-
-/*
- * return current Queue Entry, increment Queue Entry iterator by one
- * step in struct ipz_queue, will wrap in ringbuffer
- * returns address (kv) of Queue Entry BEFORE increment
- * warning don't use in parallel with ipz_qpageit_get_inc()
- * warning unpredictable results may occur if steps>act_nr_of_queue_entries
- * fix EQ page problems
- */
-void *ipz_qeit_eq_get_inc(struct ipz_queue *queue);
-
-/*
- * return current Event Queue Entry, increment Queue Entry iterator
- * by one step in struct ipz_queue if valid, will wrap in ringbuffer
- * returns address (kv) of Queue Entry BEFORE increment
- * returns 0 and does not increment, if wrong valid state
- * warning don't use in parallel with ipz_queue_QPageit_get_inc()
- * warning unpredictable results may occur if steps>act_nr_of_queue_entries
- */
-static inline void *ipz_eqit_eq_get_inc_valid(struct ipz_queue *queue)
-{
-	void *ret = ipz_qeit_get(queue);
-	u32 qe = *(u8 *)ret;
-	if ((qe >> 7) != (queue->toggle_state & 1))
-		return NULL;
-	ipz_qeit_eq_get_inc(queue); /* this is a good one */
-	return ret;
-}
-
-static inline void *ipz_eqit_eq_peek_valid(struct ipz_queue *queue)
-{
-	void *ret = ipz_qeit_get(queue);
-	u32 qe = *(u8 *)ret;
-	if ((qe >> 7) != (queue->toggle_state & 1))
-		return NULL;
-	return ret;
-}
-
-/* returns address (GX) of first queue entry */
-static inline u64 ipz_qpt_get_firstpage(struct ipz_qpt *qpt)
-{
-	return be64_to_cpu(qpt->qpts[0]);
-}
-
-/* returns address (kv) of first page of queue page table */
-static inline void *ipz_qpt_get_qpt(struct ipz_qpt *qpt)
-{
-	return qpt->qpts;
-}
-
-#endif				/* __IPZ_PT_FN_H__ */
diff --git a/drivers/staging/rdma/ipath/Kconfig b/drivers/staging/rdma/ipath/Kconfig
deleted file mode 100644
index 041ce06..0000000
--- a/drivers/staging/rdma/ipath/Kconfig
+++ /dev/null
@@ -1,16 +0,0 @@
-config INFINIBAND_IPATH
-	tristate "QLogic HTX HCA support"
-	depends on 64BIT && NET && HT_IRQ
-	---help---
-	This is a driver for the deprecated QLogic Hyper-Transport
-	IB host channel adapter (model QHT7140),
-	including InfiniBand verbs support.  This driver allows these
-	devices to be used with both kernel upper level protocols such
-	as IP-over-InfiniBand as well as with userspace applications
-	(in conjunction with InfiniBand userspace access).
-	For QLogic PCIe QLE based cards, use the QIB driver instead.
-
-	If you have this hardware you will need to boot with PAT disabled
-	on your x86-64 systems, use the nopat kernel parameter.
-
-	Note that this driver will soon be removed entirely from the kernel.
diff --git a/drivers/staging/rdma/ipath/Makefile b/drivers/staging/rdma/ipath/Makefile
deleted file mode 100644
index 4496f28..0000000
--- a/drivers/staging/rdma/ipath/Makefile
+++ /dev/null
@@ -1,37 +0,0 @@
-ccflags-y := -DIPATH_IDSTR='"QLogic kernel.org driver"' \
-	-DIPATH_KERN_TYPE=0
-
-obj-$(CONFIG_INFINIBAND_IPATH) += ib_ipath.o
-
-ib_ipath-y := \
-	ipath_cq.o \
-	ipath_diag.o \
-	ipath_dma.o \
-	ipath_driver.o \
-	ipath_eeprom.o \
-	ipath_file_ops.o \
-	ipath_fs.o \
-	ipath_init_chip.o \
-	ipath_intr.o \
-	ipath_keys.o \
-	ipath_mad.o \
-	ipath_mmap.o \
-	ipath_mr.o \
-	ipath_qp.o \
-	ipath_rc.o \
-	ipath_ruc.o \
-	ipath_sdma.o \
-	ipath_srq.o \
-	ipath_stats.o \
-	ipath_sysfs.o \
-	ipath_uc.o \
-	ipath_ud.o \
-	ipath_user_pages.o \
-	ipath_user_sdma.o \
-	ipath_verbs_mcast.o \
-	ipath_verbs.o
-
-ib_ipath-$(CONFIG_HT_IRQ) += ipath_iba6110.o
-
-ib_ipath-$(CONFIG_X86_64) += ipath_wc_x86_64.o
-ib_ipath-$(CONFIG_PPC64) += ipath_wc_ppc64.o
diff --git a/drivers/staging/rdma/ipath/TODO b/drivers/staging/rdma/ipath/TODO
deleted file mode 100644
index cb00158..0000000
--- a/drivers/staging/rdma/ipath/TODO
+++ /dev/null
@@ -1,5 +0,0 @@
-The ipath driver has been moved to staging in preparation for its removal in a
-few releases. The driver will be deleted during the 4.6 merge window.
-
-Contact Dennis Dalessandro <dennis.dalessandro@intel.com> and
-Cc: linux-rdma@vger.kernel.org
diff --git a/drivers/staging/rdma/ipath/ipath_common.h b/drivers/staging/rdma/ipath/ipath_common.h
deleted file mode 100644
index 28cfe97..0000000
--- a/drivers/staging/rdma/ipath/ipath_common.h
+++ /dev/null
@@ -1,851 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef _IPATH_COMMON_H
-#define _IPATH_COMMON_H
-
-/*
- * This file contains defines, structures, etc. that are used
- * to communicate between kernel and user code.
- */
-
-
-/* This is the IEEE-assigned OUI for QLogic Inc. InfiniPath */
-#define IPATH_SRC_OUI_1 0x00
-#define IPATH_SRC_OUI_2 0x11
-#define IPATH_SRC_OUI_3 0x75
-
-/* version of protocol header (known to chip also). In the long run,
- * we should be able to generate and accept a range of version numbers;
- * for now we only accept one, and it's compiled in.
- */
-#define IPS_PROTO_VERSION 2
-
-/*
- * These are compile time constants that you may want to enable or disable
- * if you are trying to debug problems with code or performance.
- * IPATH_VERBOSE_TRACING define as 1 if you want additional tracing in
- * fastpath code
- * IPATH_TRACE_REGWRITES define as 1 if you want register writes to be
- * traced in faspath code
- * _IPATH_TRACING define as 0 if you want to remove all tracing in a
- * compilation unit
- * _IPATH_DEBUGGING define as 0 if you want to remove debug prints
- */
-
-/*
- * The value in the BTH QP field that InfiniPath uses to differentiate
- * an infinipath protocol IB packet vs standard IB transport
- */
-#define IPATH_KD_QP 0x656b79
-
-/*
- * valid states passed to ipath_set_linkstate() user call
- */
-#define IPATH_IB_LINKDOWN		0
-#define IPATH_IB_LINKARM		1
-#define IPATH_IB_LINKACTIVE		2
-#define IPATH_IB_LINKDOWN_ONLY		3
-#define IPATH_IB_LINKDOWN_SLEEP		4
-#define IPATH_IB_LINKDOWN_DISABLE	5
-#define IPATH_IB_LINK_LOOPBACK	6 /* enable local loopback */
-#define IPATH_IB_LINK_EXTERNAL	7 /* normal, disable local loopback */
-#define IPATH_IB_LINK_NO_HRTBT	8 /* disable Heartbeat, e.g. for loopback */
-#define IPATH_IB_LINK_HRTBT	9 /* enable heartbeat, normal, non-loopback */
-
-/*
- * These 3 values (SDR and DDR may be ORed for auto-speed
- * negotiation) are used for the 3rd argument to path_f_set_ib_cfg
- * with cmd IPATH_IB_CFG_SPD_ENB, by direct calls or via sysfs.  They
- * are also the the possible values for ipath_link_speed_enabled and active
- * The values were chosen to match values used within the IB spec.
- */
-#define IPATH_IB_SDR 1
-#define IPATH_IB_DDR 2
-
-/*
- * stats maintained by the driver.  For now, at least, this is global
- * to all minor devices.
- */
-struct infinipath_stats {
-	/* number of interrupts taken */
-	__u64 sps_ints;
-	/* number of interrupts for errors */
-	__u64 sps_errints;
-	/* number of errors from chip (not incl. packet errors or CRC) */
-	__u64 sps_errs;
-	/* number of packet errors from chip other than CRC */
-	__u64 sps_pkterrs;
-	/* number of packets with CRC errors (ICRC and VCRC) */
-	__u64 sps_crcerrs;
-	/* number of hardware errors reported (parity, etc.) */
-	__u64 sps_hwerrs;
-	/* number of times IB link changed state unexpectedly */
-	__u64 sps_iblink;
-	__u64 sps_unused; /* was fastrcvint, no longer implemented */
-	/* number of kernel (port0) packets received */
-	__u64 sps_port0pkts;
-	/* number of "ethernet" packets sent by driver */
-	__u64 sps_ether_spkts;
-	/* number of "ethernet" packets received by driver */
-	__u64 sps_ether_rpkts;
-	/* number of SMA packets sent by driver. Obsolete. */
-	__u64 sps_sma_spkts;
-	/* number of SMA packets received by driver. Obsolete. */
-	__u64 sps_sma_rpkts;
-	/* number of times all ports rcvhdrq was full and packet dropped */
-	__u64 sps_hdrqfull;
-	/* number of times all ports egrtid was full and packet dropped */
-	__u64 sps_etidfull;
-	/*
-	 * number of times we tried to send from driver, but no pio buffers
-	 * avail
-	 */
-	__u64 sps_nopiobufs;
-	/* number of ports currently open */
-	__u64 sps_ports;
-	/* list of pkeys (other than default) accepted (0 means not set) */
-	__u16 sps_pkeys[4];
-	__u16 sps_unused16[4]; /* available; maintaining compatible layout */
-	/* number of user ports per chip (not IB ports) */
-	__u32 sps_nports;
-	/* not our interrupt, or already handled */
-	__u32 sps_nullintr;
-	/* max number of packets handled per receive call */
-	__u32 sps_maxpkts_call;
-	/* avg number of packets handled per receive call */
-	__u32 sps_avgpkts_call;
-	/* total number of pages locked */
-	__u64 sps_pagelocks;
-	/* total number of pages unlocked */
-	__u64 sps_pageunlocks;
-	/*
-	 * Number of packets dropped in kernel other than errors (ether
-	 * packets if ipath not configured, etc.)
-	 */
-	__u64 sps_krdrops;
-	__u64 sps_txeparity; /* PIO buffer parity error, recovered */
-	/* pad for future growth */
-	__u64 __sps_pad[45];
-};
-
-/*
- * These are the status bits readable (in ascii form, 64bit value)
- * from the "status" sysfs file.
- */
-#define IPATH_STATUS_INITTED       0x1	/* basic initialization done */
-#define IPATH_STATUS_DISABLED      0x2	/* hardware disabled */
-/* Device has been disabled via admin request */
-#define IPATH_STATUS_ADMIN_DISABLED    0x4
-/* Chip has been found and initted */
-#define IPATH_STATUS_CHIP_PRESENT 0x20
-/* IB link is at ACTIVE, usable for data traffic */
-#define IPATH_STATUS_IB_READY     0x40
-/* link is configured, LID, MTU, etc. have been set */
-#define IPATH_STATUS_IB_CONF      0x80
-/* no link established, probably no cable */
-#define IPATH_STATUS_IB_NOCABLE  0x100
-/* A Fatal hardware error has occurred. */
-#define IPATH_STATUS_HWERROR     0x200
-
-/*
- * The list of usermode accessible registers.  Also see Reg_* later in file.
- */
-typedef enum _ipath_ureg {
-	/* (RO)  DMA RcvHdr to be used next. */
-	ur_rcvhdrtail = 0,
-	/* (RW)  RcvHdr entry to be processed next by host. */
-	ur_rcvhdrhead = 1,
-	/* (RO)  Index of next Eager index to use. */
-	ur_rcvegrindextail = 2,
-	/* (RW)  Eager TID to be processed next */
-	ur_rcvegrindexhead = 3,
-	/* For internal use only; max register number. */
-	_IPATH_UregMax
-} ipath_ureg;
-
-/* bit values for spi_runtime_flags */
-#define IPATH_RUNTIME_HT	0x1
-#define IPATH_RUNTIME_PCIE	0x2
-#define IPATH_RUNTIME_FORCE_WC_ORDER	0x4
-#define IPATH_RUNTIME_RCVHDR_COPY	0x8
-#define IPATH_RUNTIME_MASTER	0x10
-#define IPATH_RUNTIME_NODMA_RTAIL 0x80
-#define IPATH_RUNTIME_SDMA	      0x200
-#define IPATH_RUNTIME_FORCE_PIOAVAIL 0x400
-#define IPATH_RUNTIME_PIO_REGSWAPPED 0x800
-
-/*
- * This structure is returned by ipath_userinit() immediately after
- * open to get implementation-specific info, and info specific to this
- * instance.
- *
- * This struct must have explict pad fields where type sizes
- * may result in different alignments between 32 and 64 bit
- * programs, since the 64 bit * bit kernel requires the user code
- * to have matching offsets
- */
-struct ipath_base_info {
-	/* version of hardware, for feature checking. */
-	__u32 spi_hw_version;
-	/* version of software, for feature checking. */
-	__u32 spi_sw_version;
-	/* InfiniPath port assigned, goes into sent packets */
-	__u16 spi_port;
-	__u16 spi_subport;
-	/*
-	 * IB MTU, packets IB data must be less than this.
-	 * The MTU is in bytes, and will be a multiple of 4 bytes.
-	 */
-	__u32 spi_mtu;
-	/*
-	 * Size of a PIO buffer.  Any given packet's total size must be less
-	 * than this (in words).  Included is the starting control word, so
-	 * if 513 is returned, then total pkt size is 512 words or less.
-	 */
-	__u32 spi_piosize;
-	/* size of the TID cache in infinipath, in entries */
-	__u32 spi_tidcnt;
-	/* size of the TID Eager list in infinipath, in entries */
-	__u32 spi_tidegrcnt;
-	/* size of a single receive header queue entry in words. */
-	__u32 spi_rcvhdrent_size;
-	/*
-	 * Count of receive header queue entries allocated.
-	 * This may be less than the spu_rcvhdrcnt passed in!.
-	 */
-	__u32 spi_rcvhdr_cnt;
-
-	/* per-chip and other runtime features bitmap (IPATH_RUNTIME_*) */
-	__u32 spi_runtime_flags;
-
-	/* address where receive buffer queue is mapped into */
-	__u64 spi_rcvhdr_base;
-
-	/* user program. */
-
-	/* base address of eager TID receive buffers. */
-	__u64 spi_rcv_egrbufs;
-
-	/* Allocated by initialization code, not by protocol. */
-
-	/*
-	 * Size of each TID buffer in host memory, starting at
-	 * spi_rcv_egrbufs.  The buffers are virtually contiguous.
-	 */
-	__u32 spi_rcv_egrbufsize;
-	/*
-	 * The special QP (queue pair) value that identifies an infinipath
-	 * protocol packet from standard IB packets.  More, probably much
-	 * more, to be added.
-	 */
-	__u32 spi_qpair;
-
-	/*
-	 * User register base for init code, not to be used directly by
-	 * protocol or applications.
-	 */
-	__u64 __spi_uregbase;
-	/*
-	 * Maximum buffer size in bytes that can be used in a single TID
-	 * entry (assuming the buffer is aligned to this boundary).  This is
-	 * the minimum of what the hardware and software support Guaranteed
-	 * to be a power of 2.
-	 */
-	__u32 spi_tid_maxsize;
-	/*
-	 * alignment of each pio send buffer (byte count
-	 * to add to spi_piobufbase to get to second buffer)
-	 */
-	__u32 spi_pioalign;
-	/*
-	 * The index of the first pio buffer available to this process;
-	 * needed to do lookup in spi_pioavailaddr; not added to
-	 * spi_piobufbase.
-	 */
-	__u32 spi_pioindex;
-	 /* number of buffers mapped for this process */
-	__u32 spi_piocnt;
-
-	/*
-	 * Base address of writeonly pio buffers for this process.
-	 * Each buffer has spi_piosize words, and is aligned on spi_pioalign
-	 * boundaries.  spi_piocnt buffers are mapped from this address
-	 */
-	__u64 spi_piobufbase;
-
-	/*
-	 * Base address of readonly memory copy of the pioavail registers.
-	 * There are 2 bits for each buffer.
-	 */
-	__u64 spi_pioavailaddr;
-
-	/*
-	 * Address where driver updates a copy of the interface and driver
-	 * status (IPATH_STATUS_*) as a 64 bit value.  It's followed by a
-	 * string indicating hardware error, if there was one.
-	 */
-	__u64 spi_status;
-
-	/* number of chip ports available to user processes */
-	__u32 spi_nports;
-	/* unit number of chip we are using */
-	__u32 spi_unit;
-	/* num bufs in each contiguous set */
-	__u32 spi_rcv_egrperchunk;
-	/* size in bytes of each contiguous set */
-	__u32 spi_rcv_egrchunksize;
-	/* total size of mmap to cover full rcvegrbuffers */
-	__u32 spi_rcv_egrbuftotlen;
-	__u32 spi_filler_for_align;
-	/* address of readonly memory copy of the rcvhdrq tail register. */
-	__u64 spi_rcvhdr_tailaddr;
-
-	/* shared memory pages for subports if port is shared */
-	__u64 spi_subport_uregbase;
-	__u64 spi_subport_rcvegrbuf;
-	__u64 spi_subport_rcvhdr_base;
-
-	/* shared memory page for hardware port if it is shared */
-	__u64 spi_port_uregbase;
-	__u64 spi_port_rcvegrbuf;
-	__u64 spi_port_rcvhdr_base;
-	__u64 spi_port_rcvhdr_tailaddr;
-
-} __attribute__ ((aligned(8)));
-
-
-/*
- * This version number is given to the driver by the user code during
- * initialization in the spu_userversion field of ipath_user_info, so
- * the driver can check for compatibility with user code.
- *
- * The major version changes when data structures
- * change in an incompatible way.  The driver must be the same or higher
- * for initialization to succeed.  In some cases, a higher version
- * driver will not interoperate with older software, and initialization
- * will return an error.
- */
-#define IPATH_USER_SWMAJOR 1
-
-/*
- * Minor version differences are always compatible
- * a within a major version, however if user software is larger
- * than driver software, some new features and/or structure fields
- * may not be implemented; the user code must deal with this if it
- * cares, or it must abort after initialization reports the difference.
- */
-#define IPATH_USER_SWMINOR 6
-
-#define IPATH_USER_SWVERSION ((IPATH_USER_SWMAJOR<<16) | IPATH_USER_SWMINOR)
-
-#define IPATH_KERN_TYPE 0
-
-/*
- * Similarly, this is the kernel version going back to the user.  It's
- * slightly different, in that we want to tell if the driver was built as
- * part of a QLogic release, or from the driver from openfabrics.org,
- * kernel.org, or a standard distribution, for support reasons.
- * The high bit is 0 for non-QLogic and 1 for QLogic-built/supplied.
- *
- * It's returned by the driver to the user code during initialization in the
- * spi_sw_version field of ipath_base_info, so the user code can in turn
- * check for compatibility with the kernel.
-*/
-#define IPATH_KERN_SWVERSION ((IPATH_KERN_TYPE<<31) | IPATH_USER_SWVERSION)
-
-/*
- * This structure is passed to ipath_userinit() to tell the driver where
- * user code buffers are, sizes, etc.   The offsets and sizes of the
- * fields must remain unchanged, for binary compatibility.  It can
- * be extended, if userversion is changed so user code can tell, if needed
- */
-struct ipath_user_info {
-	/*
-	 * version of user software, to detect compatibility issues.
-	 * Should be set to IPATH_USER_SWVERSION.
-	 */
-	__u32 spu_userversion;
-
-	/* desired number of receive header queue entries */
-	__u32 spu_rcvhdrcnt;
-
-	/* size of struct base_info to write to */
-	__u32 spu_base_info_size;
-
-	/*
-	 * number of words in KD protocol header
-	 * This tells InfiniPath how many words to copy to rcvhdrq.  If 0,
-	 * kernel uses a default.  Once set, attempts to set any other value
-	 * are an error (EAGAIN) until driver is reloaded.
-	 */
-	__u32 spu_rcvhdrsize;
-
-	/*
-	 * If two or more processes wish to share a port, each process
-	 * must set the spu_subport_cnt and spu_subport_id to the same
-	 * values.  The only restriction on the spu_subport_id is that
-	 * it be unique for a given node.
-	 */
-	__u16 spu_subport_cnt;
-	__u16 spu_subport_id;
-
-	__u32 spu_unused; /* kept for compatible layout */
-
-	/*
-	 * address of struct base_info to write to
-	 */
-	__u64 spu_base_info;
-
-} __attribute__ ((aligned(8)));
-
-/* User commands. */
-
-#define IPATH_CMD_MIN		16
-
-#define __IPATH_CMD_USER_INIT	16	/* old set up userspace (for old user code) */
-#define IPATH_CMD_PORT_INFO	17	/* find out what resources we got */
-#define IPATH_CMD_RECV_CTRL	18	/* control receipt of packets */
-#define IPATH_CMD_TID_UPDATE	19	/* update expected TID entries */
-#define IPATH_CMD_TID_FREE	20	/* free expected TID entries */
-#define IPATH_CMD_SET_PART_KEY	21	/* add partition key */
-#define __IPATH_CMD_SLAVE_INFO	22	/* return info on slave processes (for old user code) */
-#define IPATH_CMD_ASSIGN_PORT	23	/* allocate HCA and port */
-#define IPATH_CMD_USER_INIT 	24	/* set up userspace */
-#define IPATH_CMD_UNUSED_1	25
-#define IPATH_CMD_UNUSED_2	26
-#define IPATH_CMD_PIOAVAILUPD	27	/* force an update of PIOAvail reg */
-#define IPATH_CMD_POLL_TYPE	28	/* set the kind of polling we want */
-#define IPATH_CMD_ARMLAUNCH_CTRL	29 /* armlaunch detection control */
-/* 30 is unused */
-#define IPATH_CMD_SDMA_INFLIGHT 31	/* sdma inflight counter request */
-#define IPATH_CMD_SDMA_COMPLETE 32	/* sdma completion counter request */
-
-/*
- * Poll types
- */
-#define IPATH_POLL_TYPE_URGENT	 0x01
-#define IPATH_POLL_TYPE_OVERFLOW 0x02
-
-struct ipath_port_info {
-	__u32 num_active;	/* number of active units */
-	__u32 unit;		/* unit (chip) assigned to caller */
-	__u16 port;		/* port on unit assigned to caller */
-	__u16 subport;		/* subport on unit assigned to caller */
-	__u16 num_ports;	/* number of ports available on unit */
-	__u16 num_subports;	/* number of subports opened on port */
-};
-
-struct ipath_tid_info {
-	__u32 tidcnt;
-	/* make structure same size in 32 and 64 bit */
-	__u32 tid__unused;
-	/* virtual address of first page in transfer */
-	__u64 tidvaddr;
-	/* pointer (same size 32/64 bit) to __u16 tid array */
-	__u64 tidlist;
-
-	/*
-	 * pointer (same size 32/64 bit) to bitmap of TIDs used
-	 * for this call; checked for being large enough at open
-	 */
-	__u64 tidmap;
-};
-
-struct ipath_cmd {
-	__u32 type;			/* command type */
-	union {
-		struct ipath_tid_info tid_info;
-		struct ipath_user_info user_info;
-
-		/*
-		 * address in userspace where we should put the sdma
-		 * inflight counter
-		 */
-		__u64 sdma_inflight;
-		/*
-		 * address in userspace where we should put the sdma
-		 * completion counter
-		 */
-		__u64 sdma_complete;
-		/* address in userspace of struct ipath_port_info to
-		   write result to */
-		__u64 port_info;
-		/* enable/disable receipt of packets */
-		__u32 recv_ctrl;
-		/* enable/disable armlaunch errors (non-zero to enable) */
-		__u32 armlaunch_ctrl;
-		/* partition key to set */
-		__u16 part_key;
-		/* user address of __u32 bitmask of active slaves */
-		__u64 slave_mask_addr;
-		/* type of polling we want */
-		__u16 poll_type;
-	} cmd;
-};
-
-struct ipath_iovec {
-	/* Pointer to data, but same size 32 and 64 bit */
-	__u64 iov_base;
-
-	/*
-	 * Length of data; don't need 64 bits, but want
-	 * ipath_sendpkt to remain same size as before 32 bit changes, so...
-	 */
-	__u64 iov_len;
-};
-
-/*
- * Describes a single packet for send.  Each packet can have one or more
- * buffers, but the total length (exclusive of IB headers) must be less
- * than the MTU, and if using the PIO method, entire packet length,
- * including IB headers, must be less than the ipath_piosize value (words).
- * Use of this necessitates including sys/uio.h
- */
-struct __ipath_sendpkt {
-	__u32 sps_flags;	/* flags for packet (TBD) */
-	__u32 sps_cnt;		/* number of entries to use in sps_iov */
-	/* array of iov's describing packet. TEMPORARY */
-	struct ipath_iovec sps_iov[4];
-};
-
-/*
- * diagnostics can send a packet by "writing" one of the following
- * two structs to diag data special file
- * The first is the legacy version for backward compatibility
- */
-struct ipath_diag_pkt {
-	__u32 unit;
-	__u64 data;
-	__u32 len;
-};
-
-/* The second diag_pkt struct is the expanded version that allows
- * more control over the packet, specifically, by allowing a custom
- * pbc (+ static rate) qword, so that special modes and deliberate
- * changes to CRCs can be used. The elements were also re-ordered
- * for better alignment and to avoid padding issues.
- */
-struct ipath_diag_xpkt {
-	__u64 data;
-	__u64 pbc_wd;
-	__u32 unit;
-	__u32 len;
-};
-
-/*
- * Data layout in I2C flash (for GUID, etc.)
- * All fields are little-endian binary unless otherwise stated
- */
-#define IPATH_FLASH_VERSION 2
-struct ipath_flash {
-	/* flash layout version (IPATH_FLASH_VERSION) */
-	__u8 if_fversion;
-	/* checksum protecting if_length bytes */
-	__u8 if_csum;
-	/*
-	 * valid length (in use, protected by if_csum), including
-	 * if_fversion and if_csum themselves)
-	 */
-	__u8 if_length;
-	/* the GUID, in network order */
-	__u8 if_guid[8];
-	/* number of GUIDs to use, starting from if_guid */
-	__u8 if_numguid;
-	/* the (last 10 characters of) board serial number, in ASCII */
-	char if_serial[12];
-	/* board mfg date (YYYYMMDD ASCII) */
-	char if_mfgdate[8];
-	/* last board rework/test date (YYYYMMDD ASCII) */
-	char if_testdate[8];
-	/* logging of error counts, TBD */
-	__u8 if_errcntp[4];
-	/* powered on hours, updated at driver unload */
-	__u8 if_powerhour[2];
-	/* ASCII free-form comment field */
-	char if_comment[32];
-	/* Backwards compatible prefix for longer QLogic Serial Numbers */
-	char if_sprefix[4];
-	/* 82 bytes used, min flash size is 128 bytes */
-	__u8 if_future[46];
-};
-
-/*
- * These are the counters implemented in the chip, and are listed in order.
- * The InterCaps naming is taken straight from the chip spec.
- */
-struct infinipath_counters {
-	__u64 LBIntCnt;
-	__u64 LBFlowStallCnt;
-	__u64 TxSDmaDescCnt;	/* was Reserved1 */
-	__u64 TxUnsupVLErrCnt;
-	__u64 TxDataPktCnt;
-	__u64 TxFlowPktCnt;
-	__u64 TxDwordCnt;
-	__u64 TxLenErrCnt;
-	__u64 TxMaxMinLenErrCnt;
-	__u64 TxUnderrunCnt;
-	__u64 TxFlowStallCnt;
-	__u64 TxDroppedPktCnt;
-	__u64 RxDroppedPktCnt;
-	__u64 RxDataPktCnt;
-	__u64 RxFlowPktCnt;
-	__u64 RxDwordCnt;
-	__u64 RxLenErrCnt;
-	__u64 RxMaxMinLenErrCnt;
-	__u64 RxICRCErrCnt;
-	__u64 RxVCRCErrCnt;
-	__u64 RxFlowCtrlErrCnt;
-	__u64 RxBadFormatCnt;
-	__u64 RxLinkProblemCnt;
-	__u64 RxEBPCnt;
-	__u64 RxLPCRCErrCnt;
-	__u64 RxBufOvflCnt;
-	__u64 RxTIDFullErrCnt;
-	__u64 RxTIDValidErrCnt;
-	__u64 RxPKeyMismatchCnt;
-	__u64 RxP0HdrEgrOvflCnt;
-	__u64 RxP1HdrEgrOvflCnt;
-	__u64 RxP2HdrEgrOvflCnt;
-	__u64 RxP3HdrEgrOvflCnt;
-	__u64 RxP4HdrEgrOvflCnt;
-	__u64 RxP5HdrEgrOvflCnt;
-	__u64 RxP6HdrEgrOvflCnt;
-	__u64 RxP7HdrEgrOvflCnt;
-	__u64 RxP8HdrEgrOvflCnt;
-	__u64 RxP9HdrEgrOvflCnt;	/* was Reserved6 */
-	__u64 RxP10HdrEgrOvflCnt;	/* was Reserved7 */
-	__u64 RxP11HdrEgrOvflCnt;	/* new for IBA7220 */
-	__u64 RxP12HdrEgrOvflCnt;	/* new for IBA7220 */
-	__u64 RxP13HdrEgrOvflCnt;	/* new for IBA7220 */
-	__u64 RxP14HdrEgrOvflCnt;	/* new for IBA7220 */
-	__u64 RxP15HdrEgrOvflCnt;	/* new for IBA7220 */
-	__u64 RxP16HdrEgrOvflCnt;	/* new for IBA7220 */
-	__u64 IBStatusChangeCnt;
-	__u64 IBLinkErrRecoveryCnt;
-	__u64 IBLinkDownedCnt;
-	__u64 IBSymbolErrCnt;
-	/* The following are new for IBA7220 */
-	__u64 RxVL15DroppedPktCnt;
-	__u64 RxOtherLocalPhyErrCnt;
-	__u64 PcieRetryBufDiagQwordCnt;
-	__u64 ExcessBufferOvflCnt;
-	__u64 LocalLinkIntegrityErrCnt;
-	__u64 RxVlErrCnt;
-	__u64 RxDlidFltrCnt;
-};
-
-/*
- * The next set of defines are for packet headers, and chip register
- * and memory bits that are visible to and/or used by user-mode software
- * The other bits that are used only by the driver or diags are in
- * ipath_registers.h
- */
-
-/* RcvHdrFlags bits */
-#define INFINIPATH_RHF_LENGTH_MASK 0x7FF
-#define INFINIPATH_RHF_LENGTH_SHIFT 0
-#define INFINIPATH_RHF_RCVTYPE_MASK 0x7
-#define INFINIPATH_RHF_RCVTYPE_SHIFT 11
-#define INFINIPATH_RHF_EGRINDEX_MASK 0xFFF
-#define INFINIPATH_RHF_EGRINDEX_SHIFT 16
-#define INFINIPATH_RHF_SEQ_MASK 0xF
-#define INFINIPATH_RHF_SEQ_SHIFT 0
-#define INFINIPATH_RHF_HDRQ_OFFSET_MASK 0x7FF
-#define INFINIPATH_RHF_HDRQ_OFFSET_SHIFT 4
-#define INFINIPATH_RHF_H_ICRCERR   0x80000000
-#define INFINIPATH_RHF_H_VCRCERR   0x40000000
-#define INFINIPATH_RHF_H_PARITYERR 0x20000000
-#define INFINIPATH_RHF_H_LENERR    0x10000000
-#define INFINIPATH_RHF_H_MTUERR    0x08000000
-#define INFINIPATH_RHF_H_IHDRERR   0x04000000
-#define INFINIPATH_RHF_H_TIDERR    0x02000000
-#define INFINIPATH_RHF_H_MKERR     0x01000000
-#define INFINIPATH_RHF_H_IBERR     0x00800000
-#define INFINIPATH_RHF_H_ERR_MASK  0xFF800000
-#define INFINIPATH_RHF_L_USE_EGR   0x80000000
-#define INFINIPATH_RHF_L_SWA       0x00008000
-#define INFINIPATH_RHF_L_SWB       0x00004000
-
-/* infinipath header fields */
-#define INFINIPATH_I_VERS_MASK 0xF
-#define INFINIPATH_I_VERS_SHIFT 28
-#define INFINIPATH_I_PORT_MASK 0xF
-#define INFINIPATH_I_PORT_SHIFT 24
-#define INFINIPATH_I_TID_MASK 0x7FF
-#define INFINIPATH_I_TID_SHIFT 13
-#define INFINIPATH_I_OFFSET_MASK 0x1FFF
-#define INFINIPATH_I_OFFSET_SHIFT 0
-
-/* K_PktFlags bits */
-#define INFINIPATH_KPF_INTR 0x1
-#define INFINIPATH_KPF_SUBPORT_MASK 0x3
-#define INFINIPATH_KPF_SUBPORT_SHIFT 1
-
-#define INFINIPATH_MAX_SUBPORT	4
-
-/* SendPIO per-buffer control */
-#define INFINIPATH_SP_TEST    0x40
-#define INFINIPATH_SP_TESTEBP 0x20
-#define INFINIPATH_SP_TRIGGER_SHIFT  15
-
-/* SendPIOAvail bits */
-#define INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT 1
-#define INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT 0
-
-/* infinipath header format */
-struct ipath_header {
-	/*
-	 * Version - 4 bits, Port - 4 bits, TID - 10 bits and Offset -
-	 * 14 bits before ECO change ~28 Dec 03.  After that, Vers 4,
-	 * Port 4, TID 11, offset 13.
-	 */
-	__le32 ver_port_tid_offset;
-	__le16 chksum;
-	__le16 pkt_flags;
-};
-
-/* infinipath user message header format.
- * This structure contains the first 4 fields common to all protocols
- * that employ infinipath.
- */
-struct ipath_message_header {
-	__be16 lrh[4];
-	__be32 bth[3];
-	/* fields below this point are in host byte order */
-	struct ipath_header iph;
-	__u8 sub_opcode;
-};
-
-/* infinipath ethernet header format */
-struct ether_header {
-	__be16 lrh[4];
-	__be32 bth[3];
-	struct ipath_header iph;
-	__u8 sub_opcode;
-	__u8 cmd;
-	__be16 lid;
-	__u16 mac[3];
-	__u8 frag_num;
-	__u8 seq_num;
-	__le32 len;
-	/* MUST be of word size due to PIO write requirements */
-	__le32 csum;
-	__le16 csum_offset;
-	__le16 flags;
-	__u16 first_2_bytes;
-	__u8 unused[2];		/* currently unused */
-};
-
-
-/* IB - LRH header consts */
-#define IPATH_LRH_GRH 0x0003	/* 1. word of IB LRH - next header: GRH */
-#define IPATH_LRH_BTH 0x0002	/* 1. word of IB LRH - next header: BTH */
-
-/* misc. */
-#define SIZE_OF_CRC 1
-
-#define IPATH_DEFAULT_P_KEY 0xFFFF
-#define IPATH_PERMISSIVE_LID 0xFFFF
-#define IPATH_AETH_CREDIT_SHIFT 24
-#define IPATH_AETH_CREDIT_MASK 0x1F
-#define IPATH_AETH_CREDIT_INVAL 0x1F
-#define IPATH_PSN_MASK 0xFFFFFF
-#define IPATH_MSN_MASK 0xFFFFFF
-#define IPATH_QPN_MASK 0xFFFFFF
-#define IPATH_MULTICAST_LID_BASE 0xC000
-#define IPATH_EAGER_TID_ID INFINIPATH_I_TID_MASK
-#define IPATH_MULTICAST_QPN 0xFFFFFF
-
-/* Receive Header Queue: receive type (from infinipath) */
-#define RCVHQ_RCV_TYPE_EXPECTED  0
-#define RCVHQ_RCV_TYPE_EAGER     1
-#define RCVHQ_RCV_TYPE_NON_KD    2
-#define RCVHQ_RCV_TYPE_ERROR     3
-
-
-/* sub OpCodes - ith4x  */
-#define IPATH_ITH4X_OPCODE_ENCAP 0x81
-#define IPATH_ITH4X_OPCODE_LID_ARP 0x82
-
-#define IPATH_HEADER_QUEUE_WORDS 9
-
-/* functions for extracting fields from rcvhdrq entries for the driver.
- */
-static inline __u32 ipath_hdrget_err_flags(const __le32 * rbuf)
-{
-	return __le32_to_cpu(rbuf[1]) & INFINIPATH_RHF_H_ERR_MASK;
-}
-
-static inline __u32 ipath_hdrget_rcv_type(const __le32 * rbuf)
-{
-	return (__le32_to_cpu(rbuf[0]) >> INFINIPATH_RHF_RCVTYPE_SHIFT)
-	    & INFINIPATH_RHF_RCVTYPE_MASK;
-}
-
-static inline __u32 ipath_hdrget_length_in_bytes(const __le32 * rbuf)
-{
-	return ((__le32_to_cpu(rbuf[0]) >> INFINIPATH_RHF_LENGTH_SHIFT)
-		& INFINIPATH_RHF_LENGTH_MASK) << 2;
-}
-
-static inline __u32 ipath_hdrget_index(const __le32 * rbuf)
-{
-	return (__le32_to_cpu(rbuf[0]) >> INFINIPATH_RHF_EGRINDEX_SHIFT)
-	    & INFINIPATH_RHF_EGRINDEX_MASK;
-}
-
-static inline __u32 ipath_hdrget_seq(const __le32 *rbuf)
-{
-	return (__le32_to_cpu(rbuf[1]) >> INFINIPATH_RHF_SEQ_SHIFT)
-		& INFINIPATH_RHF_SEQ_MASK;
-}
-
-static inline __u32 ipath_hdrget_offset(const __le32 *rbuf)
-{
-	return (__le32_to_cpu(rbuf[1]) >> INFINIPATH_RHF_HDRQ_OFFSET_SHIFT)
-		& INFINIPATH_RHF_HDRQ_OFFSET_MASK;
-}
-
-static inline __u32 ipath_hdrget_use_egr_buf(const __le32 *rbuf)
-{
-	return __le32_to_cpu(rbuf[0]) & INFINIPATH_RHF_L_USE_EGR;
-}
-
-static inline __u32 ipath_hdrget_ipath_ver(__le32 hdrword)
-{
-	return (__le32_to_cpu(hdrword) >> INFINIPATH_I_VERS_SHIFT)
-	    & INFINIPATH_I_VERS_MASK;
-}
-
-#endif				/* _IPATH_COMMON_H */
diff --git a/drivers/staging/rdma/ipath/ipath_cq.c b/drivers/staging/rdma/ipath/ipath_cq.c
deleted file mode 100644
index e9dd911..0000000
--- a/drivers/staging/rdma/ipath/ipath_cq.c
+++ /dev/null
@@ -1,483 +0,0 @@
-/*
- * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
- * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/err.h>
-#include <linux/slab.h>
-#include <linux/vmalloc.h>
-
-#include "ipath_verbs.h"
-
-/**
- * ipath_cq_enter - add a new entry to the completion queue
- * @cq: completion queue
- * @entry: work completion entry to add
- * @sig: true if @entry is a solicitated entry
- *
- * This may be called with qp->s_lock held.
- */
-void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited)
-{
-	struct ipath_cq_wc *wc;
-	unsigned long flags;
-	u32 head;
-	u32 next;
-
-	spin_lock_irqsave(&cq->lock, flags);
-
-	/*
-	 * Note that the head pointer might be writable by user processes.
-	 * Take care to verify it is a sane value.
-	 */
-	wc = cq->queue;
-	head = wc->head;
-	if (head >= (unsigned) cq->ibcq.cqe) {
-		head = cq->ibcq.cqe;
-		next = 0;
-	} else
-		next = head + 1;
-	if (unlikely(next == wc->tail)) {
-		spin_unlock_irqrestore(&cq->lock, flags);
-		if (cq->ibcq.event_handler) {
-			struct ib_event ev;
-
-			ev.device = cq->ibcq.device;
-			ev.element.cq = &cq->ibcq;
-			ev.event = IB_EVENT_CQ_ERR;
-			cq->ibcq.event_handler(&ev, cq->ibcq.cq_context);
-		}
-		return;
-	}
-	if (cq->ip) {
-		wc->uqueue[head].wr_id = entry->wr_id;
-		wc->uqueue[head].status = entry->status;
-		wc->uqueue[head].opcode = entry->opcode;
-		wc->uqueue[head].vendor_err = entry->vendor_err;
-		wc->uqueue[head].byte_len = entry->byte_len;
-		wc->uqueue[head].ex.imm_data = (__u32 __force) entry->ex.imm_data;
-		wc->uqueue[head].qp_num = entry->qp->qp_num;
-		wc->uqueue[head].src_qp = entry->src_qp;
-		wc->uqueue[head].wc_flags = entry->wc_flags;
-		wc->uqueue[head].pkey_index = entry->pkey_index;
-		wc->uqueue[head].slid = entry->slid;
-		wc->uqueue[head].sl = entry->sl;
-		wc->uqueue[head].dlid_path_bits = entry->dlid_path_bits;
-		wc->uqueue[head].port_num = entry->port_num;
-		/* Make sure entry is written before the head index. */
-		smp_wmb();
-	} else
-		wc->kqueue[head] = *entry;
-	wc->head = next;
-
-	if (cq->notify == IB_CQ_NEXT_COMP ||
-	    (cq->notify == IB_CQ_SOLICITED && solicited)) {
-		cq->notify = IB_CQ_NONE;
-		cq->triggered++;
-		/*
-		 * This will cause send_complete() to be called in
-		 * another thread.
-		 */
-		tasklet_hi_schedule(&cq->comptask);
-	}
-
-	spin_unlock_irqrestore(&cq->lock, flags);
-
-	if (entry->status != IB_WC_SUCCESS)
-		to_idev(cq->ibcq.device)->n_wqe_errs++;
-}
-
-/**
- * ipath_poll_cq - poll for work completion entries
- * @ibcq: the completion queue to poll
- * @num_entries: the maximum number of entries to return
- * @entry: pointer to array where work completions are placed
- *
- * Returns the number of completion entries polled.
- *
- * This may be called from interrupt context.  Also called by ib_poll_cq()
- * in the generic verbs code.
- */
-int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
-{
-	struct ipath_cq *cq = to_icq(ibcq);
-	struct ipath_cq_wc *wc;
-	unsigned long flags;
-	int npolled;
-	u32 tail;
-
-	/* The kernel can only poll a kernel completion queue */
-	if (cq->ip) {
-		npolled = -EINVAL;
-		goto bail;
-	}
-
-	spin_lock_irqsave(&cq->lock, flags);
-
-	wc = cq->queue;
-	tail = wc->tail;
-	if (tail > (u32) cq->ibcq.cqe)
-		tail = (u32) cq->ibcq.cqe;
-	for (npolled = 0; npolled < num_entries; ++npolled, ++entry) {
-		if (tail == wc->head)
-			break;
-		/* The kernel doesn't need a RMB since it has the lock. */
-		*entry = wc->kqueue[tail];
-		if (tail >= cq->ibcq.cqe)
-			tail = 0;
-		else
-			tail++;
-	}
-	wc->tail = tail;
-
-	spin_unlock_irqrestore(&cq->lock, flags);
-
-bail:
-	return npolled;
-}
-
-static void send_complete(unsigned long data)
-{
-	struct ipath_cq *cq = (struct ipath_cq *)data;
-
-	/*
-	 * The completion handler will most likely rearm the notification
-	 * and poll for all pending entries.  If a new completion entry
-	 * is added while we are in this routine, tasklet_hi_schedule()
-	 * won't call us again until we return so we check triggered to
-	 * see if we need to call the handler again.
-	 */
-	for (;;) {
-		u8 triggered = cq->triggered;
-
-		cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
-
-		if (cq->triggered == triggered)
-			return;
-	}
-}
-
-/**
- * ipath_create_cq - create a completion queue
- * @ibdev: the device this completion queue is attached to
- * @attr: creation attributes
- * @context: unused by the InfiniPath driver
- * @udata: unused by the InfiniPath driver
- *
- * Returns a pointer to the completion queue or negative errno values
- * for failure.
- *
- * Called by ib_create_cq() in the generic verbs code.
- */
-struct ib_cq *ipath_create_cq(struct ib_device *ibdev,
-			      const struct ib_cq_init_attr *attr,
-			      struct ib_ucontext *context,
-			      struct ib_udata *udata)
-{
-	int entries = attr->cqe;
-	struct ipath_ibdev *dev = to_idev(ibdev);
-	struct ipath_cq *cq;
-	struct ipath_cq_wc *wc;
-	struct ib_cq *ret;
-	u32 sz;
-
-	if (attr->flags)
-		return ERR_PTR(-EINVAL);
-
-	if (entries < 1 || entries > ib_ipath_max_cqes) {
-		ret = ERR_PTR(-EINVAL);
-		goto done;
-	}
-
-	/* Allocate the completion queue structure. */
-	cq = kmalloc(sizeof(*cq), GFP_KERNEL);
-	if (!cq) {
-		ret = ERR_PTR(-ENOMEM);
-		goto done;
-	}
-
-	/*
-	 * Allocate the completion queue entries and head/tail pointers.
-	 * This is allocated separately so that it can be resized and
-	 * also mapped into user space.
-	 * We need to use vmalloc() in order to support mmap and large
-	 * numbers of entries.
-	 */
-	sz = sizeof(*wc);
-	if (udata && udata->outlen >= sizeof(__u64))
-		sz += sizeof(struct ib_uverbs_wc) * (entries + 1);
-	else
-		sz += sizeof(struct ib_wc) * (entries + 1);
-	wc = vmalloc_user(sz);
-	if (!wc) {
-		ret = ERR_PTR(-ENOMEM);
-		goto bail_cq;
-	}
-
-	/*
-	 * Return the address of the WC as the offset to mmap.
-	 * See ipath_mmap() for details.
-	 */
-	if (udata && udata->outlen >= sizeof(__u64)) {
-		int err;
-
-		cq->ip = ipath_create_mmap_info(dev, sz, context, wc);
-		if (!cq->ip) {
-			ret = ERR_PTR(-ENOMEM);
-			goto bail_wc;
-		}
-
-		err = ib_copy_to_udata(udata, &cq->ip->offset,
-				       sizeof(cq->ip->offset));
-		if (err) {
-			ret = ERR_PTR(err);
-			goto bail_ip;
-		}
-	} else
-		cq->ip = NULL;
-
-	spin_lock(&dev->n_cqs_lock);
-	if (dev->n_cqs_allocated == ib_ipath_max_cqs) {
-		spin_unlock(&dev->n_cqs_lock);
-		ret = ERR_PTR(-ENOMEM);
-		goto bail_ip;
-	}
-
-	dev->n_cqs_allocated++;
-	spin_unlock(&dev->n_cqs_lock);
-
-	if (cq->ip) {
-		spin_lock_irq(&dev->pending_lock);
-		list_add(&cq->ip->pending_mmaps, &dev->pending_mmaps);
-		spin_unlock_irq(&dev->pending_lock);
-	}
-
-	/*
-	 * ib_create_cq() will initialize cq->ibcq except for cq->ibcq.cqe.
-	 * The number of entries should be >= the number requested or return
-	 * an error.
-	 */
-	cq->ibcq.cqe = entries;
-	cq->notify = IB_CQ_NONE;
-	cq->triggered = 0;
-	spin_lock_init(&cq->lock);
-	tasklet_init(&cq->comptask, send_complete, (unsigned long)cq);
-	wc->head = 0;
-	wc->tail = 0;
-	cq->queue = wc;
-
-	ret = &cq->ibcq;
-
-	goto done;
-
-bail_ip:
-	kfree(cq->ip);
-bail_wc:
-	vfree(wc);
-bail_cq:
-	kfree(cq);
-done:
-	return ret;
-}
-
-/**
- * ipath_destroy_cq - destroy a completion queue
- * @ibcq: the completion queue to destroy.
- *
- * Returns 0 for success.
- *
- * Called by ib_destroy_cq() in the generic verbs code.
- */
-int ipath_destroy_cq(struct ib_cq *ibcq)
-{
-	struct ipath_ibdev *dev = to_idev(ibcq->device);
-	struct ipath_cq *cq = to_icq(ibcq);
-
-	tasklet_kill(&cq->comptask);
-	spin_lock(&dev->n_cqs_lock);
-	dev->n_cqs_allocated--;
-	spin_unlock(&dev->n_cqs_lock);
-	if (cq->ip)
-		kref_put(&cq->ip->ref, ipath_release_mmap_info);
-	else
-		vfree(cq->queue);
-	kfree(cq);
-
-	return 0;
-}
-
-/**
- * ipath_req_notify_cq - change the notification type for a completion queue
- * @ibcq: the completion queue
- * @notify_flags: the type of notification to request
- *
- * Returns 0 for success.
- *
- * This may be called from interrupt context.  Also called by
- * ib_req_notify_cq() in the generic verbs code.
- */
-int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags)
-{
-	struct ipath_cq *cq = to_icq(ibcq);
-	unsigned long flags;
-	int ret = 0;
-
-	spin_lock_irqsave(&cq->lock, flags);
-	/*
-	 * Don't change IB_CQ_NEXT_COMP to IB_CQ_SOLICITED but allow
-	 * any other transitions (see C11-31 and C11-32 in ch. 11.4.2.2).
-	 */
-	if (cq->notify != IB_CQ_NEXT_COMP)
-		cq->notify = notify_flags & IB_CQ_SOLICITED_MASK;
-
-	if ((notify_flags & IB_CQ_REPORT_MISSED_EVENTS) &&
-	    cq->queue->head != cq->queue->tail)
-		ret = 1;
-
-	spin_unlock_irqrestore(&cq->lock, flags);
-
-	return ret;
-}
-
-/**
- * ipath_resize_cq - change the size of the CQ
- * @ibcq: the completion queue
- *
- * Returns 0 for success.
- */
-int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
-{
-	struct ipath_cq *cq = to_icq(ibcq);
-	struct ipath_cq_wc *old_wc;
-	struct ipath_cq_wc *wc;
-	u32 head, tail, n;
-	int ret;
-	u32 sz;
-
-	if (cqe < 1 || cqe > ib_ipath_max_cqes) {
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	/*
-	 * Need to use vmalloc() if we want to support large #s of entries.
-	 */
-	sz = sizeof(*wc);
-	if (udata && udata->outlen >= sizeof(__u64))
-		sz += sizeof(struct ib_uverbs_wc) * (cqe + 1);
-	else
-		sz += sizeof(struct ib_wc) * (cqe + 1);
-	wc = vmalloc_user(sz);
-	if (!wc) {
-		ret = -ENOMEM;
-		goto bail;
-	}
-
-	/* Check that we can write the offset to mmap. */
-	if (udata && udata->outlen >= sizeof(__u64)) {
-		__u64 offset = 0;
-
-		ret = ib_copy_to_udata(udata, &offset, sizeof(offset));
-		if (ret)
-			goto bail_free;
-	}
-
-	spin_lock_irq(&cq->lock);
-	/*
-	 * Make sure head and tail are sane since they
-	 * might be user writable.
-	 */
-	old_wc = cq->queue;
-	head = old_wc->head;
-	if (head > (u32) cq->ibcq.cqe)
-		head = (u32) cq->ibcq.cqe;
-	tail = old_wc->tail;
-	if (tail > (u32) cq->ibcq.cqe)
-		tail = (u32) cq->ibcq.cqe;
-	if (head < tail)
-		n = cq->ibcq.cqe + 1 + head - tail;
-	else
-		n = head - tail;
-	if (unlikely((u32)cqe < n)) {
-		ret = -EINVAL;
-		goto bail_unlock;
-	}
-	for (n = 0; tail != head; n++) {
-		if (cq->ip)
-			wc->uqueue[n] = old_wc->uqueue[tail];
-		else
-			wc->kqueue[n] = old_wc->kqueue[tail];
-		if (tail == (u32) cq->ibcq.cqe)
-			tail = 0;
-		else
-			tail++;
-	}
-	cq->ibcq.cqe = cqe;
-	wc->head = n;
-	wc->tail = 0;
-	cq->queue = wc;
-	spin_unlock_irq(&cq->lock);
-
-	vfree(old_wc);
-
-	if (cq->ip) {
-		struct ipath_ibdev *dev = to_idev(ibcq->device);
-		struct ipath_mmap_info *ip = cq->ip;
-
-		ipath_update_mmap_info(dev, ip, sz, wc);
-
-		/*
-		 * Return the offset to mmap.
-		 * See ipath_mmap() for details.
-		 */
-		if (udata && udata->outlen >= sizeof(__u64)) {
-			ret = ib_copy_to_udata(udata, &ip->offset,
-					       sizeof(ip->offset));
-			if (ret)
-				goto bail;
-		}
-
-		spin_lock_irq(&dev->pending_lock);
-		if (list_empty(&ip->pending_mmaps))
-			list_add(&ip->pending_mmaps, &dev->pending_mmaps);
-		spin_unlock_irq(&dev->pending_lock);
-	}
-
-	ret = 0;
-	goto bail;
-
-bail_unlock:
-	spin_unlock_irq(&cq->lock);
-bail_free:
-	vfree(wc);
-bail:
-	return ret;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_debug.h b/drivers/staging/rdma/ipath/ipath_debug.h
deleted file mode 100644
index 65926cd..0000000
--- a/drivers/staging/rdma/ipath/ipath_debug.h
+++ /dev/null
@@ -1,99 +0,0 @@
-/*
- * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef _IPATH_DEBUG_H
-#define _IPATH_DEBUG_H
-
-#ifndef _IPATH_DEBUGGING	/* debugging enabled or not */
-#define _IPATH_DEBUGGING 1
-#endif
-
-#if _IPATH_DEBUGGING
-
-/*
- * Mask values for debugging.  The scheme allows us to compile out any
- * of the debug tracing stuff, and if compiled in, to enable or disable
- * dynamically.  This can be set at modprobe time also:
- *      modprobe infinipath.ko infinipath_debug=7
- */
-
-#define __IPATH_INFO        0x1	/* generic low verbosity stuff */
-#define __IPATH_DBG         0x2	/* generic debug */
-#define __IPATH_TRSAMPLE    0x8	/* generate trace buffer sample entries */
-/* leave some low verbosity spots open */
-#define __IPATH_VERBDBG     0x40	/* very verbose debug */
-#define __IPATH_PKTDBG      0x80	/* print packet data */
-/* print process startup (init)/exit messages */
-#define __IPATH_PROCDBG     0x100
-/* print mmap/fault stuff, not using VDBG any more */
-#define __IPATH_MMDBG       0x200
-#define __IPATH_ERRPKTDBG   0x400
-#define __IPATH_USER_SEND   0x1000	/* use user mode send */
-#define __IPATH_KERNEL_SEND 0x2000	/* use kernel mode send */
-#define __IPATH_EPKTDBG     0x4000	/* print ethernet packet data */
-#define __IPATH_IPATHDBG    0x10000	/* Ethernet (IPATH) gen debug */
-#define __IPATH_IPATHWARN   0x20000	/* Ethernet (IPATH) warnings */
-#define __IPATH_IPATHERR    0x40000	/* Ethernet (IPATH) errors */
-#define __IPATH_IPATHPD     0x80000	/* Ethernet (IPATH) packet dump */
-#define __IPATH_IPATHTABLE  0x100000	/* Ethernet (IPATH) table dump */
-#define __IPATH_LINKVERBDBG 0x200000	/* very verbose linkchange debug */
-
-#else				/* _IPATH_DEBUGGING */
-
-/*
- * define all of these even with debugging off, for the few places that do
- * if(infinipath_debug & _IPATH_xyzzy), but in a way that will make the
- * compiler eliminate the code
- */
-
-#define __IPATH_INFO      0x0	/* generic low verbosity stuff */
-#define __IPATH_DBG       0x0	/* generic debug */
-#define __IPATH_TRSAMPLE  0x0	/* generate trace buffer sample entries */
-#define __IPATH_VERBDBG   0x0	/* very verbose debug */
-#define __IPATH_PKTDBG    0x0	/* print packet data */
-#define __IPATH_PROCDBG   0x0	/* process startup (init)/exit messages */
-/* print mmap/fault stuff, not using VDBG any more */
-#define __IPATH_MMDBG     0x0
-#define __IPATH_EPKTDBG   0x0	/* print ethernet packet data */
-#define __IPATH_IPATHDBG  0x0	/* Ethernet (IPATH) table dump on */
-#define __IPATH_IPATHWARN 0x0	/* Ethernet (IPATH) warnings on   */
-#define __IPATH_IPATHERR  0x0	/* Ethernet (IPATH) errors on   */
-#define __IPATH_IPATHPD   0x0	/* Ethernet (IPATH) packet dump on   */
-#define __IPATH_IPATHTABLE 0x0	/* Ethernet (IPATH) packet dump on   */
-#define __IPATH_LINKVERBDBG 0x0	/* very verbose linkchange debug */
-
-#endif				/* _IPATH_DEBUGGING */
-
-#define __IPATH_VERBOSEDBG __IPATH_VERBDBG
-
-#endif				/* _IPATH_DEBUG_H */
diff --git a/drivers/staging/rdma/ipath/ipath_diag.c b/drivers/staging/rdma/ipath/ipath_diag.c
deleted file mode 100644
index 45802e9..0000000
--- a/drivers/staging/rdma/ipath/ipath_diag.c
+++ /dev/null
@@ -1,551 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-/*
- * This file contains support for diagnostic functions.  It is accessed by
- * opening the ipath_diag device, normally minor number 129.  Diagnostic use
- * of the InfiniPath chip may render the chip or board unusable until the
- * driver is unloaded, or in some cases, until the system is rebooted.
- *
- * Accesses to the chip through this interface are not similar to going
- * through the /sys/bus/pci resource mmap interface.
- */
-
-#include <linux/io.h>
-#include <linux/pci.h>
-#include <linux/vmalloc.h>
-#include <linux/fs.h>
-#include <linux/export.h>
-#include <asm/uaccess.h>
-
-#include "ipath_kernel.h"
-#include "ipath_common.h"
-
-int ipath_diag_inuse;
-static int diag_set_link;
-
-static int ipath_diag_open(struct inode *in, struct file *fp);
-static int ipath_diag_release(struct inode *in, struct file *fp);
-static ssize_t ipath_diag_read(struct file *fp, char __user *data,
-			       size_t count, loff_t *off);
-static ssize_t ipath_diag_write(struct file *fp, const char __user *data,
-				size_t count, loff_t *off);
-
-static const struct file_operations diag_file_ops = {
-	.owner = THIS_MODULE,
-	.write = ipath_diag_write,
-	.read = ipath_diag_read,
-	.open = ipath_diag_open,
-	.release = ipath_diag_release,
-	.llseek = default_llseek,
-};
-
-static ssize_t ipath_diagpkt_write(struct file *fp,
-				   const char __user *data,
-				   size_t count, loff_t *off);
-
-static const struct file_operations diagpkt_file_ops = {
-	.owner = THIS_MODULE,
-	.write = ipath_diagpkt_write,
-	.llseek = noop_llseek,
-};
-
-static atomic_t diagpkt_count = ATOMIC_INIT(0);
-static struct cdev *diagpkt_cdev;
-static struct device *diagpkt_dev;
-
-int ipath_diag_add(struct ipath_devdata *dd)
-{
-	char name[16];
-	int ret = 0;
-
-	if (atomic_inc_return(&diagpkt_count) == 1) {
-		ret = ipath_cdev_init(IPATH_DIAGPKT_MINOR,
-				      "ipath_diagpkt", &diagpkt_file_ops,
-				      &diagpkt_cdev, &diagpkt_dev);
-
-		if (ret) {
-			ipath_dev_err(dd, "Couldn't create ipath_diagpkt "
-				      "device: %d", ret);
-			goto done;
-		}
-	}
-
-	snprintf(name, sizeof(name), "ipath_diag%d", dd->ipath_unit);
-
-	ret = ipath_cdev_init(IPATH_DIAG_MINOR_BASE + dd->ipath_unit, name,
-			      &diag_file_ops, &dd->diag_cdev,
-			      &dd->diag_dev);
-	if (ret)
-		ipath_dev_err(dd, "Couldn't create %s device: %d",
-			      name, ret);
-
-done:
-	return ret;
-}
-
-void ipath_diag_remove(struct ipath_devdata *dd)
-{
-	if (atomic_dec_and_test(&diagpkt_count))
-		ipath_cdev_cleanup(&diagpkt_cdev, &diagpkt_dev);
-
-	ipath_cdev_cleanup(&dd->diag_cdev, &dd->diag_dev);
-}
-
-/**
- * ipath_read_umem64 - read a 64-bit quantity from the chip into user space
- * @dd: the infinipath device
- * @uaddr: the location to store the data in user memory
- * @caddr: the source chip address (full pointer, not offset)
- * @count: number of bytes to copy (multiple of 32 bits)
- *
- * This function also localizes all chip memory accesses.
- * The copy should be written such that we read full cacheline packets
- * from the chip.  This is usually used for a single qword
- *
- * NOTE:  This assumes the chip address is 64-bit aligned.
- */
-static int ipath_read_umem64(struct ipath_devdata *dd, void __user *uaddr,
-			     const void __iomem *caddr, size_t count)
-{
-	const u64 __iomem *reg_addr = caddr;
-	const u64 __iomem *reg_end = reg_addr + (count / sizeof(u64));
-	int ret;
-
-	/* not very efficient, but it works for now */
-	if (reg_addr < dd->ipath_kregbase || reg_end > dd->ipath_kregend) {
-		ret = -EINVAL;
-		goto bail;
-	}
-	while (reg_addr < reg_end) {
-		u64 data = readq(reg_addr);
-		if (copy_to_user(uaddr, &data, sizeof(u64))) {
-			ret = -EFAULT;
-			goto bail;
-		}
-		reg_addr++;
-		uaddr += sizeof(u64);
-	}
-	ret = 0;
-bail:
-	return ret;
-}
-
-/**
- * ipath_write_umem64 - write a 64-bit quantity to the chip from user space
- * @dd: the infinipath device
- * @caddr: the destination chip address (full pointer, not offset)
- * @uaddr: the source of the data in user memory
- * @count: the number of bytes to copy (multiple of 32 bits)
- *
- * This is usually used for a single qword
- * NOTE:  This assumes the chip address is 64-bit aligned.
- */
-
-static int ipath_write_umem64(struct ipath_devdata *dd, void __iomem *caddr,
-			      const void __user *uaddr, size_t count)
-{
-	u64 __iomem *reg_addr = caddr;
-	const u64 __iomem *reg_end = reg_addr + (count / sizeof(u64));
-	int ret;
-
-	/* not very efficient, but it works for now */
-	if (reg_addr < dd->ipath_kregbase || reg_end > dd->ipath_kregend) {
-		ret = -EINVAL;
-		goto bail;
-	}
-	while (reg_addr < reg_end) {
-		u64 data;
-		if (copy_from_user(&data, uaddr, sizeof(data))) {
-			ret = -EFAULT;
-			goto bail;
-		}
-		writeq(data, reg_addr);
-
-		reg_addr++;
-		uaddr += sizeof(u64);
-	}
-	ret = 0;
-bail:
-	return ret;
-}
-
-/**
- * ipath_read_umem32 - read a 32-bit quantity from the chip into user space
- * @dd: the infinipath device
- * @uaddr: the location to store the data in user memory
- * @caddr: the source chip address (full pointer, not offset)
- * @count: number of bytes to copy
- *
- * read 32 bit values, not 64 bit; for memories that only
- * support 32 bit reads; usually a single dword.
- */
-static int ipath_read_umem32(struct ipath_devdata *dd, void __user *uaddr,
-			     const void __iomem *caddr, size_t count)
-{
-	const u32 __iomem *reg_addr = caddr;
-	const u32 __iomem *reg_end = reg_addr + (count / sizeof(u32));
-	int ret;
-
-	if (reg_addr < (u32 __iomem *) dd->ipath_kregbase ||
-	    reg_end > (u32 __iomem *) dd->ipath_kregend) {
-		ret = -EINVAL;
-		goto bail;
-	}
-	/* not very efficient, but it works for now */
-	while (reg_addr < reg_end) {
-		u32 data = readl(reg_addr);
-		if (copy_to_user(uaddr, &data, sizeof(data))) {
-			ret = -EFAULT;
-			goto bail;
-		}
-
-		reg_addr++;
-		uaddr += sizeof(u32);
-
-	}
-	ret = 0;
-bail:
-	return ret;
-}
-
-/**
- * ipath_write_umem32 - write a 32-bit quantity to the chip from user space
- * @dd: the infinipath device
- * @caddr: the destination chip address (full pointer, not offset)
- * @uaddr: the source of the data in user memory
- * @count: number of bytes to copy
- *
- * write 32 bit values, not 64 bit; for memories that only
- * support 32 bit write; usually a single dword.
- */
-
-static int ipath_write_umem32(struct ipath_devdata *dd, void __iomem *caddr,
-			      const void __user *uaddr, size_t count)
-{
-	u32 __iomem *reg_addr = caddr;
-	const u32 __iomem *reg_end = reg_addr + (count / sizeof(u32));
-	int ret;
-
-	if (reg_addr < (u32 __iomem *) dd->ipath_kregbase ||
-	    reg_end > (u32 __iomem *) dd->ipath_kregend) {
-		ret = -EINVAL;
-		goto bail;
-	}
-	while (reg_addr < reg_end) {
-		u32 data;
-		if (copy_from_user(&data, uaddr, sizeof(data))) {
-			ret = -EFAULT;
-			goto bail;
-		}
-		writel(data, reg_addr);
-
-		reg_addr++;
-		uaddr += sizeof(u32);
-	}
-	ret = 0;
-bail:
-	return ret;
-}
-
-static int ipath_diag_open(struct inode *in, struct file *fp)
-{
-	int unit = iminor(in) - IPATH_DIAG_MINOR_BASE;
-	struct ipath_devdata *dd;
-	int ret;
-
-	mutex_lock(&ipath_mutex);
-
-	if (ipath_diag_inuse) {
-		ret = -EBUSY;
-		goto bail;
-	}
-
-	dd = ipath_lookup(unit);
-
-	if (dd == NULL || !(dd->ipath_flags & IPATH_PRESENT) ||
-	    !dd->ipath_kregbase) {
-		ret = -ENODEV;
-		goto bail;
-	}
-
-	fp->private_data = dd;
-	ipath_diag_inuse = -2;
-	diag_set_link = 0;
-	ret = 0;
-
-	/* Only expose a way to reset the device if we
-	   make it into diag mode. */
-	ipath_expose_reset(&dd->pcidev->dev);
-
-bail:
-	mutex_unlock(&ipath_mutex);
-
-	return ret;
-}
-
-/**
- * ipath_diagpkt_write - write an IB packet
- * @fp: the diag data device file pointer
- * @data: ipath_diag_pkt structure saying where to get the packet
- * @count: size of data to write
- * @off: unused by this code
- */
-static ssize_t ipath_diagpkt_write(struct file *fp,
-				   const char __user *data,
-				   size_t count, loff_t *off)
-{
-	u32 __iomem *piobuf;
-	u32 plen, pbufn, maxlen_reserve;
-	struct ipath_diag_pkt odp;
-	struct ipath_diag_xpkt dp;
-	u32 *tmpbuf = NULL;
-	struct ipath_devdata *dd;
-	ssize_t ret = 0;
-	u64 val;
-	u32 l_state, lt_state; /* LinkState, LinkTrainingState */
-
-
-	if (count == sizeof(dp)) {
-		if (copy_from_user(&dp, data, sizeof(dp))) {
-			ret = -EFAULT;
-			goto bail;
-		}
-	} else if (count == sizeof(odp)) {
-		if (copy_from_user(&odp, data, sizeof(odp))) {
-			ret = -EFAULT;
-			goto bail;
-		}
-		dp.len = odp.len;
-		dp.unit = odp.unit;
-		dp.data = odp.data;
-		dp.pbc_wd = 0;
-	} else {
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	/* send count must be an exact number of dwords */
-	if (dp.len & 3) {
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	plen = dp.len >> 2;
-
-	dd = ipath_lookup(dp.unit);
-	if (!dd || !(dd->ipath_flags & IPATH_PRESENT) ||
-	    !dd->ipath_kregbase) {
-		ipath_cdbg(VERBOSE, "illegal unit %u for diag data send\n",
-			   dp.unit);
-		ret = -ENODEV;
-		goto bail;
-	}
-
-	if (ipath_diag_inuse && !diag_set_link &&
-	    !(dd->ipath_flags & IPATH_LINKACTIVE)) {
-		diag_set_link = 1;
-		ipath_cdbg(VERBOSE, "Trying to set to set link active for "
-			   "diag pkt\n");
-		ipath_set_linkstate(dd, IPATH_IB_LINKARM);
-		ipath_set_linkstate(dd, IPATH_IB_LINKACTIVE);
-	}
-
-	if (!(dd->ipath_flags & IPATH_INITTED)) {
-		/* no hardware, freeze, etc. */
-		ipath_cdbg(VERBOSE, "unit %u not usable\n", dd->ipath_unit);
-		ret = -ENODEV;
-		goto bail;
-	}
-	/*
-	 * Want to skip check for l_state if using custom PBC,
-	 * because we might be trying to force an SM packet out.
-	 * first-cut, skip _all_ state checking in that case.
-	 */
-	val = ipath_ib_state(dd, dd->ipath_lastibcstat);
-	lt_state = ipath_ib_linktrstate(dd, dd->ipath_lastibcstat);
-	l_state = ipath_ib_linkstate(dd, dd->ipath_lastibcstat);
-	if (!dp.pbc_wd && (lt_state != INFINIPATH_IBCS_LT_STATE_LINKUP ||
-	    (val != dd->ib_init && val != dd->ib_arm &&
-	    val != dd->ib_active))) {
-		ipath_cdbg(VERBOSE, "unit %u not ready (state %llx)\n",
-			   dd->ipath_unit, (unsigned long long) val);
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	/*
-	 * need total length before first word written, plus 2 Dwords. One Dword
-	 * is for padding so we get the full user data when not aligned on
-	 * a word boundary. The other Dword is to make sure we have room for the
-	 * ICRC which gets tacked on later.
-	 */
-	maxlen_reserve = 2 * sizeof(u32);
-	if (dp.len > dd->ipath_ibmaxlen - maxlen_reserve) {
-		ipath_dbg("Pkt len 0x%x > ibmaxlen %x\n",
-			  dp.len, dd->ipath_ibmaxlen);
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	plen = sizeof(u32) + dp.len;
-
-	tmpbuf = vmalloc(plen);
-	if (!tmpbuf) {
-		dev_info(&dd->pcidev->dev, "Unable to allocate tmp buffer, "
-			 "failing\n");
-		ret = -ENOMEM;
-		goto bail;
-	}
-
-	if (copy_from_user(tmpbuf,
-			   (const void __user *) (unsigned long) dp.data,
-			   dp.len)) {
-		ret = -EFAULT;
-		goto bail;
-	}
-
-	plen >>= 2;		/* in dwords */
-
-	piobuf = ipath_getpiobuf(dd, plen, &pbufn);
-	if (!piobuf) {
-		ipath_cdbg(VERBOSE, "No PIO buffers avail unit for %u\n",
-			   dd->ipath_unit);
-		ret = -EBUSY;
-		goto bail;
-	}
-	/* disarm it just to be extra sure */
-	ipath_disarm_piobufs(dd, pbufn, 1);
-
-	if (ipath_debug & __IPATH_PKTDBG)
-		ipath_cdbg(VERBOSE, "unit %u 0x%x+1w pio%d\n",
-			   dd->ipath_unit, plen - 1, pbufn);
-
-	if (dp.pbc_wd == 0)
-		dp.pbc_wd = plen;
-	writeq(dp.pbc_wd, piobuf);
-	/*
-	 * Copy all by the trigger word, then flush, so it's written
-	 * to chip before trigger word, then write trigger word, then
-	 * flush again, so packet is sent.
-	 */
-	if (dd->ipath_flags & IPATH_PIO_FLUSH_WC) {
-		ipath_flush_wc();
-		__iowrite32_copy(piobuf + 2, tmpbuf, plen - 1);
-		ipath_flush_wc();
-		__raw_writel(tmpbuf[plen - 1], piobuf + plen + 1);
-	} else
-		__iowrite32_copy(piobuf + 2, tmpbuf, plen);
-
-	ipath_flush_wc();
-
-	ret = sizeof(dp);
-
-bail:
-	vfree(tmpbuf);
-	return ret;
-}
-
-static int ipath_diag_release(struct inode *in, struct file *fp)
-{
-	mutex_lock(&ipath_mutex);
-	ipath_diag_inuse = 0;
-	fp->private_data = NULL;
-	mutex_unlock(&ipath_mutex);
-	return 0;
-}
-
-static ssize_t ipath_diag_read(struct file *fp, char __user *data,
-			       size_t count, loff_t *off)
-{
-	struct ipath_devdata *dd = fp->private_data;
-	void __iomem *kreg_base;
-	ssize_t ret;
-
-	kreg_base = dd->ipath_kregbase;
-
-	if (count == 0)
-		ret = 0;
-	else if ((count % 4) || (*off % 4))
-		/* address or length is not 32-bit aligned, hence invalid */
-		ret = -EINVAL;
-	else if (ipath_diag_inuse < 1 && (*off || count != 8))
-		ret = -EINVAL;  /* prevent cat /dev/ipath_diag* */
-	else if ((count % 8) || (*off % 8))
-		/* address or length not 64-bit aligned; do 32-bit reads */
-		ret = ipath_read_umem32(dd, data, kreg_base + *off, count);
-	else
-		ret = ipath_read_umem64(dd, data, kreg_base + *off, count);
-
-	if (ret >= 0) {
-		*off += count;
-		ret = count;
-		if (ipath_diag_inuse == -2)
-			ipath_diag_inuse++;
-	}
-
-	return ret;
-}
-
-static ssize_t ipath_diag_write(struct file *fp, const char __user *data,
-				size_t count, loff_t *off)
-{
-	struct ipath_devdata *dd = fp->private_data;
-	void __iomem *kreg_base;
-	ssize_t ret;
-
-	kreg_base = dd->ipath_kregbase;
-
-	if (count == 0)
-		ret = 0;
-	else if ((count % 4) || (*off % 4))
-		/* address or length is not 32-bit aligned, hence invalid */
-		ret = -EINVAL;
-	else if ((ipath_diag_inuse == -1 && (*off || count != 8)) ||
-		 ipath_diag_inuse == -2)  /* read qw off 0, write qw off 0 */
-		ret = -EINVAL;  /* before any other write allowed */
-	else if ((count % 8) || (*off % 8))
-		/* address or length not 64-bit aligned; do 32-bit writes */
-		ret = ipath_write_umem32(dd, kreg_base + *off, data, count);
-	else
-		ret = ipath_write_umem64(dd, kreg_base + *off, data, count);
-
-	if (ret >= 0) {
-		*off += count;
-		ret = count;
-		if (ipath_diag_inuse == -1)
-			ipath_diag_inuse = 1; /* all read/write OK now */
-	}
-
-	return ret;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_dma.c b/drivers/staging/rdma/ipath/ipath_dma.c
deleted file mode 100644
index 123a8c0..0000000
--- a/drivers/staging/rdma/ipath/ipath_dma.c
+++ /dev/null
@@ -1,179 +0,0 @@
-/*
- * Copyright (c) 2006 QLogic, Corporation. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/scatterlist.h>
-#include <linux/gfp.h>
-#include <rdma/ib_verbs.h>
-
-#include "ipath_verbs.h"
-
-#define BAD_DMA_ADDRESS ((u64) 0)
-
-/*
- * The following functions implement driver specific replacements
- * for the ib_dma_*() functions.
- *
- * These functions return kernel virtual addresses instead of
- * device bus addresses since the driver uses the CPU to copy
- * data instead of using hardware DMA.
- */
-
-static int ipath_mapping_error(struct ib_device *dev, u64 dma_addr)
-{
-	return dma_addr == BAD_DMA_ADDRESS;
-}
-
-static u64 ipath_dma_map_single(struct ib_device *dev,
-			        void *cpu_addr, size_t size,
-			        enum dma_data_direction direction)
-{
-	BUG_ON(!valid_dma_direction(direction));
-	return (u64) cpu_addr;
-}
-
-static void ipath_dma_unmap_single(struct ib_device *dev,
-				   u64 addr, size_t size,
-				   enum dma_data_direction direction)
-{
-	BUG_ON(!valid_dma_direction(direction));
-}
-
-static u64 ipath_dma_map_page(struct ib_device *dev,
-			      struct page *page,
-			      unsigned long offset,
-			      size_t size,
-			      enum dma_data_direction direction)
-{
-	u64 addr;
-
-	BUG_ON(!valid_dma_direction(direction));
-
-	if (offset + size > PAGE_SIZE) {
-		addr = BAD_DMA_ADDRESS;
-		goto done;
-	}
-
-	addr = (u64) page_address(page);
-	if (addr)
-		addr += offset;
-	/* TODO: handle highmem pages */
-
-done:
-	return addr;
-}
-
-static void ipath_dma_unmap_page(struct ib_device *dev,
-				 u64 addr, size_t size,
-				 enum dma_data_direction direction)
-{
-	BUG_ON(!valid_dma_direction(direction));
-}
-
-static int ipath_map_sg(struct ib_device *dev, struct scatterlist *sgl,
-			int nents, enum dma_data_direction direction)
-{
-	struct scatterlist *sg;
-	u64 addr;
-	int i;
-	int ret = nents;
-
-	BUG_ON(!valid_dma_direction(direction));
-
-	for_each_sg(sgl, sg, nents, i) {
-		addr = (u64) page_address(sg_page(sg));
-		/* TODO: handle highmem pages */
-		if (!addr) {
-			ret = 0;
-			break;
-		}
-		sg->dma_address = addr + sg->offset;
-#ifdef CONFIG_NEED_SG_DMA_LENGTH
-		sg->dma_length = sg->length;
-#endif
-	}
-	return ret;
-}
-
-static void ipath_unmap_sg(struct ib_device *dev,
-			   struct scatterlist *sg, int nents,
-			   enum dma_data_direction direction)
-{
-	BUG_ON(!valid_dma_direction(direction));
-}
-
-static void ipath_sync_single_for_cpu(struct ib_device *dev,
-				      u64 addr,
-				      size_t size,
-				      enum dma_data_direction dir)
-{
-}
-
-static void ipath_sync_single_for_device(struct ib_device *dev,
-					 u64 addr,
-					 size_t size,
-					 enum dma_data_direction dir)
-{
-}
-
-static void *ipath_dma_alloc_coherent(struct ib_device *dev, size_t size,
-				      u64 *dma_handle, gfp_t flag)
-{
-	struct page *p;
-	void *addr = NULL;
-
-	p = alloc_pages(flag, get_order(size));
-	if (p)
-		addr = page_address(p);
-	if (dma_handle)
-		*dma_handle = (u64) addr;
-	return addr;
-}
-
-static void ipath_dma_free_coherent(struct ib_device *dev, size_t size,
-				    void *cpu_addr, u64 dma_handle)
-{
-	free_pages((unsigned long) cpu_addr, get_order(size));
-}
-
-struct ib_dma_mapping_ops ipath_dma_mapping_ops = {
-	.mapping_error = ipath_mapping_error,
-	.map_single = ipath_dma_map_single,
-	.unmap_single = ipath_dma_unmap_single,
-	.map_page = ipath_dma_map_page,
-	.unmap_page = ipath_dma_unmap_page,
-	.map_sg = ipath_map_sg,
-	.unmap_sg = ipath_unmap_sg,
-	.sync_single_for_cpu = ipath_sync_single_for_cpu,
-	.sync_single_for_device = ipath_sync_single_for_device,
-	.alloc_coherent = ipath_dma_alloc_coherent,
-	.free_coherent = ipath_dma_free_coherent
-};
diff --git a/drivers/staging/rdma/ipath/ipath_driver.c b/drivers/staging/rdma/ipath/ipath_driver.c
deleted file mode 100644
index 2ab22f9..0000000
--- a/drivers/staging/rdma/ipath/ipath_driver.c
+++ /dev/null
@@ -1,2784 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/spinlock.h>
-#include <linux/idr.h>
-#include <linux/pci.h>
-#include <linux/io.h>
-#include <linux/delay.h>
-#include <linux/netdevice.h>
-#include <linux/vmalloc.h>
-#include <linux/bitmap.h>
-#include <linux/slab.h>
-#include <linux/module.h>
-#ifdef CONFIG_X86_64
-#include <asm/pat.h>
-#endif
-
-#include "ipath_kernel.h"
-#include "ipath_verbs.h"
-
-static void ipath_update_pio_bufs(struct ipath_devdata *);
-
-const char *ipath_get_unit_name(int unit)
-{
-	static char iname[16];
-	snprintf(iname, sizeof iname, "infinipath%u", unit);
-	return iname;
-}
-
-#define DRIVER_LOAD_MSG "QLogic " IPATH_DRV_NAME " loaded: "
-#define PFX IPATH_DRV_NAME ": "
-
-/*
- * The size has to be longer than this string, so we can append
- * board/chip information to it in the init code.
- */
-const char ib_ipath_version[] = IPATH_IDSTR "\n";
-
-static struct idr unit_table;
-DEFINE_SPINLOCK(ipath_devs_lock);
-LIST_HEAD(ipath_dev_list);
-
-wait_queue_head_t ipath_state_wait;
-
-unsigned ipath_debug = __IPATH_INFO;
-
-module_param_named(debug, ipath_debug, uint, S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(debug, "mask for debug prints");
-EXPORT_SYMBOL_GPL(ipath_debug);
-
-unsigned ipath_mtu4096 = 1; /* max 4KB IB mtu by default, if supported */
-module_param_named(mtu4096, ipath_mtu4096, uint, S_IRUGO);
-MODULE_PARM_DESC(mtu4096, "enable MTU of 4096 bytes, if supported");
-
-static unsigned ipath_hol_timeout_ms = 13000;
-module_param_named(hol_timeout_ms, ipath_hol_timeout_ms, uint, S_IRUGO);
-MODULE_PARM_DESC(hol_timeout_ms,
-	"duration of user app suspension after link failure");
-
-unsigned ipath_linkrecovery = 1;
-module_param_named(linkrecovery, ipath_linkrecovery, uint, S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(linkrecovery, "enable workaround for link recovery issue");
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("QLogic <support@qlogic.com>");
-MODULE_DESCRIPTION("QLogic InfiniPath driver");
-
-/*
- * Table to translate the LINKTRAININGSTATE portion of
- * IBCStatus to a human-readable form.
- */
-const char *ipath_ibcstatus_str[] = {
-	"Disabled",
-	"LinkUp",
-	"PollActive",
-	"PollQuiet",
-	"SleepDelay",
-	"SleepQuiet",
-	"LState6",		/* unused */
-	"LState7",		/* unused */
-	"CfgDebounce",
-	"CfgRcvfCfg",
-	"CfgWaitRmt",
-	"CfgIdle",
-	"RecovRetrain",
-	"CfgTxRevLane",		/* unused before IBA7220 */
-	"RecovWaitRmt",
-	"RecovIdle",
-	/* below were added for IBA7220 */
-	"CfgEnhanced",
-	"CfgTest",
-	"CfgWaitRmtTest",
-	"CfgWaitCfgEnhanced",
-	"SendTS_T",
-	"SendTstIdles",
-	"RcvTS_T",
-	"SendTst_TS1s",
-	"LTState18", "LTState19", "LTState1A", "LTState1B",
-	"LTState1C", "LTState1D", "LTState1E", "LTState1F"
-};
-
-static void ipath_remove_one(struct pci_dev *);
-static int ipath_init_one(struct pci_dev *, const struct pci_device_id *);
-
-/* Only needed for registration, nothing else needs this info */
-#define PCI_VENDOR_ID_PATHSCALE 0x1fc1
-#define PCI_DEVICE_ID_INFINIPATH_HT 0xd
-
-/* Number of seconds before our card status check...  */
-#define STATUS_TIMEOUT 60
-
-static const struct pci_device_id ipath_pci_tbl[] = {
-	{ PCI_DEVICE(PCI_VENDOR_ID_PATHSCALE, PCI_DEVICE_ID_INFINIPATH_HT) },
-	{ 0, }
-};
-
-MODULE_DEVICE_TABLE(pci, ipath_pci_tbl);
-
-static struct pci_driver ipath_driver = {
-	.name = IPATH_DRV_NAME,
-	.probe = ipath_init_one,
-	.remove = ipath_remove_one,
-	.id_table = ipath_pci_tbl,
-	.driver = {
-		.groups = ipath_driver_attr_groups,
-	},
-};
-
-static inline void read_bars(struct ipath_devdata *dd, struct pci_dev *dev,
-			     u32 *bar0, u32 *bar1)
-{
-	int ret;
-
-	ret = pci_read_config_dword(dev, PCI_BASE_ADDRESS_0, bar0);
-	if (ret)
-		ipath_dev_err(dd, "failed to read bar0 before enable: "
-			      "error %d\n", -ret);
-
-	ret = pci_read_config_dword(dev, PCI_BASE_ADDRESS_1, bar1);
-	if (ret)
-		ipath_dev_err(dd, "failed to read bar1 before enable: "
-			      "error %d\n", -ret);
-
-	ipath_dbg("Read bar0 %x bar1 %x\n", *bar0, *bar1);
-}
-
-static void ipath_free_devdata(struct pci_dev *pdev,
-			       struct ipath_devdata *dd)
-{
-	unsigned long flags;
-
-	pci_set_drvdata(pdev, NULL);
-
-	if (dd->ipath_unit != -1) {
-		spin_lock_irqsave(&ipath_devs_lock, flags);
-		idr_remove(&unit_table, dd->ipath_unit);
-		list_del(&dd->ipath_list);
-		spin_unlock_irqrestore(&ipath_devs_lock, flags);
-	}
-	vfree(dd);
-}
-
-static struct ipath_devdata *ipath_alloc_devdata(struct pci_dev *pdev)
-{
-	unsigned long flags;
-	struct ipath_devdata *dd;
-	int ret;
-
-	dd = vzalloc(sizeof(*dd));
-	if (!dd) {
-		dd = ERR_PTR(-ENOMEM);
-		goto bail;
-	}
-	dd->ipath_unit = -1;
-
-	idr_preload(GFP_KERNEL);
-	spin_lock_irqsave(&ipath_devs_lock, flags);
-
-	ret = idr_alloc(&unit_table, dd, 0, 0, GFP_NOWAIT);
-	if (ret < 0) {
-		printk(KERN_ERR IPATH_DRV_NAME
-		       ": Could not allocate unit ID: error %d\n", -ret);
-		ipath_free_devdata(pdev, dd);
-		dd = ERR_PTR(ret);
-		goto bail_unlock;
-	}
-	dd->ipath_unit = ret;
-
-	dd->pcidev = pdev;
-	pci_set_drvdata(pdev, dd);
-
-	list_add(&dd->ipath_list, &ipath_dev_list);
-
-bail_unlock:
-	spin_unlock_irqrestore(&ipath_devs_lock, flags);
-	idr_preload_end();
-bail:
-	return dd;
-}
-
-static inline struct ipath_devdata *__ipath_lookup(int unit)
-{
-	return idr_find(&unit_table, unit);
-}
-
-struct ipath_devdata *ipath_lookup(int unit)
-{
-	struct ipath_devdata *dd;
-	unsigned long flags;
-
-	spin_lock_irqsave(&ipath_devs_lock, flags);
-	dd = __ipath_lookup(unit);
-	spin_unlock_irqrestore(&ipath_devs_lock, flags);
-
-	return dd;
-}
-
-int ipath_count_units(int *npresentp, int *nupp, int *maxportsp)
-{
-	int nunits, npresent, nup;
-	struct ipath_devdata *dd;
-	unsigned long flags;
-	int maxports;
-
-	nunits = npresent = nup = maxports = 0;
-
-	spin_lock_irqsave(&ipath_devs_lock, flags);
-
-	list_for_each_entry(dd, &ipath_dev_list, ipath_list) {
-		nunits++;
-		if ((dd->ipath_flags & IPATH_PRESENT) && dd->ipath_kregbase)
-			npresent++;
-		if (dd->ipath_lid &&
-		    !(dd->ipath_flags & (IPATH_DISABLED | IPATH_LINKDOWN
-					 | IPATH_LINKUNK)))
-			nup++;
-		if (dd->ipath_cfgports > maxports)
-			maxports = dd->ipath_cfgports;
-	}
-
-	spin_unlock_irqrestore(&ipath_devs_lock, flags);
-
-	if (npresentp)
-		*npresentp = npresent;
-	if (nupp)
-		*nupp = nup;
-	if (maxportsp)
-		*maxportsp = maxports;
-
-	return nunits;
-}
-
-/*
- * These next two routines are placeholders in case we don't have per-arch
- * code for controlling write combining.  If explicit control of write
- * combining is not available, performance will probably be awful.
- */
-
-int __attribute__((weak)) ipath_enable_wc(struct ipath_devdata *dd)
-{
-	return -EOPNOTSUPP;
-}
-
-void __attribute__((weak)) ipath_disable_wc(struct ipath_devdata *dd)
-{
-}
-
-/*
- * Perform a PIO buffer bandwidth write test, to verify proper system
- * configuration.  Even when all the setup calls work, occasionally
- * BIOS or other issues can prevent write combining from working, or
- * can cause other bandwidth problems to the chip.
- *
- * This test simply writes the same buffer over and over again, and
- * measures close to the peak bandwidth to the chip (not testing
- * data bandwidth to the wire).   On chips that use an address-based
- * trigger to send packets to the wire, this is easy.  On chips that
- * use a count to trigger, we want to make sure that the packet doesn't
- * go out on the wire, or trigger flow control checks.
- */
-static void ipath_verify_pioperf(struct ipath_devdata *dd)
-{
-	u32 pbnum, cnt, lcnt;
-	u32 __iomem *piobuf;
-	u32 *addr;
-	u64 msecs, emsecs;
-
-	piobuf = ipath_getpiobuf(dd, 0, &pbnum);
-	if (!piobuf) {
-		dev_info(&dd->pcidev->dev,
-			"No PIObufs for checking perf, skipping\n");
-		return;
-	}
-
-	/*
-	 * Enough to give us a reasonable test, less than piobuf size, and
-	 * likely multiple of store buffer length.
-	 */
-	cnt = 1024;
-
-	addr = vmalloc(cnt);
-	if (!addr) {
-		dev_info(&dd->pcidev->dev,
-			"Couldn't get memory for checking PIO perf,"
-			" skipping\n");
-		goto done;
-	}
-
-	preempt_disable();  /* we want reasonably accurate elapsed time */
-	msecs = 1 + jiffies_to_msecs(jiffies);
-	for (lcnt = 0; lcnt < 10000U; lcnt++) {
-		/* wait until we cross msec boundary */
-		if (jiffies_to_msecs(jiffies) >= msecs)
-			break;
-		udelay(1);
-	}
-
-	ipath_disable_armlaunch(dd);
-
-	/*
-	 * length 0, no dwords actually sent, and mark as VL15
-	 * on chips where that may matter (due to IB flowcontrol)
-	 */
-	if ((dd->ipath_flags & IPATH_HAS_PBC_CNT))
-		writeq(1UL << 63, piobuf);
-	else
-		writeq(0, piobuf);
-	ipath_flush_wc();
-
-	/*
-	 * this is only roughly accurate, since even with preempt we
-	 * still take interrupts that could take a while.   Running for
-	 * >= 5 msec seems to get us "close enough" to accurate values
-	 */
-	msecs = jiffies_to_msecs(jiffies);
-	for (emsecs = lcnt = 0; emsecs <= 5UL; lcnt++) {
-		__iowrite32_copy(piobuf + 64, addr, cnt >> 2);
-		emsecs = jiffies_to_msecs(jiffies) - msecs;
-	}
-
-	/* 1 GiB/sec, slightly over IB SDR line rate */
-	if (lcnt < (emsecs * 1024U))
-		ipath_dev_err(dd,
-			"Performance problem: bandwidth to PIO buffers is "
-			"only %u MiB/sec\n",
-			lcnt / (u32) emsecs);
-	else
-		ipath_dbg("PIO buffer bandwidth %u MiB/sec is OK\n",
-			lcnt / (u32) emsecs);
-
-	preempt_enable();
-
-	vfree(addr);
-
-done:
-	/* disarm piobuf, so it's available again */
-	ipath_disarm_piobufs(dd, pbnum, 1);
-	ipath_enable_armlaunch(dd);
-}
-
-static void cleanup_device(struct ipath_devdata *dd);
-
-static int ipath_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
-{
-	int ret, len, j;
-	struct ipath_devdata *dd;
-	unsigned long long addr;
-	u32 bar0 = 0, bar1 = 0;
-
-#ifdef CONFIG_X86_64
-	if (pat_enabled()) {
-		pr_warn("ipath needs PAT disabled, boot with nopat kernel parameter\n");
-		ret = -ENODEV;
-		goto bail;
-	}
-#endif
-
-	dd = ipath_alloc_devdata(pdev);
-	if (IS_ERR(dd)) {
-		ret = PTR_ERR(dd);
-		printk(KERN_ERR IPATH_DRV_NAME
-		       ": Could not allocate devdata: error %d\n", -ret);
-		goto bail;
-	}
-
-	ipath_cdbg(VERBOSE, "initializing unit #%u\n", dd->ipath_unit);
-
-	ret = pci_enable_device(pdev);
-	if (ret) {
-		/* This can happen iff:
-		 *
-		 * We did a chip reset, and then failed to reprogram the
-		 * BAR, or the chip reset due to an internal error.  We then
-		 * unloaded the driver and reloaded it.
-		 *
-		 * Both reset cases set the BAR back to initial state.  For
-		 * the latter case, the AER sticky error bit at offset 0x718
-		 * should be set, but the Linux kernel doesn't yet know
-		 * about that, it appears.  If the original BAR was retained
-		 * in the kernel data structures, this may be OK.
-		 */
-		ipath_dev_err(dd, "enable unit %d failed: error %d\n",
-			      dd->ipath_unit, -ret);
-		goto bail_devdata;
-	}
-	addr = pci_resource_start(pdev, 0);
-	len = pci_resource_len(pdev, 0);
-	ipath_cdbg(VERBOSE, "regbase (0) %llx len %d irq %d, vend %x/%x "
-		   "driver_data %lx\n", addr, len, pdev->irq, ent->vendor,
-		   ent->device, ent->driver_data);
-
-	read_bars(dd, pdev, &bar0, &bar1);
-
-	if (!bar1 && !(bar0 & ~0xf)) {
-		if (addr) {
-			dev_info(&pdev->dev, "BAR is 0 (probable RESET), "
-				 "rewriting as %llx\n", addr);
-			ret = pci_write_config_dword(
-				pdev, PCI_BASE_ADDRESS_0, addr);
-			if (ret) {
-				ipath_dev_err(dd, "rewrite of BAR0 "
-					      "failed: err %d\n", -ret);
-				goto bail_disable;
-			}
-			ret = pci_write_config_dword(
-				pdev, PCI_BASE_ADDRESS_1, addr >> 32);
-			if (ret) {
-				ipath_dev_err(dd, "rewrite of BAR1 "
-					      "failed: err %d\n", -ret);
-				goto bail_disable;
-			}
-		} else {
-			ipath_dev_err(dd, "BAR is 0 (probable RESET), "
-				      "not usable until reboot\n");
-			ret = -ENODEV;
-			goto bail_disable;
-		}
-	}
-
-	ret = pci_request_regions(pdev, IPATH_DRV_NAME);
-	if (ret) {
-		dev_info(&pdev->dev, "pci_request_regions unit %u fails: "
-			 "err %d\n", dd->ipath_unit, -ret);
-		goto bail_disable;
-	}
-
-	ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
-	if (ret) {
-		/*
-		 * if the 64 bit setup fails, try 32 bit.  Some systems
-		 * do not setup 64 bit maps on systems with 2GB or less
-		 * memory installed.
-		 */
-		ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
-		if (ret) {
-			dev_info(&pdev->dev,
-				"Unable to set DMA mask for unit %u: %d\n",
-				dd->ipath_unit, ret);
-			goto bail_regions;
-		} else {
-			ipath_dbg("No 64bit DMA mask, used 32 bit mask\n");
-			ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
-			if (ret)
-				dev_info(&pdev->dev,
-					"Unable to set DMA consistent mask "
-					"for unit %u: %d\n",
-					dd->ipath_unit, ret);
-
-		}
-	} else {
-		ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
-		if (ret)
-			dev_info(&pdev->dev,
-				"Unable to set DMA consistent mask "
-				"for unit %u: %d\n",
-				dd->ipath_unit, ret);
-	}
-
-	pci_set_master(pdev);
-
-	/*
-	 * Save BARs to rewrite after device reset.  Save all 64 bits of
-	 * BAR, just in case.
-	 */
-	dd->ipath_pcibar0 = addr;
-	dd->ipath_pcibar1 = addr >> 32;
-	dd->ipath_deviceid = ent->device;	/* save for later use */
-	dd->ipath_vendorid = ent->vendor;
-
-	/* setup the chip-specific functions, as early as possible. */
-	switch (ent->device) {
-	case PCI_DEVICE_ID_INFINIPATH_HT:
-		ipath_init_iba6110_funcs(dd);
-		break;
-
-	default:
-		ipath_dev_err(dd, "Found unknown QLogic deviceid 0x%x, "
-			      "failing\n", ent->device);
-		return -ENODEV;
-	}
-
-	for (j = 0; j < 6; j++) {
-		if (!pdev->resource[j].start)
-			continue;
-		ipath_cdbg(VERBOSE, "BAR %d %pR, len %llx\n",
-			   j, &pdev->resource[j],
-			   (unsigned long long)pci_resource_len(pdev, j));
-	}
-
-	if (!addr) {
-		ipath_dev_err(dd, "No valid address in BAR 0!\n");
-		ret = -ENODEV;
-		goto bail_regions;
-	}
-
-	dd->ipath_pcirev = pdev->revision;
-
-#if defined(__powerpc__)
-	/* There isn't a generic way to specify writethrough mappings */
-	dd->ipath_kregbase = __ioremap(addr, len,
-		(_PAGE_NO_CACHE|_PAGE_WRITETHRU));
-#else
-	/* XXX: split this properly to enable on PAT */
-	dd->ipath_kregbase = ioremap_nocache(addr, len);
-#endif
-
-	if (!dd->ipath_kregbase) {
-		ipath_dbg("Unable to map io addr %llx to kvirt, failing\n",
-			  addr);
-		ret = -ENOMEM;
-		goto bail_iounmap;
-	}
-	dd->ipath_kregend = (u64 __iomem *)
-		((void __iomem *)dd->ipath_kregbase + len);
-	dd->ipath_physaddr = addr;	/* used for io_remap, etc. */
-	/* for user mmap */
-	ipath_cdbg(VERBOSE, "mapped io addr %llx to kregbase %p\n",
-		   addr, dd->ipath_kregbase);
-
-	if (dd->ipath_f_bus(dd, pdev))
-		ipath_dev_err(dd, "Failed to setup config space; "
-			      "continuing anyway\n");
-
-	/*
-	 * set up our interrupt handler; IRQF_SHARED probably not needed,
-	 * since MSI interrupts shouldn't be shared but won't  hurt for now.
-	 * check 0 irq after we return from chip-specific bus setup, since
-	 * that can affect this due to setup
-	 */
-	if (!dd->ipath_irq)
-		ipath_dev_err(dd, "irq is 0, BIOS error?  Interrupts won't "
-			      "work\n");
-	else {
-		ret = request_irq(dd->ipath_irq, ipath_intr, IRQF_SHARED,
-				  IPATH_DRV_NAME, dd);
-		if (ret) {
-			ipath_dev_err(dd, "Couldn't setup irq handler, "
-				      "irq=%d: %d\n", dd->ipath_irq, ret);
-			goto bail_iounmap;
-		}
-	}
-
-	ret = ipath_init_chip(dd, 0);	/* do the chip-specific init */
-	if (ret)
-		goto bail_irqsetup;
-
-	ret = ipath_enable_wc(dd);
-
-	if (ret)
-		ret = 0;
-
-	ipath_verify_pioperf(dd);
-
-	ipath_device_create_group(&pdev->dev, dd);
-	ipathfs_add_device(dd);
-	ipath_user_add(dd);
-	ipath_diag_add(dd);
-	ipath_register_ib_device(dd);
-
-	goto bail;
-
-bail_irqsetup:
-	cleanup_device(dd);
-
-	if (dd->ipath_irq)
-		dd->ipath_f_free_irq(dd);
-
-	if (dd->ipath_f_cleanup)
-		dd->ipath_f_cleanup(dd);
-
-bail_iounmap:
-	iounmap((volatile void __iomem *) dd->ipath_kregbase);
-
-bail_regions:
-	pci_release_regions(pdev);
-
-bail_disable:
-	pci_disable_device(pdev);
-
-bail_devdata:
-	ipath_free_devdata(pdev, dd);
-
-bail:
-	return ret;
-}
-
-static void cleanup_device(struct ipath_devdata *dd)
-{
-	int port;
-	struct ipath_portdata **tmp;
-	unsigned long flags;
-
-	if (*dd->ipath_statusp & IPATH_STATUS_CHIP_PRESENT) {
-		/* can't do anything more with chip; needs re-init */
-		*dd->ipath_statusp &= ~IPATH_STATUS_CHIP_PRESENT;
-		if (dd->ipath_kregbase) {
-			/*
-			 * if we haven't already cleaned up before these are
-			 * to ensure any register reads/writes "fail" until
-			 * re-init
-			 */
-			dd->ipath_kregbase = NULL;
-			dd->ipath_uregbase = 0;
-			dd->ipath_sregbase = 0;
-			dd->ipath_cregbase = 0;
-			dd->ipath_kregsize = 0;
-		}
-		ipath_disable_wc(dd);
-	}
-
-	if (dd->ipath_spectriggerhit)
-		dev_info(&dd->pcidev->dev, "%lu special trigger hits\n",
-			 dd->ipath_spectriggerhit);
-
-	if (dd->ipath_pioavailregs_dma) {
-		dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
-				  (void *) dd->ipath_pioavailregs_dma,
-				  dd->ipath_pioavailregs_phys);
-		dd->ipath_pioavailregs_dma = NULL;
-	}
-	if (dd->ipath_dummy_hdrq) {
-		dma_free_coherent(&dd->pcidev->dev,
-			dd->ipath_pd[0]->port_rcvhdrq_size,
-			dd->ipath_dummy_hdrq, dd->ipath_dummy_hdrq_phys);
-		dd->ipath_dummy_hdrq = NULL;
-	}
-
-	if (dd->ipath_pageshadow) {
-		struct page **tmpp = dd->ipath_pageshadow;
-		dma_addr_t *tmpd = dd->ipath_physshadow;
-		int i, cnt = 0;
-
-		ipath_cdbg(VERBOSE, "Unlocking any expTID pages still "
-			   "locked\n");
-		for (port = 0; port < dd->ipath_cfgports; port++) {
-			int port_tidbase = port * dd->ipath_rcvtidcnt;
-			int maxtid = port_tidbase + dd->ipath_rcvtidcnt;
-			for (i = port_tidbase; i < maxtid; i++) {
-				if (!tmpp[i])
-					continue;
-				pci_unmap_page(dd->pcidev, tmpd[i],
-					PAGE_SIZE, PCI_DMA_FROMDEVICE);
-				ipath_release_user_pages(&tmpp[i], 1);
-				tmpp[i] = NULL;
-				cnt++;
-			}
-		}
-		if (cnt) {
-			ipath_stats.sps_pageunlocks += cnt;
-			ipath_cdbg(VERBOSE, "There were still %u expTID "
-				   "entries locked\n", cnt);
-		}
-		if (ipath_stats.sps_pagelocks ||
-		    ipath_stats.sps_pageunlocks)
-			ipath_cdbg(VERBOSE, "%llu pages locked, %llu "
-				   "unlocked via ipath_m{un}lock\n",
-				   (unsigned long long)
-				   ipath_stats.sps_pagelocks,
-				   (unsigned long long)
-				   ipath_stats.sps_pageunlocks);
-
-		ipath_cdbg(VERBOSE, "Free shadow page tid array at %p\n",
-			   dd->ipath_pageshadow);
-		tmpp = dd->ipath_pageshadow;
-		dd->ipath_pageshadow = NULL;
-		vfree(tmpp);
-
-		dd->ipath_egrtidbase = NULL;
-	}
-
-	/*
-	 * free any resources still in use (usually just kernel ports)
-	 * at unload; we do for portcnt, because that's what we allocate.
-	 * We acquire lock to be really paranoid that ipath_pd isn't being
-	 * accessed from some interrupt-related code (that should not happen,
-	 * but best to be sure).
-	 */
-	spin_lock_irqsave(&dd->ipath_uctxt_lock, flags);
-	tmp = dd->ipath_pd;
-	dd->ipath_pd = NULL;
-	spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags);
-	for (port = 0; port < dd->ipath_portcnt; port++) {
-		struct ipath_portdata *pd = tmp[port];
-		tmp[port] = NULL; /* debugging paranoia */
-		ipath_free_pddata(dd, pd);
-	}
-	kfree(tmp);
-}
-
-static void ipath_remove_one(struct pci_dev *pdev)
-{
-	struct ipath_devdata *dd = pci_get_drvdata(pdev);
-
-	ipath_cdbg(VERBOSE, "removing, pdev=%p, dd=%p\n", pdev, dd);
-
-	/*
-	 * disable the IB link early, to be sure no new packets arrive, which
-	 * complicates the shutdown process
-	 */
-	ipath_shutdown_device(dd);
-
-	flush_workqueue(ib_wq);
-
-	if (dd->verbs_dev)
-		ipath_unregister_ib_device(dd->verbs_dev);
-
-	ipath_diag_remove(dd);
-	ipath_user_remove(dd);
-	ipathfs_remove_device(dd);
-	ipath_device_remove_group(&pdev->dev, dd);
-
-	ipath_cdbg(VERBOSE, "Releasing pci memory regions, dd %p, "
-		   "unit %u\n", dd, (u32) dd->ipath_unit);
-
-	cleanup_device(dd);
-
-	/*
-	 * turn off rcv, send, and interrupts for all ports, all drivers
-	 * should also hard reset the chip here?
-	 * free up port 0 (kernel) rcvhdr, egr bufs, and eventually tid bufs
-	 * for all versions of the driver, if they were allocated
-	 */
-	if (dd->ipath_irq) {
-		ipath_cdbg(VERBOSE, "unit %u free irq %d\n",
-			   dd->ipath_unit, dd->ipath_irq);
-		dd->ipath_f_free_irq(dd);
-	} else
-		ipath_dbg("irq is 0, not doing free_irq "
-			  "for unit %u\n", dd->ipath_unit);
-	/*
-	 * we check for NULL here, because it's outside
-	 * the kregbase check, and we need to call it
-	 * after the free_irq.	Thus it's possible that
-	 * the function pointers were never initialized.
-	 */
-	if (dd->ipath_f_cleanup)
-		/* clean up chip-specific stuff */
-		dd->ipath_f_cleanup(dd);
-
-	ipath_cdbg(VERBOSE, "Unmapping kregbase %p\n", dd->ipath_kregbase);
-	iounmap((volatile void __iomem *) dd->ipath_kregbase);
-	pci_release_regions(pdev);
-	ipath_cdbg(VERBOSE, "calling pci_disable_device\n");
-	pci_disable_device(pdev);
-
-	ipath_free_devdata(pdev, dd);
-}
-
-/* general driver use */
-DEFINE_MUTEX(ipath_mutex);
-
-static DEFINE_SPINLOCK(ipath_pioavail_lock);
-
-/**
- * ipath_disarm_piobufs - cancel a range of PIO buffers
- * @dd: the infinipath device
- * @first: the first PIO buffer to cancel
- * @cnt: the number of PIO buffers to cancel
- *
- * cancel a range of PIO buffers, used when they might be armed, but
- * not triggered.  Used at init to ensure buffer state, and also user
- * process close, in case it died while writing to a PIO buffer
- * Also after errors.
- */
-void ipath_disarm_piobufs(struct ipath_devdata *dd, unsigned first,
-			  unsigned cnt)
-{
-	unsigned i, last = first + cnt;
-	unsigned long flags;
-
-	ipath_cdbg(PKT, "disarm %u PIObufs first=%u\n", cnt, first);
-	for (i = first; i < last; i++) {
-		spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
-		/*
-		 * The disarm-related bits are write-only, so it
-		 * is ok to OR them in with our copy of sendctrl
-		 * while we hold the lock.
-		 */
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-			dd->ipath_sendctrl | INFINIPATH_S_DISARM |
-			(i << INFINIPATH_S_DISARMPIOBUF_SHIFT));
-		/* can't disarm bufs back-to-back per iba7220 spec */
-		ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-		spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
-	}
-	/* on some older chips, update may not happen after cancel */
-	ipath_force_pio_avail_update(dd);
-}
-
-/**
- * ipath_wait_linkstate - wait for an IB link state change to occur
- * @dd: the infinipath device
- * @state: the state to wait for
- * @msecs: the number of milliseconds to wait
- *
- * wait up to msecs milliseconds for IB link state change to occur for
- * now, take the easy polling route.  Currently used only by
- * ipath_set_linkstate.  Returns 0 if state reached, otherwise
- * -ETIMEDOUT state can have multiple states set, for any of several
- * transitions.
- */
-int ipath_wait_linkstate(struct ipath_devdata *dd, u32 state, int msecs)
-{
-	dd->ipath_state_wanted = state;
-	wait_event_interruptible_timeout(ipath_state_wait,
-					 (dd->ipath_flags & state),
-					 msecs_to_jiffies(msecs));
-	dd->ipath_state_wanted = 0;
-
-	if (!(dd->ipath_flags & state)) {
-		u64 val;
-		ipath_cdbg(VERBOSE, "Didn't reach linkstate %s within %u"
-			   " ms\n",
-			   /* test INIT ahead of DOWN, both can be set */
-			   (state & IPATH_LINKINIT) ? "INIT" :
-			   ((state & IPATH_LINKDOWN) ? "DOWN" :
-			    ((state & IPATH_LINKARMED) ? "ARM" : "ACTIVE")),
-			   msecs);
-		val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus);
-		ipath_cdbg(VERBOSE, "ibcc=%llx ibcstatus=%llx (%s)\n",
-			   (unsigned long long) ipath_read_kreg64(
-				   dd, dd->ipath_kregs->kr_ibcctrl),
-			   (unsigned long long) val,
-			   ipath_ibcstatus_str[val & dd->ibcs_lts_mask]);
-	}
-	return (dd->ipath_flags & state) ? 0 : -ETIMEDOUT;
-}
-
-static void decode_sdma_errs(struct ipath_devdata *dd, ipath_err_t err,
-	char *buf, size_t blen)
-{
-	static const struct {
-		ipath_err_t err;
-		const char *msg;
-	} errs[] = {
-		{ INFINIPATH_E_SDMAGENMISMATCH, "SDmaGenMismatch" },
-		{ INFINIPATH_E_SDMAOUTOFBOUND, "SDmaOutOfBound" },
-		{ INFINIPATH_E_SDMATAILOUTOFBOUND, "SDmaTailOutOfBound" },
-		{ INFINIPATH_E_SDMABASE, "SDmaBase" },
-		{ INFINIPATH_E_SDMA1STDESC, "SDma1stDesc" },
-		{ INFINIPATH_E_SDMARPYTAG, "SDmaRpyTag" },
-		{ INFINIPATH_E_SDMADWEN, "SDmaDwEn" },
-		{ INFINIPATH_E_SDMAMISSINGDW, "SDmaMissingDw" },
-		{ INFINIPATH_E_SDMAUNEXPDATA, "SDmaUnexpData" },
-		{ INFINIPATH_E_SDMADESCADDRMISALIGN, "SDmaDescAddrMisalign" },
-		{ INFINIPATH_E_SENDBUFMISUSE, "SendBufMisuse" },
-		{ INFINIPATH_E_SDMADISABLED, "SDmaDisabled" },
-	};
-	int i;
-	int expected;
-	size_t bidx = 0;
-
-	for (i = 0; i < ARRAY_SIZE(errs); i++) {
-		expected = (errs[i].err != INFINIPATH_E_SDMADISABLED) ? 0 :
-			test_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status);
-		if ((err & errs[i].err) && !expected)
-			bidx += snprintf(buf + bidx, blen - bidx,
-					 "%s ", errs[i].msg);
-	}
-}
-
-/*
- * Decode the error status into strings, deciding whether to always
- * print * it or not depending on "normal packet errors" vs everything
- * else.   Return 1 if "real" errors, otherwise 0 if only packet
- * errors, so caller can decide what to print with the string.
- */
-int ipath_decode_err(struct ipath_devdata *dd, char *buf, size_t blen,
-	ipath_err_t err)
-{
-	int iserr = 1;
-	*buf = '\0';
-	if (err & INFINIPATH_E_PKTERRS) {
-		if (!(err & ~INFINIPATH_E_PKTERRS))
-			iserr = 0; // if only packet errors.
-		if (ipath_debug & __IPATH_ERRPKTDBG) {
-			if (err & INFINIPATH_E_REBP)
-				strlcat(buf, "EBP ", blen);
-			if (err & INFINIPATH_E_RVCRC)
-				strlcat(buf, "VCRC ", blen);
-			if (err & INFINIPATH_E_RICRC) {
-				strlcat(buf, "CRC ", blen);
-				// clear for check below, so only once
-				err &= INFINIPATH_E_RICRC;
-			}
-			if (err & INFINIPATH_E_RSHORTPKTLEN)
-				strlcat(buf, "rshortpktlen ", blen);
-			if (err & INFINIPATH_E_SDROPPEDDATAPKT)
-				strlcat(buf, "sdroppeddatapkt ", blen);
-			if (err & INFINIPATH_E_SPKTLEN)
-				strlcat(buf, "spktlen ", blen);
-		}
-		if ((err & INFINIPATH_E_RICRC) &&
-			!(err&(INFINIPATH_E_RVCRC|INFINIPATH_E_REBP)))
-			strlcat(buf, "CRC ", blen);
-		if (!iserr)
-			goto done;
-	}
-	if (err & INFINIPATH_E_RHDRLEN)
-		strlcat(buf, "rhdrlen ", blen);
-	if (err & INFINIPATH_E_RBADTID)
-		strlcat(buf, "rbadtid ", blen);
-	if (err & INFINIPATH_E_RBADVERSION)
-		strlcat(buf, "rbadversion ", blen);
-	if (err & INFINIPATH_E_RHDR)
-		strlcat(buf, "rhdr ", blen);
-	if (err & INFINIPATH_E_SENDSPECIALTRIGGER)
-		strlcat(buf, "sendspecialtrigger ", blen);
-	if (err & INFINIPATH_E_RLONGPKTLEN)
-		strlcat(buf, "rlongpktlen ", blen);
-	if (err & INFINIPATH_E_RMAXPKTLEN)
-		strlcat(buf, "rmaxpktlen ", blen);
-	if (err & INFINIPATH_E_RMINPKTLEN)
-		strlcat(buf, "rminpktlen ", blen);
-	if (err & INFINIPATH_E_SMINPKTLEN)
-		strlcat(buf, "sminpktlen ", blen);
-	if (err & INFINIPATH_E_RFORMATERR)
-		strlcat(buf, "rformaterr ", blen);
-	if (err & INFINIPATH_E_RUNSUPVL)
-		strlcat(buf, "runsupvl ", blen);
-	if (err & INFINIPATH_E_RUNEXPCHAR)
-		strlcat(buf, "runexpchar ", blen);
-	if (err & INFINIPATH_E_RIBFLOW)
-		strlcat(buf, "ribflow ", blen);
-	if (err & INFINIPATH_E_SUNDERRUN)
-		strlcat(buf, "sunderrun ", blen);
-	if (err & INFINIPATH_E_SPIOARMLAUNCH)
-		strlcat(buf, "spioarmlaunch ", blen);
-	if (err & INFINIPATH_E_SUNEXPERRPKTNUM)
-		strlcat(buf, "sunexperrpktnum ", blen);
-	if (err & INFINIPATH_E_SDROPPEDSMPPKT)
-		strlcat(buf, "sdroppedsmppkt ", blen);
-	if (err & INFINIPATH_E_SMAXPKTLEN)
-		strlcat(buf, "smaxpktlen ", blen);
-	if (err & INFINIPATH_E_SUNSUPVL)
-		strlcat(buf, "sunsupVL ", blen);
-	if (err & INFINIPATH_E_INVALIDADDR)
-		strlcat(buf, "invalidaddr ", blen);
-	if (err & INFINIPATH_E_RRCVEGRFULL)
-		strlcat(buf, "rcvegrfull ", blen);
-	if (err & INFINIPATH_E_RRCVHDRFULL)
-		strlcat(buf, "rcvhdrfull ", blen);
-	if (err & INFINIPATH_E_IBSTATUSCHANGED)
-		strlcat(buf, "ibcstatuschg ", blen);
-	if (err & INFINIPATH_E_RIBLOSTLINK)
-		strlcat(buf, "riblostlink ", blen);
-	if (err & INFINIPATH_E_HARDWARE)
-		strlcat(buf, "hardware ", blen);
-	if (err & INFINIPATH_E_RESET)
-		strlcat(buf, "reset ", blen);
-	if (err & INFINIPATH_E_SDMAERRS)
-		decode_sdma_errs(dd, err, buf, blen);
-	if (err & INFINIPATH_E_INVALIDEEPCMD)
-		strlcat(buf, "invalideepromcmd ", blen);
-done:
-	return iserr;
-}
-
-/**
- * get_rhf_errstring - decode RHF errors
- * @err: the err number
- * @msg: the output buffer
- * @len: the length of the output buffer
- *
- * only used one place now, may want more later
- */
-static void get_rhf_errstring(u32 err, char *msg, size_t len)
-{
-	/* if no errors, and so don't need to check what's first */
-	*msg = '\0';
-
-	if (err & INFINIPATH_RHF_H_ICRCERR)
-		strlcat(msg, "icrcerr ", len);
-	if (err & INFINIPATH_RHF_H_VCRCERR)
-		strlcat(msg, "vcrcerr ", len);
-	if (err & INFINIPATH_RHF_H_PARITYERR)
-		strlcat(msg, "parityerr ", len);
-	if (err & INFINIPATH_RHF_H_LENERR)
-		strlcat(msg, "lenerr ", len);
-	if (err & INFINIPATH_RHF_H_MTUERR)
-		strlcat(msg, "mtuerr ", len);
-	if (err & INFINIPATH_RHF_H_IHDRERR)
-		/* infinipath hdr checksum error */
-		strlcat(msg, "ipathhdrerr ", len);
-	if (err & INFINIPATH_RHF_H_TIDERR)
-		strlcat(msg, "tiderr ", len);
-	if (err & INFINIPATH_RHF_H_MKERR)
-		/* bad port, offset, etc. */
-		strlcat(msg, "invalid ipathhdr ", len);
-	if (err & INFINIPATH_RHF_H_IBERR)
-		strlcat(msg, "iberr ", len);
-	if (err & INFINIPATH_RHF_L_SWA)
-		strlcat(msg, "swA ", len);
-	if (err & INFINIPATH_RHF_L_SWB)
-		strlcat(msg, "swB ", len);
-}
-
-/**
- * ipath_get_egrbuf - get an eager buffer
- * @dd: the infinipath device
- * @bufnum: the eager buffer to get
- *
- * must only be called if ipath_pd[port] is known to be allocated
- */
-static inline void *ipath_get_egrbuf(struct ipath_devdata *dd, u32 bufnum)
-{
-	return dd->ipath_port0_skbinfo ?
-		(void *) dd->ipath_port0_skbinfo[bufnum].skb->data : NULL;
-}
-
-/**
- * ipath_alloc_skb - allocate an skb and buffer with possible constraints
- * @dd: the infinipath device
- * @gfp_mask: the sk_buff SFP mask
- */
-struct sk_buff *ipath_alloc_skb(struct ipath_devdata *dd,
-				gfp_t gfp_mask)
-{
-	struct sk_buff *skb;
-	u32 len;
-
-	/*
-	 * Only fully supported way to handle this is to allocate lots
-	 * extra, align as needed, and then do skb_reserve().  That wastes
-	 * a lot of memory...  I'll have to hack this into infinipath_copy
-	 * also.
-	 */
-
-	/*
-	 * We need 2 extra bytes for ipath_ether data sent in the
-	 * key header.  In order to keep everything dword aligned,
-	 * we'll reserve 4 bytes.
-	 */
-	len = dd->ipath_ibmaxlen + 4;
-
-	if (dd->ipath_flags & IPATH_4BYTE_TID) {
-		/* We need a 2KB multiple alignment, and there is no way
-		 * to do it except to allocate extra and then skb_reserve
-		 * enough to bring it up to the right alignment.
-		 */
-		len += 2047;
-	}
-
-	skb = __dev_alloc_skb(len, gfp_mask);
-	if (!skb) {
-		ipath_dev_err(dd, "Failed to allocate skbuff, length %u\n",
-			      len);
-		goto bail;
-	}
-
-	skb_reserve(skb, 4);
-
-	if (dd->ipath_flags & IPATH_4BYTE_TID) {
-		u32 una = (unsigned long)skb->data & 2047;
-		if (una)
-			skb_reserve(skb, 2048 - una);
-	}
-
-bail:
-	return skb;
-}
-
-static void ipath_rcv_hdrerr(struct ipath_devdata *dd,
-			     u32 eflags,
-			     u32 l,
-			     u32 etail,
-			     __le32 *rhf_addr,
-			     struct ipath_message_header *hdr)
-{
-	char emsg[128];
-
-	get_rhf_errstring(eflags, emsg, sizeof emsg);
-	ipath_cdbg(PKT, "RHFerrs %x hdrqtail=%x typ=%u "
-		   "tlen=%x opcode=%x egridx=%x: %s\n",
-		   eflags, l,
-		   ipath_hdrget_rcv_type(rhf_addr),
-		   ipath_hdrget_length_in_bytes(rhf_addr),
-		   be32_to_cpu(hdr->bth[0]) >> 24,
-		   etail, emsg);
-
-	/* Count local link integrity errors. */
-	if (eflags & (INFINIPATH_RHF_H_ICRCERR | INFINIPATH_RHF_H_VCRCERR)) {
-		u8 n = (dd->ipath_ibcctrl >>
-			INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) &
-			INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK;
-
-		if (++dd->ipath_lli_counter > n) {
-			dd->ipath_lli_counter = 0;
-			dd->ipath_lli_errors++;
-		}
-	}
-}
-
-/*
- * ipath_kreceive - receive a packet
- * @pd: the infinipath port
- *
- * called from interrupt handler for errors or receive interrupt
- */
-void ipath_kreceive(struct ipath_portdata *pd)
-{
-	struct ipath_devdata *dd = pd->port_dd;
-	__le32 *rhf_addr;
-	void *ebuf;
-	const u32 rsize = dd->ipath_rcvhdrentsize;	/* words */
-	const u32 maxcnt = dd->ipath_rcvhdrcnt * rsize;	/* words */
-	u32 etail = -1, l, hdrqtail;
-	struct ipath_message_header *hdr;
-	u32 eflags, i, etype, tlen, pkttot = 0, updegr = 0, reloop = 0;
-	static u64 totcalls;	/* stats, may eventually remove */
-	int last;
-
-	l = pd->port_head;
-	rhf_addr = (__le32 *) pd->port_rcvhdrq + l + dd->ipath_rhf_offset;
-	if (dd->ipath_flags & IPATH_NODMA_RTAIL) {
-		u32 seq = ipath_hdrget_seq(rhf_addr);
-
-		if (seq != pd->port_seq_cnt)
-			goto bail;
-		hdrqtail = 0;
-	} else {
-		hdrqtail = ipath_get_rcvhdrtail(pd);
-		if (l == hdrqtail)
-			goto bail;
-		smp_rmb();
-	}
-
-reloop:
-	for (last = 0, i = 1; !last; i += !last) {
-		hdr = dd->ipath_f_get_msgheader(dd, rhf_addr);
-		eflags = ipath_hdrget_err_flags(rhf_addr);
-		etype = ipath_hdrget_rcv_type(rhf_addr);
-		/* total length */
-		tlen = ipath_hdrget_length_in_bytes(rhf_addr);
-		ebuf = NULL;
-		if ((dd->ipath_flags & IPATH_NODMA_RTAIL) ?
-		    ipath_hdrget_use_egr_buf(rhf_addr) :
-		    (etype != RCVHQ_RCV_TYPE_EXPECTED)) {
-			/*
-			 * It turns out that the chip uses an eager buffer
-			 * for all non-expected packets, whether it "needs"
-			 * one or not.  So always get the index, but don't
-			 * set ebuf (so we try to copy data) unless the
-			 * length requires it.
-			 */
-			etail = ipath_hdrget_index(rhf_addr);
-			updegr = 1;
-			if (tlen > sizeof(*hdr) ||
-			    etype == RCVHQ_RCV_TYPE_NON_KD)
-				ebuf = ipath_get_egrbuf(dd, etail);
-		}
-
-		/*
-		 * both tiderr and ipathhdrerr are set for all plain IB
-		 * packets; only ipathhdrerr should be set.
-		 */
-
-		if (etype != RCVHQ_RCV_TYPE_NON_KD &&
-		    etype != RCVHQ_RCV_TYPE_ERROR &&
-		    ipath_hdrget_ipath_ver(hdr->iph.ver_port_tid_offset) !=
-		    IPS_PROTO_VERSION)
-			ipath_cdbg(PKT, "Bad InfiniPath protocol version "
-				   "%x\n", etype);
-
-		if (unlikely(eflags))
-			ipath_rcv_hdrerr(dd, eflags, l, etail, rhf_addr, hdr);
-		else if (etype == RCVHQ_RCV_TYPE_NON_KD) {
-			ipath_ib_rcv(dd->verbs_dev, (u32 *)hdr, ebuf, tlen);
-			if (dd->ipath_lli_counter)
-				dd->ipath_lli_counter--;
-		} else if (etype == RCVHQ_RCV_TYPE_EAGER) {
-			u8 opcode = be32_to_cpu(hdr->bth[0]) >> 24;
-			u32 qp = be32_to_cpu(hdr->bth[1]) & 0xffffff;
-			ipath_cdbg(PKT, "typ %x, opcode %x (eager, "
-				   "qp=%x), len %x; ignored\n",
-				   etype, opcode, qp, tlen);
-		} else if (etype == RCVHQ_RCV_TYPE_EXPECTED) {
-			ipath_dbg("Bug: Expected TID, opcode %x; ignored\n",
-				  be32_to_cpu(hdr->bth[0]) >> 24);
-		} else {
-			/*
-			 * error packet, type of error unknown.
-			 * Probably type 3, but we don't know, so don't
-			 * even try to print the opcode, etc.
-			 * Usually caused by a "bad packet", that has no
-			 * BTH, when the LRH says it should.
-			 */
-			ipath_cdbg(ERRPKT, "Error Pkt, but no eflags! egrbuf"
-				  " %x, len %x hdrq+%x rhf: %Lx\n",
-				  etail, tlen, l, (unsigned long long)
-				  le64_to_cpu(*(__le64 *) rhf_addr));
-			if (ipath_debug & __IPATH_ERRPKTDBG) {
-				u32 j, *d, dw = rsize-2;
-				if (rsize > (tlen>>2))
-					dw = tlen>>2;
-				d = (u32 *)hdr;
-				printk(KERN_DEBUG "EPkt rcvhdr(%x dw):\n",
-					dw);
-				for (j = 0; j < dw; j++)
-					printk(KERN_DEBUG "%8x%s", d[j],
-						(j%8) == 7 ? "\n" : " ");
-				printk(KERN_DEBUG ".\n");
-			}
-		}
-		l += rsize;
-		if (l >= maxcnt)
-			l = 0;
-		rhf_addr = (__le32 *) pd->port_rcvhdrq +
-			l + dd->ipath_rhf_offset;
-		if (dd->ipath_flags & IPATH_NODMA_RTAIL) {
-			u32 seq = ipath_hdrget_seq(rhf_addr);
-
-			if (++pd->port_seq_cnt > 13)
-				pd->port_seq_cnt = 1;
-			if (seq != pd->port_seq_cnt)
-				last = 1;
-		} else if (l == hdrqtail) {
-			last = 1;
-		}
-		/*
-		 * update head regs on last packet, and every 16 packets.
-		 * Reduce bus traffic, while still trying to prevent
-		 * rcvhdrq overflows, for when the queue is nearly full
-		 */
-		if (last || !(i & 0xf)) {
-			u64 lval = l;
-
-			/* request IBA6120 and 7220 interrupt only on last */
-			if (last)
-				lval |= dd->ipath_rhdrhead_intr_off;
-			ipath_write_ureg(dd, ur_rcvhdrhead, lval,
-				pd->port_port);
-			if (updegr) {
-				ipath_write_ureg(dd, ur_rcvegrindexhead,
-						 etail, pd->port_port);
-				updegr = 0;
-			}
-		}
-	}
-
-	if (!dd->ipath_rhdrhead_intr_off && !reloop &&
-	    !(dd->ipath_flags & IPATH_NODMA_RTAIL)) {
-		/* IBA6110 workaround; we can have a race clearing chip
-		 * interrupt with another interrupt about to be delivered,
-		 * and can clear it before it is delivered on the GPIO
-		 * workaround.  By doing the extra check here for the
-		 * in-memory tail register updating while we were doing
-		 * earlier packets, we "almost" guarantee we have covered
-		 * that case.
-		 */
-		u32 hqtail = ipath_get_rcvhdrtail(pd);
-		if (hqtail != hdrqtail) {
-			hdrqtail = hqtail;
-			reloop = 1; /* loop 1 extra time at most */
-			goto reloop;
-		}
-	}
-
-	pkttot += i;
-
-	pd->port_head = l;
-
-	if (pkttot > ipath_stats.sps_maxpkts_call)
-		ipath_stats.sps_maxpkts_call = pkttot;
-	ipath_stats.sps_port0pkts += pkttot;
-	ipath_stats.sps_avgpkts_call =
-		ipath_stats.sps_port0pkts / ++totcalls;
-
-bail:;
-}
-
-/**
- * ipath_update_pio_bufs - update shadow copy of the PIO availability map
- * @dd: the infinipath device
- *
- * called whenever our local copy indicates we have run out of send buffers
- * NOTE: This can be called from interrupt context by some code
- * and from non-interrupt context by ipath_getpiobuf().
- */
-
-static void ipath_update_pio_bufs(struct ipath_devdata *dd)
-{
-	unsigned long flags;
-	int i;
-	const unsigned piobregs = (unsigned)dd->ipath_pioavregs;
-
-	/* If the generation (check) bits have changed, then we update the
-	 * busy bit for the corresponding PIO buffer.  This algorithm will
-	 * modify positions to the value they already have in some cases
-	 * (i.e., no change), but it's faster than changing only the bits
-	 * that have changed.
-	 *
-	 * We would like to do this atomicly, to avoid spinlocks in the
-	 * critical send path, but that's not really possible, given the
-	 * type of changes, and that this routine could be called on
-	 * multiple cpu's simultaneously, so we lock in this routine only,
-	 * to avoid conflicting updates; all we change is the shadow, and
-	 * it's a single 64 bit memory location, so by definition the update
-	 * is atomic in terms of what other cpu's can see in testing the
-	 * bits.  The spin_lock overhead isn't too bad, since it only
-	 * happens when all buffers are in use, so only cpu overhead, not
-	 * latency or bandwidth is affected.
-	 */
-	if (!dd->ipath_pioavailregs_dma) {
-		ipath_dbg("Update shadow pioavail, but regs_dma NULL!\n");
-		return;
-	}
-	if (ipath_debug & __IPATH_VERBDBG) {
-		/* only if packet debug and verbose */
-		volatile __le64 *dma = dd->ipath_pioavailregs_dma;
-		unsigned long *shadow = dd->ipath_pioavailshadow;
-
-		ipath_cdbg(PKT, "Refill avail, dma0=%llx shad0=%lx, "
-			   "d1=%llx s1=%lx, d2=%llx s2=%lx, d3=%llx "
-			   "s3=%lx\n",
-			   (unsigned long long) le64_to_cpu(dma[0]),
-			   shadow[0],
-			   (unsigned long long) le64_to_cpu(dma[1]),
-			   shadow[1],
-			   (unsigned long long) le64_to_cpu(dma[2]),
-			   shadow[2],
-			   (unsigned long long) le64_to_cpu(dma[3]),
-			   shadow[3]);
-		if (piobregs > 4)
-			ipath_cdbg(
-				PKT, "2nd group, dma4=%llx shad4=%lx, "
-				"d5=%llx s5=%lx, d6=%llx s6=%lx, "
-				"d7=%llx s7=%lx\n",
-				(unsigned long long) le64_to_cpu(dma[4]),
-				shadow[4],
-				(unsigned long long) le64_to_cpu(dma[5]),
-				shadow[5],
-				(unsigned long long) le64_to_cpu(dma[6]),
-				shadow[6],
-				(unsigned long long) le64_to_cpu(dma[7]),
-				shadow[7]);
-	}
-	spin_lock_irqsave(&ipath_pioavail_lock, flags);
-	for (i = 0; i < piobregs; i++) {
-		u64 pchbusy, pchg, piov, pnew;
-		/*
-		 * Chip Errata: bug 6641; even and odd qwords>3 are swapped
-		 */
-		if (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS))
-			piov = le64_to_cpu(dd->ipath_pioavailregs_dma[i ^ 1]);
-		else
-			piov = le64_to_cpu(dd->ipath_pioavailregs_dma[i]);
-		pchg = dd->ipath_pioavailkernel[i] &
-			~(dd->ipath_pioavailshadow[i] ^ piov);
-		pchbusy = pchg << INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT;
-		if (pchg && (pchbusy & dd->ipath_pioavailshadow[i])) {
-			pnew = dd->ipath_pioavailshadow[i] & ~pchbusy;
-			pnew |= piov & pchbusy;
-			dd->ipath_pioavailshadow[i] = pnew;
-		}
-	}
-	spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
-}
-
-/*
- * used to force update of pioavailshadow if we can't get a pio buffer.
- * Needed primarily due to exitting freeze mode after recovering
- * from errors.  Done lazily, because it's safer (known to not
- * be writing pio buffers).
- */
-static void ipath_reset_availshadow(struct ipath_devdata *dd)
-{
-	int i, im;
-	unsigned long flags;
-
-	spin_lock_irqsave(&ipath_pioavail_lock, flags);
-	for (i = 0; i < dd->ipath_pioavregs; i++) {
-		u64 val, oldval;
-		/* deal with 6110 chip bug on high register #s */
-		im = (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS)) ?
-			i ^ 1 : i;
-		val = le64_to_cpu(dd->ipath_pioavailregs_dma[im]);
-		/*
-		 * busy out the buffers not in the kernel avail list,
-		 * without changing the generation bits.
-		 */
-		oldval = dd->ipath_pioavailshadow[i];
-		dd->ipath_pioavailshadow[i] = val |
-			((~dd->ipath_pioavailkernel[i] <<
-			INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT) &
-			0xaaaaaaaaaaaaaaaaULL); /* All BUSY bits in qword */
-		if (oldval != dd->ipath_pioavailshadow[i])
-			ipath_dbg("shadow[%d] was %Lx, now %lx\n",
-				i, (unsigned long long) oldval,
-				dd->ipath_pioavailshadow[i]);
-	}
-	spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
-}
-
-/**
- * ipath_setrcvhdrsize - set the receive header size
- * @dd: the infinipath device
- * @rhdrsize: the receive header size
- *
- * called from user init code, and also layered driver init
- */
-int ipath_setrcvhdrsize(struct ipath_devdata *dd, unsigned rhdrsize)
-{
-	int ret = 0;
-
-	if (dd->ipath_flags & IPATH_RCVHDRSZ_SET) {
-		if (dd->ipath_rcvhdrsize != rhdrsize) {
-			dev_info(&dd->pcidev->dev,
-				 "Error: can't set protocol header "
-				 "size %u, already %u\n",
-				 rhdrsize, dd->ipath_rcvhdrsize);
-			ret = -EAGAIN;
-		} else
-			ipath_cdbg(VERBOSE, "Reuse same protocol header "
-				   "size %u\n", dd->ipath_rcvhdrsize);
-	} else if (rhdrsize > (dd->ipath_rcvhdrentsize -
-			       (sizeof(u64) / sizeof(u32)))) {
-		ipath_dbg("Error: can't set protocol header size %u "
-			  "(> max %u)\n", rhdrsize,
-			  dd->ipath_rcvhdrentsize -
-			  (u32) (sizeof(u64) / sizeof(u32)));
-		ret = -EOVERFLOW;
-	} else {
-		dd->ipath_flags |= IPATH_RCVHDRSZ_SET;
-		dd->ipath_rcvhdrsize = rhdrsize;
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvhdrsize,
-				 dd->ipath_rcvhdrsize);
-		ipath_cdbg(VERBOSE, "Set protocol header size to %u\n",
-			   dd->ipath_rcvhdrsize);
-	}
-	return ret;
-}
-
-/*
- * debugging code and stats updates if no pio buffers available.
- */
-static noinline void no_pio_bufs(struct ipath_devdata *dd)
-{
-	unsigned long *shadow = dd->ipath_pioavailshadow;
-	__le64 *dma = (__le64 *)dd->ipath_pioavailregs_dma;
-
-	dd->ipath_upd_pio_shadow = 1;
-
-	/*
-	 * not atomic, but if we lose a stat count in a while, that's OK
-	 */
-	ipath_stats.sps_nopiobufs++;
-	if (!(++dd->ipath_consec_nopiobuf % 100000)) {
-		ipath_force_pio_avail_update(dd); /* at start */
-		ipath_dbg("%u tries no piobufavail ts%lx; dmacopy: "
-			"%llx %llx %llx %llx\n"
-			"ipath  shadow:  %lx %lx %lx %lx\n",
-			dd->ipath_consec_nopiobuf,
-			(unsigned long)get_cycles(),
-			(unsigned long long) le64_to_cpu(dma[0]),
-			(unsigned long long) le64_to_cpu(dma[1]),
-			(unsigned long long) le64_to_cpu(dma[2]),
-			(unsigned long long) le64_to_cpu(dma[3]),
-			shadow[0], shadow[1], shadow[2], shadow[3]);
-		/*
-		 * 4 buffers per byte, 4 registers above, cover rest
-		 * below
-		 */
-		if ((dd->ipath_piobcnt2k + dd->ipath_piobcnt4k) >
-		    (sizeof(shadow[0]) * 4 * 4))
-			ipath_dbg("2nd group: dmacopy: "
-				  "%llx %llx %llx %llx\n"
-				  "ipath  shadow:  %lx %lx %lx %lx\n",
-				  (unsigned long long)le64_to_cpu(dma[4]),
-				  (unsigned long long)le64_to_cpu(dma[5]),
-				  (unsigned long long)le64_to_cpu(dma[6]),
-				  (unsigned long long)le64_to_cpu(dma[7]),
-				  shadow[4], shadow[5], shadow[6], shadow[7]);
-
-		/* at end, so update likely happened */
-		ipath_reset_availshadow(dd);
-	}
-}
-
-/*
- * common code for normal driver pio buffer allocation, and reserved
- * allocation.
- *
- * do appropriate marking as busy, etc.
- * returns buffer number if one found (>=0), negative number is error.
- */
-static u32 __iomem *ipath_getpiobuf_range(struct ipath_devdata *dd,
-	u32 *pbufnum, u32 first, u32 last, u32 firsti)
-{
-	int i, j, updated = 0;
-	unsigned piobcnt;
-	unsigned long flags;
-	unsigned long *shadow = dd->ipath_pioavailshadow;
-	u32 __iomem *buf;
-
-	piobcnt = last - first;
-	if (dd->ipath_upd_pio_shadow) {
-		/*
-		 * Minor optimization.  If we had no buffers on last call,
-		 * start out by doing the update; continue and do scan even
-		 * if no buffers were updated, to be paranoid
-		 */
-		ipath_update_pio_bufs(dd);
-		updated++;
-		i = first;
-	} else
-		i = firsti;
-rescan:
-	/*
-	 * while test_and_set_bit() is atomic, we do that and then the
-	 * change_bit(), and the pair is not.  See if this is the cause
-	 * of the remaining armlaunch errors.
-	 */
-	spin_lock_irqsave(&ipath_pioavail_lock, flags);
-	for (j = 0; j < piobcnt; j++, i++) {
-		if (i >= last)
-			i = first;
-		if (__test_and_set_bit((2 * i) + 1, shadow))
-			continue;
-		/* flip generation bit */
-		__change_bit(2 * i, shadow);
-		break;
-	}
-	spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
-
-	if (j == piobcnt) {
-		if (!updated) {
-			/*
-			 * first time through; shadow exhausted, but may be
-			 * buffers available, try an update and then rescan.
-			 */
-			ipath_update_pio_bufs(dd);
-			updated++;
-			i = first;
-			goto rescan;
-		} else if (updated == 1 && piobcnt <=
-			((dd->ipath_sendctrl
-			>> INFINIPATH_S_UPDTHRESH_SHIFT) &
-			INFINIPATH_S_UPDTHRESH_MASK)) {
-			/*
-			 * for chips supporting and using the update
-			 * threshold we need to force an update of the
-			 * in-memory copy if the count is less than the
-			 * thershold, then check one more time.
-			 */
-			ipath_force_pio_avail_update(dd);
-			ipath_update_pio_bufs(dd);
-			updated++;
-			i = first;
-			goto rescan;
-		}
-
-		no_pio_bufs(dd);
-		buf = NULL;
-	} else {
-		if (i < dd->ipath_piobcnt2k)
-			buf = (u32 __iomem *) (dd->ipath_pio2kbase +
-					       i * dd->ipath_palign);
-		else
-			buf = (u32 __iomem *)
-				(dd->ipath_pio4kbase +
-				 (i - dd->ipath_piobcnt2k) * dd->ipath_4kalign);
-		if (pbufnum)
-			*pbufnum = i;
-	}
-
-	return buf;
-}
-
-/**
- * ipath_getpiobuf - find an available pio buffer
- * @dd: the infinipath device
- * @plen: the size of the PIO buffer needed in 32-bit words
- * @pbufnum: the buffer number is placed here
- */
-u32 __iomem *ipath_getpiobuf(struct ipath_devdata *dd, u32 plen, u32 *pbufnum)
-{
-	u32 __iomem *buf;
-	u32 pnum, nbufs;
-	u32 first, lasti;
-
-	if (plen + 1 >= IPATH_SMALLBUF_DWORDS) {
-		first = dd->ipath_piobcnt2k;
-		lasti = dd->ipath_lastpioindexl;
-	} else {
-		first = 0;
-		lasti = dd->ipath_lastpioindex;
-	}
-	nbufs = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k;
-	buf = ipath_getpiobuf_range(dd, &pnum, first, nbufs, lasti);
-
-	if (buf) {
-		/*
-		 * Set next starting place.  It's just an optimization,
-		 * it doesn't matter who wins on this, so no locking
-		 */
-		if (plen + 1 >= IPATH_SMALLBUF_DWORDS)
-			dd->ipath_lastpioindexl = pnum + 1;
-		else
-			dd->ipath_lastpioindex = pnum + 1;
-		if (dd->ipath_upd_pio_shadow)
-			dd->ipath_upd_pio_shadow = 0;
-		if (dd->ipath_consec_nopiobuf)
-			dd->ipath_consec_nopiobuf = 0;
-		ipath_cdbg(VERBOSE, "Return piobuf%u %uk @ %p\n",
-			   pnum, (pnum < dd->ipath_piobcnt2k) ? 2 : 4, buf);
-		if (pbufnum)
-			*pbufnum = pnum;
-
-	}
-	return buf;
-}
-
-/**
- * ipath_chg_pioavailkernel - change which send buffers are available for kernel
- * @dd: the infinipath device
- * @start: the starting send buffer number
- * @len: the number of send buffers
- * @avail: true if the buffers are available for kernel use, false otherwise
- */
-void ipath_chg_pioavailkernel(struct ipath_devdata *dd, unsigned start,
-			      unsigned len, int avail)
-{
-	unsigned long flags;
-	unsigned end, cnt = 0;
-
-	/* There are two bits per send buffer (busy and generation) */
-	start *= 2;
-	end = start + len * 2;
-
-	spin_lock_irqsave(&ipath_pioavail_lock, flags);
-	/* Set or clear the busy bit in the shadow. */
-	while (start < end) {
-		if (avail) {
-			unsigned long dma;
-			int i, im;
-			/*
-			 * the BUSY bit will never be set, because we disarm
-			 * the user buffers before we hand them back to the
-			 * kernel.  We do have to make sure the generation
-			 * bit is set correctly in shadow, since it could
-			 * have changed many times while allocated to user.
-			 * We can't use the bitmap functions on the full
-			 * dma array because it is always little-endian, so
-			 * we have to flip to host-order first.
-			 * BITS_PER_LONG is slightly wrong, since it's
-			 * always 64 bits per register in chip...
-			 * We only work on 64 bit kernels, so that's OK.
-			 */
-			/* deal with 6110 chip bug on high register #s */
-			i = start / BITS_PER_LONG;
-			im = (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS)) ?
-				i ^ 1 : i;
-			__clear_bit(INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT
-				+ start, dd->ipath_pioavailshadow);
-			dma = (unsigned long) le64_to_cpu(
-				dd->ipath_pioavailregs_dma[im]);
-			if (test_bit((INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT
-				+ start) % BITS_PER_LONG, &dma))
-				__set_bit(INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT
-					+ start, dd->ipath_pioavailshadow);
-			else
-				__clear_bit(INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT
-					+ start, dd->ipath_pioavailshadow);
-			__set_bit(start, dd->ipath_pioavailkernel);
-		} else {
-			__set_bit(start + INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT,
-				dd->ipath_pioavailshadow);
-			__clear_bit(start, dd->ipath_pioavailkernel);
-		}
-		start += 2;
-	}
-
-	if (dd->ipath_pioupd_thresh) {
-		end = 2 * (dd->ipath_piobcnt2k + dd->ipath_piobcnt4k);
-		cnt = bitmap_weight(dd->ipath_pioavailkernel, end);
-	}
-	spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
-
-	/*
-	 * When moving buffers from kernel to user, if number assigned to
-	 * the user is less than the pio update threshold, and threshold
-	 * is supported (cnt was computed > 0), drop the update threshold
-	 * so we update at least once per allocated number of buffers.
-	 * In any case, if the kernel buffers are less than the threshold,
-	 * drop the threshold.  We don't bother increasing it, having once
-	 * decreased it, since it would typically just cycle back and forth.
-	 * If we don't decrease below buffers in use, we can wait a long
-	 * time for an update, until some other context uses PIO buffers.
-	 */
-	if (!avail && len < cnt)
-		cnt = len;
-	if (cnt < dd->ipath_pioupd_thresh) {
-		dd->ipath_pioupd_thresh = cnt;
-		ipath_dbg("Decreased pio update threshold to %u\n",
-			dd->ipath_pioupd_thresh);
-		spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
-		dd->ipath_sendctrl &= ~(INFINIPATH_S_UPDTHRESH_MASK
-			<< INFINIPATH_S_UPDTHRESH_SHIFT);
-		dd->ipath_sendctrl |= dd->ipath_pioupd_thresh
-			<< INFINIPATH_S_UPDTHRESH_SHIFT;
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-			dd->ipath_sendctrl);
-		spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
-	}
-}
-
-/**
- * ipath_create_rcvhdrq - create a receive header queue
- * @dd: the infinipath device
- * @pd: the port data
- *
- * this must be contiguous memory (from an i/o perspective), and must be
- * DMA'able (which means for some systems, it will go through an IOMMU,
- * or be forced into a low address range).
- */
-int ipath_create_rcvhdrq(struct ipath_devdata *dd,
-			 struct ipath_portdata *pd)
-{
-	int ret = 0;
-
-	if (!pd->port_rcvhdrq) {
-		dma_addr_t phys_hdrqtail;
-		gfp_t gfp_flags = GFP_USER | __GFP_COMP;
-		int amt = ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize *
-				sizeof(u32), PAGE_SIZE);
-
-		pd->port_rcvhdrq = dma_alloc_coherent(
-			&dd->pcidev->dev, amt, &pd->port_rcvhdrq_phys,
-			gfp_flags);
-
-		if (!pd->port_rcvhdrq) {
-			ipath_dev_err(dd, "attempt to allocate %d bytes "
-				      "for port %u rcvhdrq failed\n",
-				      amt, pd->port_port);
-			ret = -ENOMEM;
-			goto bail;
-		}
-
-		if (!(dd->ipath_flags & IPATH_NODMA_RTAIL)) {
-			pd->port_rcvhdrtail_kvaddr = dma_alloc_coherent(
-				&dd->pcidev->dev, PAGE_SIZE, &phys_hdrqtail,
-				GFP_KERNEL);
-			if (!pd->port_rcvhdrtail_kvaddr) {
-				ipath_dev_err(dd, "attempt to allocate 1 page "
-					"for port %u rcvhdrqtailaddr "
-					"failed\n", pd->port_port);
-				ret = -ENOMEM;
-				dma_free_coherent(&dd->pcidev->dev, amt,
-					pd->port_rcvhdrq,
-					pd->port_rcvhdrq_phys);
-				pd->port_rcvhdrq = NULL;
-				goto bail;
-			}
-			pd->port_rcvhdrqtailaddr_phys = phys_hdrqtail;
-			ipath_cdbg(VERBOSE, "port %d hdrtailaddr, %llx "
-				   "physical\n", pd->port_port,
-				   (unsigned long long) phys_hdrqtail);
-		}
-
-		pd->port_rcvhdrq_size = amt;
-
-		ipath_cdbg(VERBOSE, "%d pages at %p (phys %lx) size=%lu "
-			   "for port %u rcvhdr Q\n",
-			   amt >> PAGE_SHIFT, pd->port_rcvhdrq,
-			   (unsigned long) pd->port_rcvhdrq_phys,
-			   (unsigned long) pd->port_rcvhdrq_size,
-			   pd->port_port);
-	} else {
-		ipath_cdbg(VERBOSE, "reuse port %d rcvhdrq @%p %llx phys; "
-			   "hdrtailaddr@%p %llx physical\n",
-			   pd->port_port, pd->port_rcvhdrq,
-			   (unsigned long long) pd->port_rcvhdrq_phys,
-			   pd->port_rcvhdrtail_kvaddr, (unsigned long long)
-			   pd->port_rcvhdrqtailaddr_phys);
-	}
-	/* clear for security and sanity on each use */
-	memset(pd->port_rcvhdrq, 0, pd->port_rcvhdrq_size);
-	if (pd->port_rcvhdrtail_kvaddr)
-		memset(pd->port_rcvhdrtail_kvaddr, 0, PAGE_SIZE);
-
-	/*
-	 * tell chip each time we init it, even if we are re-using previous
-	 * memory (we zero the register at process close)
-	 */
-	ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdrtailaddr,
-			      pd->port_port, pd->port_rcvhdrqtailaddr_phys);
-	ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdraddr,
-			      pd->port_port, pd->port_rcvhdrq_phys);
-
-bail:
-	return ret;
-}
-
-
-/*
- * Flush all sends that might be in the ready to send state, as well as any
- * that are in the process of being sent.   Used whenever we need to be
- * sure the send side is idle.  Cleans up all buffer state by canceling
- * all pio buffers, and issuing an abort, which cleans up anything in the
- * launch fifo.  The cancel is superfluous on some chip versions, but
- * it's safer to always do it.
- * PIOAvail bits are updated by the chip as if normal send had happened.
- */
-void ipath_cancel_sends(struct ipath_devdata *dd, int restore_sendctrl)
-{
-	unsigned long flags;
-
-	if (dd->ipath_flags & IPATH_IB_AUTONEG_INPROG) {
-		ipath_cdbg(VERBOSE, "Ignore while in autonegotiation\n");
-		goto bail;
-	}
-	/*
-	 * If we have SDMA, and it's not disabled, we have to kick off the
-	 * abort state machine, provided we aren't already aborting.
-	 * If we are in the process of aborting SDMA (!DISABLED, but ABORTING),
-	 * we skip the rest of this routine. It is already "in progress"
-	 */
-	if (dd->ipath_flags & IPATH_HAS_SEND_DMA) {
-		int skip_cancel;
-		unsigned long *statp = &dd->ipath_sdma_status;
-
-		spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
-		skip_cancel =
-			test_and_set_bit(IPATH_SDMA_ABORTING, statp)
-			&& !test_bit(IPATH_SDMA_DISABLED, statp);
-		spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
-		if (skip_cancel)
-			goto bail;
-	}
-
-	ipath_dbg("Cancelling all in-progress send buffers\n");
-
-	/* skip armlaunch errs for a while */
-	dd->ipath_lastcancel = jiffies + HZ / 2;
-
-	/*
-	 * The abort bit is auto-clearing.  We also don't want pioavail
-	 * update happening during this, and we don't want any other
-	 * sends going out, so turn those off for the duration.  We read
-	 * the scratch register to be sure that cancels and the abort
-	 * have taken effect in the chip.  Otherwise two parts are same
-	 * as ipath_force_pio_avail_update()
-	 */
-	spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
-	dd->ipath_sendctrl &= ~(INFINIPATH_S_PIOBUFAVAILUPD
-		| INFINIPATH_S_PIOENABLE);
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-		dd->ipath_sendctrl | INFINIPATH_S_ABORT);
-	ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-	spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
-
-	/* disarm all send buffers */
-	ipath_disarm_piobufs(dd, 0,
-		dd->ipath_piobcnt2k + dd->ipath_piobcnt4k);
-
-	if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
-		set_bit(IPATH_SDMA_DISARMED, &dd->ipath_sdma_status);
-
-	if (restore_sendctrl) {
-		/* else done by caller later if needed */
-		spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
-		dd->ipath_sendctrl |= INFINIPATH_S_PIOBUFAVAILUPD |
-			INFINIPATH_S_PIOENABLE;
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-			dd->ipath_sendctrl);
-		/* and again, be sure all have hit the chip */
-		ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-		spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
-	}
-
-	if ((dd->ipath_flags & IPATH_HAS_SEND_DMA) &&
-	    !test_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status) &&
-	    test_bit(IPATH_SDMA_RUNNING, &dd->ipath_sdma_status)) {
-		spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
-		/* only wait so long for intr */
-		dd->ipath_sdma_abort_intr_timeout = jiffies + HZ;
-		dd->ipath_sdma_reset_wait = 200;
-		if (!test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
-			tasklet_hi_schedule(&dd->ipath_sdma_abort_task);
-		spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
-	}
-bail:;
-}
-
-/*
- * Force an update of in-memory copy of the pioavail registers, when
- * needed for any of a variety of reasons.  We read the scratch register
- * to make it highly likely that the update will have happened by the
- * time we return.  If already off (as in cancel_sends above), this
- * routine is a nop, on the assumption that the caller will "do the
- * right thing".
- */
-void ipath_force_pio_avail_update(struct ipath_devdata *dd)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
-	if (dd->ipath_sendctrl & INFINIPATH_S_PIOBUFAVAILUPD) {
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-			dd->ipath_sendctrl & ~INFINIPATH_S_PIOBUFAVAILUPD);
-		ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-			dd->ipath_sendctrl);
-		ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-	}
-	spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
-}
-
-static void ipath_set_ib_lstate(struct ipath_devdata *dd, int linkcmd,
-				int linitcmd)
-{
-	u64 mod_wd;
-	static const char *what[4] = {
-		[0] = "NOP",
-		[INFINIPATH_IBCC_LINKCMD_DOWN] = "DOWN",
-		[INFINIPATH_IBCC_LINKCMD_ARMED] = "ARMED",
-		[INFINIPATH_IBCC_LINKCMD_ACTIVE] = "ACTIVE"
-	};
-
-	if (linitcmd == INFINIPATH_IBCC_LINKINITCMD_DISABLE) {
-		/*
-		 * If we are told to disable, note that so link-recovery
-		 * code does not attempt to bring us back up.
-		 */
-		preempt_disable();
-		dd->ipath_flags |= IPATH_IB_LINK_DISABLED;
-		preempt_enable();
-	} else if (linitcmd) {
-		/*
-		 * Any other linkinitcmd will lead to LINKDOWN and then
-		 * to INIT (if all is well), so clear flag to let
-		 * link-recovery code attempt to bring us back up.
-		 */
-		preempt_disable();
-		dd->ipath_flags &= ~IPATH_IB_LINK_DISABLED;
-		preempt_enable();
-	}
-
-	mod_wd = (linkcmd << dd->ibcc_lc_shift) |
-		(linitcmd << INFINIPATH_IBCC_LINKINITCMD_SHIFT);
-	ipath_cdbg(VERBOSE,
-		"Moving unit %u to %s (initcmd=0x%x), current ltstate is %s\n",
-		dd->ipath_unit, what[linkcmd], linitcmd,
-		ipath_ibcstatus_str[ipath_ib_linktrstate(dd,
-			ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus))]);
-
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
-			 dd->ipath_ibcctrl | mod_wd);
-	/* read from chip so write is flushed */
-	(void) ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus);
-}
-
-int ipath_set_linkstate(struct ipath_devdata *dd, u8 newstate)
-{
-	u32 lstate;
-	int ret;
-
-	switch (newstate) {
-	case IPATH_IB_LINKDOWN_ONLY:
-		ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_DOWN, 0);
-		/* don't wait */
-		ret = 0;
-		goto bail;
-
-	case IPATH_IB_LINKDOWN:
-		ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_DOWN,
-					INFINIPATH_IBCC_LINKINITCMD_POLL);
-		/* don't wait */
-		ret = 0;
-		goto bail;
-
-	case IPATH_IB_LINKDOWN_SLEEP:
-		ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_DOWN,
-					INFINIPATH_IBCC_LINKINITCMD_SLEEP);
-		/* don't wait */
-		ret = 0;
-		goto bail;
-
-	case IPATH_IB_LINKDOWN_DISABLE:
-		ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_DOWN,
-					INFINIPATH_IBCC_LINKINITCMD_DISABLE);
-		/* don't wait */
-		ret = 0;
-		goto bail;
-
-	case IPATH_IB_LINKARM:
-		if (dd->ipath_flags & IPATH_LINKARMED) {
-			ret = 0;
-			goto bail;
-		}
-		if (!(dd->ipath_flags &
-		      (IPATH_LINKINIT | IPATH_LINKACTIVE))) {
-			ret = -EINVAL;
-			goto bail;
-		}
-		ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ARMED, 0);
-
-		/*
-		 * Since the port can transition to ACTIVE by receiving
-		 * a non VL 15 packet, wait for either state.
-		 */
-		lstate = IPATH_LINKARMED | IPATH_LINKACTIVE;
-		break;
-
-	case IPATH_IB_LINKACTIVE:
-		if (dd->ipath_flags & IPATH_LINKACTIVE) {
-			ret = 0;
-			goto bail;
-		}
-		if (!(dd->ipath_flags & IPATH_LINKARMED)) {
-			ret = -EINVAL;
-			goto bail;
-		}
-		ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ACTIVE, 0);
-		lstate = IPATH_LINKACTIVE;
-		break;
-
-	case IPATH_IB_LINK_LOOPBACK:
-		dev_info(&dd->pcidev->dev, "Enabling IB local loopback\n");
-		dd->ipath_ibcctrl |= INFINIPATH_IBCC_LOOPBACK;
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
-				 dd->ipath_ibcctrl);
-
-		/* turn heartbeat off, as it causes loopback to fail */
-		dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT,
-				       IPATH_IB_HRTBT_OFF);
-		/* don't wait */
-		ret = 0;
-		goto bail;
-
-	case IPATH_IB_LINK_EXTERNAL:
-		dev_info(&dd->pcidev->dev,
-			"Disabling IB local loopback (normal)\n");
-		dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT,
-				       IPATH_IB_HRTBT_ON);
-		dd->ipath_ibcctrl &= ~INFINIPATH_IBCC_LOOPBACK;
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
-				 dd->ipath_ibcctrl);
-		/* don't wait */
-		ret = 0;
-		goto bail;
-
-	/*
-	 * Heartbeat can be explicitly enabled by the user via
-	 * "hrtbt_enable" "file", and if disabled, trying to enable here
-	 * will have no effect.  Implicit changes (heartbeat off when
-	 * loopback on, and vice versa) are included to ease testing.
-	 */
-	case IPATH_IB_LINK_HRTBT:
-		ret = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT,
-			IPATH_IB_HRTBT_ON);
-		goto bail;
-
-	case IPATH_IB_LINK_NO_HRTBT:
-		ret = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT,
-			IPATH_IB_HRTBT_OFF);
-		goto bail;
-
-	default:
-		ipath_dbg("Invalid linkstate 0x%x requested\n", newstate);
-		ret = -EINVAL;
-		goto bail;
-	}
-	ret = ipath_wait_linkstate(dd, lstate, 2000);
-
-bail:
-	return ret;
-}
-
-/**
- * ipath_set_mtu - set the MTU
- * @dd: the infinipath device
- * @arg: the new MTU
- *
- * we can handle "any" incoming size, the issue here is whether we
- * need to restrict our outgoing size.   For now, we don't do any
- * sanity checking on this, and we don't deal with what happens to
- * programs that are already running when the size changes.
- * NOTE: changing the MTU will usually cause the IBC to go back to
- * link INIT state...
- */
-int ipath_set_mtu(struct ipath_devdata *dd, u16 arg)
-{
-	u32 piosize;
-	int changed = 0;
-	int ret;
-
-	/*
-	 * mtu is IB data payload max.  It's the largest power of 2 less
-	 * than piosize (or even larger, since it only really controls the
-	 * largest we can receive; we can send the max of the mtu and
-	 * piosize).  We check that it's one of the valid IB sizes.
-	 */
-	if (arg != 256 && arg != 512 && arg != 1024 && arg != 2048 &&
-	    (arg != 4096 || !ipath_mtu4096)) {
-		ipath_dbg("Trying to set invalid mtu %u, failing\n", arg);
-		ret = -EINVAL;
-		goto bail;
-	}
-	if (dd->ipath_ibmtu == arg) {
-		ret = 0;        /* same as current */
-		goto bail;
-	}
-
-	piosize = dd->ipath_ibmaxlen;
-	dd->ipath_ibmtu = arg;
-
-	if (arg >= (piosize - IPATH_PIO_MAXIBHDR)) {
-		/* Only if it's not the initial value (or reset to it) */
-		if (piosize != dd->ipath_init_ibmaxlen) {
-			if (arg > piosize && arg <= dd->ipath_init_ibmaxlen)
-				piosize = dd->ipath_init_ibmaxlen;
-			dd->ipath_ibmaxlen = piosize;
-			changed = 1;
-		}
-	} else if ((arg + IPATH_PIO_MAXIBHDR) != dd->ipath_ibmaxlen) {
-		piosize = arg + IPATH_PIO_MAXIBHDR;
-		ipath_cdbg(VERBOSE, "ibmaxlen was 0x%x, setting to 0x%x "
-			   "(mtu 0x%x)\n", dd->ipath_ibmaxlen, piosize,
-			   arg);
-		dd->ipath_ibmaxlen = piosize;
-		changed = 1;
-	}
-
-	if (changed) {
-		u64 ibc = dd->ipath_ibcctrl, ibdw;
-		/*
-		 * update our housekeeping variables, and set IBC max
-		 * size, same as init code; max IBC is max we allow in
-		 * buffer, less the qword pbc, plus 1 for ICRC, in dwords
-		 */
-		dd->ipath_ibmaxlen = piosize - 2 * sizeof(u32);
-		ibdw = (dd->ipath_ibmaxlen >> 2) + 1;
-		ibc &= ~(INFINIPATH_IBCC_MAXPKTLEN_MASK <<
-			 dd->ibcc_mpl_shift);
-		ibc |= ibdw << dd->ibcc_mpl_shift;
-		dd->ipath_ibcctrl = ibc;
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
-				 dd->ipath_ibcctrl);
-		dd->ipath_f_tidtemplate(dd);
-	}
-
-	ret = 0;
-
-bail:
-	return ret;
-}
-
-int ipath_set_lid(struct ipath_devdata *dd, u32 lid, u8 lmc)
-{
-	dd->ipath_lid = lid;
-	dd->ipath_lmc = lmc;
-
-	dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_LIDLMC, lid |
-		(~((1U << lmc) - 1)) << 16);
-
-	dev_info(&dd->pcidev->dev, "We got a lid: 0x%x\n", lid);
-
-	return 0;
-}
-
-
-/**
- * ipath_write_kreg_port - write a device's per-port 64-bit kernel register
- * @dd: the infinipath device
- * @regno: the register number to write
- * @port: the port containing the register
- * @value: the value to write
- *
- * Registers that vary with the chip implementation constants (port)
- * use this routine.
- */
-void ipath_write_kreg_port(const struct ipath_devdata *dd, ipath_kreg regno,
-			  unsigned port, u64 value)
-{
-	u16 where;
-
-	if (port < dd->ipath_portcnt &&
-	    (regno == dd->ipath_kregs->kr_rcvhdraddr ||
-	     regno == dd->ipath_kregs->kr_rcvhdrtailaddr))
-		where = regno + port;
-	else
-		where = -1;
-
-	ipath_write_kreg(dd, where, value);
-}
-
-/*
- * Following deal with the "obviously simple" task of overriding the state
- * of the LEDS, which normally indicate link physical and logical status.
- * The complications arise in dealing with different hardware mappings
- * and the board-dependent routine being called from interrupts.
- * and then there's the requirement to _flash_ them.
- */
-#define LED_OVER_FREQ_SHIFT 8
-#define LED_OVER_FREQ_MASK (0xFF<<LED_OVER_FREQ_SHIFT)
-/* Below is "non-zero" to force override, but both actual LEDs are off */
-#define LED_OVER_BOTH_OFF (8)
-
-static void ipath_run_led_override(unsigned long opaque)
-{
-	struct ipath_devdata *dd = (struct ipath_devdata *)opaque;
-	int timeoff;
-	int pidx;
-	u64 lstate, ltstate, val;
-
-	if (!(dd->ipath_flags & IPATH_INITTED))
-		return;
-
-	pidx = dd->ipath_led_override_phase++ & 1;
-	dd->ipath_led_override = dd->ipath_led_override_vals[pidx];
-	timeoff = dd->ipath_led_override_timeoff;
-
-	/*
-	 * below potentially restores the LED values per current status,
-	 * should also possibly setup the traffic-blink register,
-	 * but leave that to per-chip functions.
-	 */
-	val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus);
-	ltstate = ipath_ib_linktrstate(dd, val);
-	lstate = ipath_ib_linkstate(dd, val);
-
-	dd->ipath_f_setextled(dd, lstate, ltstate);
-	mod_timer(&dd->ipath_led_override_timer, jiffies + timeoff);
-}
-
-void ipath_set_led_override(struct ipath_devdata *dd, unsigned int val)
-{
-	int timeoff, freq;
-
-	if (!(dd->ipath_flags & IPATH_INITTED))
-		return;
-
-	/* First check if we are blinking. If not, use 1HZ polling */
-	timeoff = HZ;
-	freq = (val & LED_OVER_FREQ_MASK) >> LED_OVER_FREQ_SHIFT;
-
-	if (freq) {
-		/* For blink, set each phase from one nybble of val */
-		dd->ipath_led_override_vals[0] = val & 0xF;
-		dd->ipath_led_override_vals[1] = (val >> 4) & 0xF;
-		timeoff = (HZ << 4)/freq;
-	} else {
-		/* Non-blink set both phases the same. */
-		dd->ipath_led_override_vals[0] = val & 0xF;
-		dd->ipath_led_override_vals[1] = val & 0xF;
-	}
-	dd->ipath_led_override_timeoff = timeoff;
-
-	/*
-	 * If the timer has not already been started, do so. Use a "quick"
-	 * timeout so the function will be called soon, to look at our request.
-	 */
-	if (atomic_inc_return(&dd->ipath_led_override_timer_active) == 1) {
-		/* Need to start timer */
-		setup_timer(&dd->ipath_led_override_timer,
-				ipath_run_led_override, (unsigned long)dd);
-
-		dd->ipath_led_override_timer.expires = jiffies + 1;
-		add_timer(&dd->ipath_led_override_timer);
-	} else
-		atomic_dec(&dd->ipath_led_override_timer_active);
-}
-
-/**
- * ipath_shutdown_device - shut down a device
- * @dd: the infinipath device
- *
- * This is called to make the device quiet when we are about to
- * unload the driver, and also when the device is administratively
- * disabled.   It does not free any data structures.
- * Everything it does has to be setup again by ipath_init_chip(dd,1)
- */
-void ipath_shutdown_device(struct ipath_devdata *dd)
-{
-	unsigned long flags;
-
-	ipath_dbg("Shutting down the device\n");
-
-	ipath_hol_up(dd); /* make sure user processes aren't suspended */
-
-	dd->ipath_flags |= IPATH_LINKUNK;
-	dd->ipath_flags &= ~(IPATH_INITTED | IPATH_LINKDOWN |
-			     IPATH_LINKINIT | IPATH_LINKARMED |
-			     IPATH_LINKACTIVE);
-	*dd->ipath_statusp &= ~(IPATH_STATUS_IB_CONF |
-				IPATH_STATUS_IB_READY);
-
-	/* mask interrupts, but not errors */
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL);
-
-	dd->ipath_rcvctrl = 0;
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
-			 dd->ipath_rcvctrl);
-
-	if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
-		teardown_sdma(dd);
-
-	/*
-	 * gracefully stop all sends allowing any in progress to trickle out
-	 * first.
-	 */
-	spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
-	dd->ipath_sendctrl = 0;
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);
-	/* flush it */
-	ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-	spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
-
-	/*
-	 * enough for anything that's going to trickle out to have actually
-	 * done so.
-	 */
-	udelay(5);
-
-	dd->ipath_f_setextled(dd, 0, 0); /* make sure LEDs are off */
-
-	ipath_set_ib_lstate(dd, 0, INFINIPATH_IBCC_LINKINITCMD_DISABLE);
-	ipath_cancel_sends(dd, 0);
-
-	/*
-	 * we are shutting down, so tell components that care.  We don't do
-	 * this on just a link state change, much like ethernet, a cable
-	 * unplug, etc. doesn't change driver state
-	 */
-	signal_ib_event(dd, IB_EVENT_PORT_ERR);
-
-	/* disable IBC */
-	dd->ipath_control &= ~INFINIPATH_C_LINKENABLE;
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
-			 dd->ipath_control | INFINIPATH_C_FREEZEMODE);
-
-	/*
-	 * clear SerdesEnable and turn the leds off; do this here because
-	 * we are unloading, so don't count on interrupts to move along
-	 * Turn the LEDs off explicitly for the same reason.
-	 */
-	dd->ipath_f_quiet_serdes(dd);
-
-	/* stop all the timers that might still be running */
-	del_timer_sync(&dd->ipath_hol_timer);
-	if (dd->ipath_stats_timer_active) {
-		del_timer_sync(&dd->ipath_stats_timer);
-		dd->ipath_stats_timer_active = 0;
-	}
-	if (dd->ipath_intrchk_timer.data) {
-		del_timer_sync(&dd->ipath_intrchk_timer);
-		dd->ipath_intrchk_timer.data = 0;
-	}
-	if (atomic_read(&dd->ipath_led_override_timer_active)) {
-		del_timer_sync(&dd->ipath_led_override_timer);
-		atomic_set(&dd->ipath_led_override_timer_active, 0);
-	}
-
-	/*
-	 * clear all interrupts and errors, so that the next time the driver
-	 * is loaded or device is enabled, we know that whatever is set
-	 * happened while we were unloaded
-	 */
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear,
-			 ~0ULL & ~INFINIPATH_HWE_MEMBISTFAILED);
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, -1LL);
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, -1LL);
-
-	ipath_cdbg(VERBOSE, "Flush time and errors to EEPROM\n");
-	ipath_update_eeprom_log(dd);
-}
-
-/**
- * ipath_free_pddata - free a port's allocated data
- * @dd: the infinipath device
- * @pd: the portdata structure
- *
- * free up any allocated data for a port
- * This should not touch anything that would affect a simultaneous
- * re-allocation of port data, because it is called after ipath_mutex
- * is released (and can be called from reinit as well).
- * It should never change any chip state, or global driver state.
- * (The only exception to global state is freeing the port0 port0_skbs.)
- */
-void ipath_free_pddata(struct ipath_devdata *dd, struct ipath_portdata *pd)
-{
-	if (!pd)
-		return;
-
-	if (pd->port_rcvhdrq) {
-		ipath_cdbg(VERBOSE, "free closed port %d rcvhdrq @ %p "
-			   "(size=%lu)\n", pd->port_port, pd->port_rcvhdrq,
-			   (unsigned long) pd->port_rcvhdrq_size);
-		dma_free_coherent(&dd->pcidev->dev, pd->port_rcvhdrq_size,
-				  pd->port_rcvhdrq, pd->port_rcvhdrq_phys);
-		pd->port_rcvhdrq = NULL;
-		if (pd->port_rcvhdrtail_kvaddr) {
-			dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
-					 pd->port_rcvhdrtail_kvaddr,
-					 pd->port_rcvhdrqtailaddr_phys);
-			pd->port_rcvhdrtail_kvaddr = NULL;
-		}
-	}
-	if (pd->port_port && pd->port_rcvegrbuf) {
-		unsigned e;
-
-		for (e = 0; e < pd->port_rcvegrbuf_chunks; e++) {
-			void *base = pd->port_rcvegrbuf[e];
-			size_t size = pd->port_rcvegrbuf_size;
-
-			ipath_cdbg(VERBOSE, "egrbuf free(%p, %lu), "
-				   "chunk %u/%u\n", base,
-				   (unsigned long) size,
-				   e, pd->port_rcvegrbuf_chunks);
-			dma_free_coherent(&dd->pcidev->dev, size,
-				base, pd->port_rcvegrbuf_phys[e]);
-		}
-		kfree(pd->port_rcvegrbuf);
-		pd->port_rcvegrbuf = NULL;
-		kfree(pd->port_rcvegrbuf_phys);
-		pd->port_rcvegrbuf_phys = NULL;
-		pd->port_rcvegrbuf_chunks = 0;
-	} else if (pd->port_port == 0 && dd->ipath_port0_skbinfo) {
-		unsigned e;
-		struct ipath_skbinfo *skbinfo = dd->ipath_port0_skbinfo;
-
-		dd->ipath_port0_skbinfo = NULL;
-		ipath_cdbg(VERBOSE, "free closed port %d "
-			   "ipath_port0_skbinfo @ %p\n", pd->port_port,
-			   skbinfo);
-		for (e = 0; e < dd->ipath_p0_rcvegrcnt; e++)
-			if (skbinfo[e].skb) {
-				pci_unmap_single(dd->pcidev, skbinfo[e].phys,
-						 dd->ipath_ibmaxlen,
-						 PCI_DMA_FROMDEVICE);
-				dev_kfree_skb(skbinfo[e].skb);
-			}
-		vfree(skbinfo);
-	}
-	kfree(pd->port_tid_pg_list);
-	vfree(pd->subport_uregbase);
-	vfree(pd->subport_rcvegrbuf);
-	vfree(pd->subport_rcvhdr_base);
-	kfree(pd);
-}
-
-static int __init infinipath_init(void)
-{
-	int ret;
-
-	if (ipath_debug & __IPATH_DBG)
-		printk(KERN_INFO DRIVER_LOAD_MSG "%s", ib_ipath_version);
-
-	/*
-	 * These must be called before the driver is registered with
-	 * the PCI subsystem.
-	 */
-	idr_init(&unit_table);
-
-	ret = pci_register_driver(&ipath_driver);
-	if (ret < 0) {
-		printk(KERN_ERR IPATH_DRV_NAME
-		       ": Unable to register driver: error %d\n", -ret);
-		goto bail_unit;
-	}
-
-	ret = ipath_init_ipathfs();
-	if (ret < 0) {
-		printk(KERN_ERR IPATH_DRV_NAME ": Unable to create "
-		       "ipathfs: error %d\n", -ret);
-		goto bail_pci;
-	}
-
-	goto bail;
-
-bail_pci:
-	pci_unregister_driver(&ipath_driver);
-
-bail_unit:
-	idr_destroy(&unit_table);
-
-bail:
-	return ret;
-}
-
-static void __exit infinipath_cleanup(void)
-{
-	ipath_exit_ipathfs();
-
-	ipath_cdbg(VERBOSE, "Unregistering pci driver\n");
-	pci_unregister_driver(&ipath_driver);
-
-	idr_destroy(&unit_table);
-}
-
-/**
- * ipath_reset_device - reset the chip if possible
- * @unit: the device to reset
- *
- * Whether or not reset is successful, we attempt to re-initialize the chip
- * (that is, much like a driver unload/reload).  We clear the INITTED flag
- * so that the various entry points will fail until we reinitialize.  For
- * now, we only allow this if no user ports are open that use chip resources
- */
-int ipath_reset_device(int unit)
-{
-	int ret, i;
-	struct ipath_devdata *dd = ipath_lookup(unit);
-	unsigned long flags;
-
-	if (!dd) {
-		ret = -ENODEV;
-		goto bail;
-	}
-
-	if (atomic_read(&dd->ipath_led_override_timer_active)) {
-		/* Need to stop LED timer, _then_ shut off LEDs */
-		del_timer_sync(&dd->ipath_led_override_timer);
-		atomic_set(&dd->ipath_led_override_timer_active, 0);
-	}
-
-	/* Shut off LEDs after we are sure timer is not running */
-	dd->ipath_led_override = LED_OVER_BOTH_OFF;
-	dd->ipath_f_setextled(dd, 0, 0);
-
-	dev_info(&dd->pcidev->dev, "Reset on unit %u requested\n", unit);
-
-	if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT)) {
-		dev_info(&dd->pcidev->dev, "Invalid unit number %u or "
-			 "not initialized or not present\n", unit);
-		ret = -ENXIO;
-		goto bail;
-	}
-
-	spin_lock_irqsave(&dd->ipath_uctxt_lock, flags);
-	if (dd->ipath_pd)
-		for (i = 1; i < dd->ipath_cfgports; i++) {
-			if (!dd->ipath_pd[i] || !dd->ipath_pd[i]->port_cnt)
-				continue;
-			spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags);
-			ipath_dbg("unit %u port %d is in use "
-				  "(PID %u cmd %s), can't reset\n",
-				  unit, i,
-				  pid_nr(dd->ipath_pd[i]->port_pid),
-				  dd->ipath_pd[i]->port_comm);
-			ret = -EBUSY;
-			goto bail;
-		}
-	spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags);
-
-	if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
-		teardown_sdma(dd);
-
-	dd->ipath_flags &= ~IPATH_INITTED;
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL);
-	ret = dd->ipath_f_reset(dd);
-	if (ret == 1) {
-		ipath_dbg("Reinitializing unit %u after reset attempt\n",
-			  unit);
-		ret = ipath_init_chip(dd, 1);
-	} else
-		ret = -EAGAIN;
-	if (ret)
-		ipath_dev_err(dd, "Reinitialize unit %u after "
-			      "reset failed with %d\n", unit, ret);
-	else
-		dev_info(&dd->pcidev->dev, "Reinitialized unit %u after "
-			 "resetting\n", unit);
-
-bail:
-	return ret;
-}
-
-/*
- * send a signal to all the processes that have the driver open
- * through the normal interfaces (i.e., everything other than diags
- * interface).  Returns number of signalled processes.
- */
-static int ipath_signal_procs(struct ipath_devdata *dd, int sig)
-{
-	int i, sub, any = 0;
-	struct pid *pid;
-	unsigned long flags;
-
-	if (!dd->ipath_pd)
-		return 0;
-
-	spin_lock_irqsave(&dd->ipath_uctxt_lock, flags);
-	for (i = 1; i < dd->ipath_cfgports; i++) {
-		if (!dd->ipath_pd[i] || !dd->ipath_pd[i]->port_cnt)
-			continue;
-		pid = dd->ipath_pd[i]->port_pid;
-		if (!pid)
-			continue;
-
-		dev_info(&dd->pcidev->dev, "context %d in use "
-			  "(PID %u), sending signal %d\n",
-			  i, pid_nr(pid), sig);
-		kill_pid(pid, sig, 1);
-		any++;
-		for (sub = 0; sub < INFINIPATH_MAX_SUBPORT; sub++) {
-			pid = dd->ipath_pd[i]->port_subpid[sub];
-			if (!pid)
-				continue;
-			dev_info(&dd->pcidev->dev, "sub-context "
-				"%d:%d in use (PID %u), sending "
-				"signal %d\n", i, sub, pid_nr(pid), sig);
-			kill_pid(pid, sig, 1);
-			any++;
-		}
-	}
-	spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags);
-	return any;
-}
-
-static void ipath_hol_signal_down(struct ipath_devdata *dd)
-{
-	if (ipath_signal_procs(dd, SIGSTOP))
-		ipath_dbg("Stopped some processes\n");
-	ipath_cancel_sends(dd, 1);
-}
-
-
-static void ipath_hol_signal_up(struct ipath_devdata *dd)
-{
-	if (ipath_signal_procs(dd, SIGCONT))
-		ipath_dbg("Continued some processes\n");
-}
-
-/*
- * link is down, stop any users processes, and flush pending sends
- * to prevent HoL blocking, then start the HoL timer that
- * periodically continues, then stop procs, so they can detect
- * link down if they want, and do something about it.
- * Timer may already be running, so use mod_timer, not add_timer.
- */
-void ipath_hol_down(struct ipath_devdata *dd)
-{
-	dd->ipath_hol_state = IPATH_HOL_DOWN;
-	ipath_hol_signal_down(dd);
-	dd->ipath_hol_next = IPATH_HOL_DOWNCONT;
-	dd->ipath_hol_timer.expires = jiffies +
-		msecs_to_jiffies(ipath_hol_timeout_ms);
-	mod_timer(&dd->ipath_hol_timer, dd->ipath_hol_timer.expires);
-}
-
-/*
- * link is up, continue any user processes, and ensure timer
- * is a nop, if running.  Let timer keep running, if set; it
- * will nop when it sees the link is up
- */
-void ipath_hol_up(struct ipath_devdata *dd)
-{
-	ipath_hol_signal_up(dd);
-	dd->ipath_hol_state = IPATH_HOL_UP;
-}
-
-/*
- * toggle the running/not running state of user proceses
- * to prevent HoL blocking on chip resources, but still allow
- * user processes to do link down special case handling.
- * Should only be called via the timer
- */
-void ipath_hol_event(unsigned long opaque)
-{
-	struct ipath_devdata *dd = (struct ipath_devdata *)opaque;
-
-	if (dd->ipath_hol_next == IPATH_HOL_DOWNSTOP
-		&& dd->ipath_hol_state != IPATH_HOL_UP) {
-		dd->ipath_hol_next = IPATH_HOL_DOWNCONT;
-		ipath_dbg("Stopping processes\n");
-		ipath_hol_signal_down(dd);
-	} else { /* may do "extra" if also in ipath_hol_up() */
-		dd->ipath_hol_next = IPATH_HOL_DOWNSTOP;
-		ipath_dbg("Continuing processes\n");
-		ipath_hol_signal_up(dd);
-	}
-	if (dd->ipath_hol_state == IPATH_HOL_UP)
-		ipath_dbg("link's up, don't resched timer\n");
-	else {
-		dd->ipath_hol_timer.expires = jiffies +
-			msecs_to_jiffies(ipath_hol_timeout_ms);
-		mod_timer(&dd->ipath_hol_timer,
-			dd->ipath_hol_timer.expires);
-	}
-}
-
-int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv)
-{
-	u64 val;
-
-	if (new_pol_inv > INFINIPATH_XGXS_RX_POL_MASK)
-		return -1;
-	if (dd->ipath_rx_pol_inv != new_pol_inv) {
-		dd->ipath_rx_pol_inv = new_pol_inv;
-		val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig);
-		val &= ~(INFINIPATH_XGXS_RX_POL_MASK <<
-			 INFINIPATH_XGXS_RX_POL_SHIFT);
-		val |= ((u64)dd->ipath_rx_pol_inv) <<
-			INFINIPATH_XGXS_RX_POL_SHIFT;
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val);
-	}
-	return 0;
-}
-
-/*
- * Disable and enable the armlaunch error.  Used for PIO bandwidth testing on
- * the 7220, which is count-based, rather than trigger-based.  Safe for the
- * driver check, since it's at init.   Not completely safe when used for
- * user-mode checking, since some error checking can be lost, but not
- * particularly risky, and only has problematic side-effects in the face of
- * very buggy user code.  There is no reference counting, but that's also
- * fine, given the intended use.
- */
-void ipath_enable_armlaunch(struct ipath_devdata *dd)
-{
-	dd->ipath_lasterror &= ~INFINIPATH_E_SPIOARMLAUNCH;
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear,
-		INFINIPATH_E_SPIOARMLAUNCH);
-	dd->ipath_errormask |= INFINIPATH_E_SPIOARMLAUNCH;
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
-		dd->ipath_errormask);
-}
-
-void ipath_disable_armlaunch(struct ipath_devdata *dd)
-{
-	/* so don't re-enable if already set */
-	dd->ipath_maskederrs &= ~INFINIPATH_E_SPIOARMLAUNCH;
-	dd->ipath_errormask &= ~INFINIPATH_E_SPIOARMLAUNCH;
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
-		dd->ipath_errormask);
-}
-
-module_init(infinipath_init);
-module_exit(infinipath_cleanup);
diff --git a/drivers/staging/rdma/ipath/ipath_eeprom.c b/drivers/staging/rdma/ipath/ipath_eeprom.c
deleted file mode 100644
index ef84107..0000000
--- a/drivers/staging/rdma/ipath/ipath_eeprom.c
+++ /dev/null
@@ -1,1183 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/delay.h>
-#include <linux/pci.h>
-#include <linux/vmalloc.h>
-
-#include "ipath_kernel.h"
-
-/*
- * InfiniPath I2C driver for a serial eeprom.  This is not a generic
- * I2C interface.  For a start, the device we're using (Atmel AT24C11)
- * doesn't work like a regular I2C device.  It looks like one
- * electrically, but not logically.  Normal I2C devices have a single
- * 7-bit or 10-bit I2C address that they respond to.  Valid 7-bit
- * addresses range from 0x03 to 0x77.  Addresses 0x00 to 0x02 and 0x78
- * to 0x7F are special reserved addresses (e.g. 0x00 is the "general
- * call" address.)  The Atmel device, on the other hand, responds to ALL
- * 7-bit addresses.  It's designed to be the only device on a given I2C
- * bus.  A 7-bit address corresponds to the memory address within the
- * Atmel device itself.
- *
- * Also, the timing requirements mean more than simple software
- * bitbanging, with readbacks from chip to ensure timing (simple udelay
- * is not enough).
- *
- * This all means that accessing the device is specialized enough
- * that using the standard kernel I2C bitbanging interface would be
- * impossible.  For example, the core I2C eeprom driver expects to find
- * a device at one or more of a limited set of addresses only.  It doesn't
- * allow writing to an eeprom.  It also doesn't provide any means of
- * accessing eeprom contents from within the kernel, only via sysfs.
- */
-
-/* Added functionality for IBA7220-based cards */
-#define IPATH_EEPROM_DEV_V1 0xA0
-#define IPATH_EEPROM_DEV_V2 0xA2
-#define IPATH_TEMP_DEV 0x98
-#define IPATH_BAD_DEV (IPATH_EEPROM_DEV_V2+2)
-#define IPATH_NO_DEV (0xFF)
-
-/*
- * The number of I2C chains is proliferating. Table below brings
- * some order to the madness. The basic principle is that the
- * table is scanned from the top, and a "probe" is made to the
- * device probe_dev. If that succeeds, the chain is considered
- * to be of that type, and dd->i2c_chain_type is set to the index+1
- * of the entry.
- * The +1 is so static initialization can mean "unknown, do probe."
- */
-static struct i2c_chain_desc {
-	u8 probe_dev;	/* If seen at probe, chain is this type */
-	u8 eeprom_dev;	/* Dev addr (if any) for EEPROM */
-	u8 temp_dev;	/* Dev Addr (if any) for Temp-sense */
-} i2c_chains[] = {
-	{ IPATH_BAD_DEV, IPATH_NO_DEV, IPATH_NO_DEV }, /* pre-iba7220 bds */
-	{ IPATH_EEPROM_DEV_V1, IPATH_EEPROM_DEV_V1, IPATH_TEMP_DEV}, /* V1 */
-	{ IPATH_EEPROM_DEV_V2, IPATH_EEPROM_DEV_V2, IPATH_TEMP_DEV}, /* V2 */
-	{ IPATH_NO_DEV }
-};
-
-enum i2c_type {
-	i2c_line_scl = 0,
-	i2c_line_sda
-};
-
-enum i2c_state {
-	i2c_line_low = 0,
-	i2c_line_high
-};
-
-#define READ_CMD 1
-#define WRITE_CMD 0
-
-/**
- * i2c_gpio_set - set a GPIO line
- * @dd: the infinipath device
- * @line: the line to set
- * @new_line_state: the state to set
- *
- * Returns 0 if the line was set to the new state successfully, non-zero
- * on error.
- */
-static int i2c_gpio_set(struct ipath_devdata *dd,
-			enum i2c_type line,
-			enum i2c_state new_line_state)
-{
-	u64 out_mask, dir_mask, *gpioval;
-	unsigned long flags = 0;
-
-	gpioval = &dd->ipath_gpio_out;
-
-	if (line == i2c_line_scl) {
-		dir_mask = dd->ipath_gpio_scl;
-		out_mask = (1UL << dd->ipath_gpio_scl_num);
-	} else {
-		dir_mask = dd->ipath_gpio_sda;
-		out_mask = (1UL << dd->ipath_gpio_sda_num);
-	}
-
-	spin_lock_irqsave(&dd->ipath_gpio_lock, flags);
-	if (new_line_state == i2c_line_high) {
-		/* tri-state the output rather than force high */
-		dd->ipath_extctrl &= ~dir_mask;
-	} else {
-		/* config line to be an output */
-		dd->ipath_extctrl |= dir_mask;
-	}
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, dd->ipath_extctrl);
-
-	/* set output as well (no real verify) */
-	if (new_line_state == i2c_line_high)
-		*gpioval |= out_mask;
-	else
-		*gpioval &= ~out_mask;
-
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_out, *gpioval);
-	spin_unlock_irqrestore(&dd->ipath_gpio_lock, flags);
-
-	return 0;
-}
-
-/**
- * i2c_gpio_get - get a GPIO line state
- * @dd: the infinipath device
- * @line: the line to get
- * @curr_statep: where to put the line state
- *
- * Returns 0 if the line was set to the new state successfully, non-zero
- * on error.  curr_state is not set on error.
- */
-static int i2c_gpio_get(struct ipath_devdata *dd,
-			enum i2c_type line,
-			enum i2c_state *curr_statep)
-{
-	u64 read_val, mask;
-	int ret;
-	unsigned long flags = 0;
-
-	/* check args */
-	if (curr_statep == NULL) {
-		ret = 1;
-		goto bail;
-	}
-
-	/* config line to be an input */
-	if (line == i2c_line_scl)
-		mask = dd->ipath_gpio_scl;
-	else
-		mask = dd->ipath_gpio_sda;
-
-	spin_lock_irqsave(&dd->ipath_gpio_lock, flags);
-	dd->ipath_extctrl &= ~mask;
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, dd->ipath_extctrl);
-	/*
-	 * Below is very unlikely to reflect true input state if Output
-	 * Enable actually changed.
-	 */
-	read_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extstatus);
-	spin_unlock_irqrestore(&dd->ipath_gpio_lock, flags);
-
-	if (read_val & mask)
-		*curr_statep = i2c_line_high;
-	else
-		*curr_statep = i2c_line_low;
-
-	ret = 0;
-
-bail:
-	return ret;
-}
-
-/**
- * i2c_wait_for_writes - wait for a write
- * @dd: the infinipath device
- *
- * We use this instead of udelay directly, so we can make sure
- * that previous register writes have been flushed all the way
- * to the chip.  Since we are delaying anyway, the cost doesn't
- * hurt, and makes the bit twiddling more regular
- */
-static void i2c_wait_for_writes(struct ipath_devdata *dd)
-{
-	(void)ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch);
-	rmb();
-}
-
-static void scl_out(struct ipath_devdata *dd, u8 bit)
-{
-	udelay(1);
-	i2c_gpio_set(dd, i2c_line_scl, bit ? i2c_line_high : i2c_line_low);
-
-	i2c_wait_for_writes(dd);
-}
-
-static void sda_out(struct ipath_devdata *dd, u8 bit)
-{
-	i2c_gpio_set(dd, i2c_line_sda, bit ? i2c_line_high : i2c_line_low);
-
-	i2c_wait_for_writes(dd);
-}
-
-static u8 sda_in(struct ipath_devdata *dd, int wait)
-{
-	enum i2c_state bit;
-
-	if (i2c_gpio_get(dd, i2c_line_sda, &bit))
-		ipath_dbg("get bit failed!\n");
-
-	if (wait)
-		i2c_wait_for_writes(dd);
-
-	return bit == i2c_line_high ? 1U : 0;
-}
-
-/**
- * i2c_ackrcv - see if ack following write is true
- * @dd: the infinipath device
- */
-static int i2c_ackrcv(struct ipath_devdata *dd)
-{
-	u8 ack_received;
-
-	/* AT ENTRY SCL = LOW */
-	/* change direction, ignore data */
-	ack_received = sda_in(dd, 1);
-	scl_out(dd, i2c_line_high);
-	ack_received = sda_in(dd, 1) == 0;
-	scl_out(dd, i2c_line_low);
-	return ack_received;
-}
-
-/**
- * rd_byte - read a byte, leaving ACK, STOP, etc up to caller
- * @dd: the infinipath device
- *
- * Returns byte shifted out of device
- */
-static int rd_byte(struct ipath_devdata *dd)
-{
-	int bit_cntr, data;
-
-	data = 0;
-
-	for (bit_cntr = 7; bit_cntr >= 0; --bit_cntr) {
-		data <<= 1;
-		scl_out(dd, i2c_line_high);
-		data |= sda_in(dd, 0);
-		scl_out(dd, i2c_line_low);
-	}
-	return data;
-}
-
-/**
- * wr_byte - write a byte, one bit at a time
- * @dd: the infinipath device
- * @data: the byte to write
- *
- * Returns 0 if we got the following ack, otherwise 1
- */
-static int wr_byte(struct ipath_devdata *dd, u8 data)
-{
-	int bit_cntr;
-	u8 bit;
-
-	for (bit_cntr = 7; bit_cntr >= 0; bit_cntr--) {
-		bit = (data >> bit_cntr) & 1;
-		sda_out(dd, bit);
-		scl_out(dd, i2c_line_high);
-		scl_out(dd, i2c_line_low);
-	}
-	return (!i2c_ackrcv(dd)) ? 1 : 0;
-}
-
-static void send_ack(struct ipath_devdata *dd)
-{
-	sda_out(dd, i2c_line_low);
-	scl_out(dd, i2c_line_high);
-	scl_out(dd, i2c_line_low);
-	sda_out(dd, i2c_line_high);
-}
-
-/**
- * i2c_startcmd - transmit the start condition, followed by address/cmd
- * @dd: the infinipath device
- * @offset_dir: direction byte
- *
- *      (both clock/data high, clock high, data low while clock is high)
- */
-static int i2c_startcmd(struct ipath_devdata *dd, u8 offset_dir)
-{
-	int res;
-
-	/* issue start sequence */
-	sda_out(dd, i2c_line_high);
-	scl_out(dd, i2c_line_high);
-	sda_out(dd, i2c_line_low);
-	scl_out(dd, i2c_line_low);
-
-	/* issue length and direction byte */
-	res = wr_byte(dd, offset_dir);
-
-	if (res)
-		ipath_cdbg(VERBOSE, "No ack to complete start\n");
-
-	return res;
-}
-
-/**
- * stop_cmd - transmit the stop condition
- * @dd: the infinipath device
- *
- * (both clock/data low, clock high, data high while clock is high)
- */
-static void stop_cmd(struct ipath_devdata *dd)
-{
-	scl_out(dd, i2c_line_low);
-	sda_out(dd, i2c_line_low);
-	scl_out(dd, i2c_line_high);
-	sda_out(dd, i2c_line_high);
-	udelay(2);
-}
-
-/**
- * eeprom_reset - reset I2C communication
- * @dd: the infinipath device
- */
-
-static int eeprom_reset(struct ipath_devdata *dd)
-{
-	int clock_cycles_left = 9;
-	u64 *gpioval = &dd->ipath_gpio_out;
-	int ret;
-	unsigned long flags;
-
-	spin_lock_irqsave(&dd->ipath_gpio_lock, flags);
-	/* Make sure shadows are consistent */
-	dd->ipath_extctrl = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extctrl);
-	*gpioval = ipath_read_kreg64(dd, dd->ipath_kregs->kr_gpio_out);
-	spin_unlock_irqrestore(&dd->ipath_gpio_lock, flags);
-
-	ipath_cdbg(VERBOSE, "Resetting i2c eeprom; initial gpioout reg "
-		   "is %llx\n", (unsigned long long) *gpioval);
-
-	/*
-	 * This is to get the i2c into a known state, by first going low,
-	 * then tristate sda (and then tristate scl as first thing
-	 * in loop)
-	 */
-	scl_out(dd, i2c_line_low);
-	sda_out(dd, i2c_line_high);
-
-	/* Clock up to 9 cycles looking for SDA hi, then issue START and STOP */
-	while (clock_cycles_left--) {
-		scl_out(dd, i2c_line_high);
-
-		/* SDA seen high, issue START by dropping it while SCL high */
-		if (sda_in(dd, 0)) {
-			sda_out(dd, i2c_line_low);
-			scl_out(dd, i2c_line_low);
-			/* ATMEL spec says must be followed by STOP. */
-			scl_out(dd, i2c_line_high);
-			sda_out(dd, i2c_line_high);
-			ret = 0;
-			goto bail;
-		}
-
-		scl_out(dd, i2c_line_low);
-	}
-
-	ret = 1;
-
-bail:
-	return ret;
-}
-
-/*
- * Probe for I2C device at specified address. Returns 0 for "success"
- * to match rest of this file.
- * Leave bus in "reasonable" state for further commands.
- */
-static int i2c_probe(struct ipath_devdata *dd, int devaddr)
-{
-	int ret;
-
-	ret = eeprom_reset(dd);
-	if (ret) {
-		ipath_dev_err(dd, "Failed reset probing device 0x%02X\n",
-			      devaddr);
-		return ret;
-	}
-	/*
-	 * Reset no longer leaves bus in start condition, so normal
-	 * i2c_startcmd() will do.
-	 */
-	ret = i2c_startcmd(dd, devaddr | READ_CMD);
-	if (ret)
-		ipath_cdbg(VERBOSE, "Failed startcmd for device 0x%02X\n",
-			   devaddr);
-	else {
-		/*
-		 * Device did respond. Complete a single-byte read, because some
-		 * devices apparently cannot handle STOP immediately after they
-		 * ACK the start-cmd.
-		 */
-		int data;
-		data = rd_byte(dd);
-		stop_cmd(dd);
-		ipath_cdbg(VERBOSE, "Response from device 0x%02X\n", devaddr);
-	}
-	return ret;
-}
-
-/*
- * Returns the "i2c type". This is a pointer to a struct that describes
- * the I2C chain on this board. To minimize impact on struct ipath_devdata,
- * the (small integer) index into the table is actually memoized, rather
- * then the pointer.
- * Memoization is because the type is determined on the first call per chip.
- * An alternative would be to move type determination to early
- * init code.
- */
-static struct i2c_chain_desc *ipath_i2c_type(struct ipath_devdata *dd)
-{
-	int idx;
-
-	/* Get memoized index, from previous successful probes */
-	idx = dd->ipath_i2c_chain_type - 1;
-	if (idx >= 0 && idx < (ARRAY_SIZE(i2c_chains) - 1))
-		goto done;
-
-	idx = 0;
-	while (i2c_chains[idx].probe_dev != IPATH_NO_DEV) {
-		/* if probe succeeds, this is type */
-		if (!i2c_probe(dd, i2c_chains[idx].probe_dev))
-			break;
-		++idx;
-	}
-
-	/*
-	 * Old EEPROM (first entry) may require a reset after probe,
-	 * rather than being able to "start" after "stop"
-	 */
-	if (idx == 0)
-		eeprom_reset(dd);
-
-	if (i2c_chains[idx].probe_dev == IPATH_NO_DEV)
-		idx = -1;
-	else
-		dd->ipath_i2c_chain_type = idx + 1;
-done:
-	return (idx >= 0) ? i2c_chains + idx : NULL;
-}
-
-static int ipath_eeprom_internal_read(struct ipath_devdata *dd,
-					u8 eeprom_offset, void *buffer, int len)
-{
-	int ret;
-	struct i2c_chain_desc *icd;
-	u8 *bp = buffer;
-
-	ret = 1;
-	icd = ipath_i2c_type(dd);
-	if (!icd)
-		goto bail;
-
-	if (icd->eeprom_dev == IPATH_NO_DEV) {
-		/* legacy not-really-I2C */
-		ipath_cdbg(VERBOSE, "Start command only address\n");
-		eeprom_offset = (eeprom_offset << 1) | READ_CMD;
-		ret = i2c_startcmd(dd, eeprom_offset);
-	} else {
-		/* Actual I2C */
-		ipath_cdbg(VERBOSE, "Start command uses devaddr\n");
-		if (i2c_startcmd(dd, icd->eeprom_dev | WRITE_CMD)) {
-			ipath_dbg("Failed EEPROM startcmd\n");
-			stop_cmd(dd);
-			ret = 1;
-			goto bail;
-		}
-		ret = wr_byte(dd, eeprom_offset);
-		stop_cmd(dd);
-		if (ret) {
-			ipath_dev_err(dd, "Failed to write EEPROM address\n");
-			ret = 1;
-			goto bail;
-		}
-		ret = i2c_startcmd(dd, icd->eeprom_dev | READ_CMD);
-	}
-	if (ret) {
-		ipath_dbg("Failed startcmd for dev %02X\n", icd->eeprom_dev);
-		stop_cmd(dd);
-		ret = 1;
-		goto bail;
-	}
-
-	/*
-	 * eeprom keeps clocking data out as long as we ack, automatically
-	 * incrementing the address.
-	 */
-	while (len-- > 0) {
-		/* get and store data */
-		*bp++ = rd_byte(dd);
-		/* send ack if not the last byte */
-		if (len)
-			send_ack(dd);
-	}
-
-	stop_cmd(dd);
-
-	ret = 0;
-
-bail:
-	return ret;
-}
-
-static int ipath_eeprom_internal_write(struct ipath_devdata *dd, u8 eeprom_offset,
-				       const void *buffer, int len)
-{
-	int sub_len;
-	const u8 *bp = buffer;
-	int max_wait_time, i;
-	int ret;
-	struct i2c_chain_desc *icd;
-
-	ret = 1;
-	icd = ipath_i2c_type(dd);
-	if (!icd)
-		goto bail;
-
-	while (len > 0) {
-		if (icd->eeprom_dev == IPATH_NO_DEV) {
-			if (i2c_startcmd(dd,
-					 (eeprom_offset << 1) | WRITE_CMD)) {
-				ipath_dbg("Failed to start cmd offset %u\n",
-					eeprom_offset);
-				goto failed_write;
-			}
-		} else {
-			/* Real I2C */
-			if (i2c_startcmd(dd, icd->eeprom_dev | WRITE_CMD)) {
-				ipath_dbg("Failed EEPROM startcmd\n");
-				goto failed_write;
-			}
-			ret = wr_byte(dd, eeprom_offset);
-			if (ret) {
-				ipath_dev_err(dd, "Failed to write EEPROM "
-					      "address\n");
-				goto failed_write;
-			}
-		}
-
-		sub_len = min(len, 4);
-		eeprom_offset += sub_len;
-		len -= sub_len;
-
-		for (i = 0; i < sub_len; i++) {
-			if (wr_byte(dd, *bp++)) {
-				ipath_dbg("no ack after byte %u/%u (%u "
-					  "total remain)\n", i, sub_len,
-					  len + sub_len - i);
-				goto failed_write;
-			}
-		}
-
-		stop_cmd(dd);
-
-		/*
-		 * wait for write complete by waiting for a successful
-		 * read (the chip replies with a zero after the write
-		 * cmd completes, and before it writes to the eeprom.
-		 * The startcmd for the read will fail the ack until
-		 * the writes have completed.   We do this inline to avoid
-		 * the debug prints that are in the real read routine
-		 * if the startcmd fails.
-		 * We also use the proper device address, so it doesn't matter
-		 * whether we have real eeprom_dev. legacy likes any address.
-		 */
-		max_wait_time = 100;
-		while (i2c_startcmd(dd, icd->eeprom_dev | READ_CMD)) {
-			stop_cmd(dd);
-			if (!--max_wait_time) {
-				ipath_dbg("Did not get successful read to "
-					  "complete write\n");
-				goto failed_write;
-			}
-		}
-		/* now read (and ignore) the resulting byte */
-		rd_byte(dd);
-		stop_cmd(dd);
-	}
-
-	ret = 0;
-	goto bail;
-
-failed_write:
-	stop_cmd(dd);
-	ret = 1;
-
-bail:
-	return ret;
-}
-
-/**
- * ipath_eeprom_read - receives bytes from the eeprom via I2C
- * @dd: the infinipath device
- * @eeprom_offset: address to read from
- * @buffer: where to store result
- * @len: number of bytes to receive
- */
-int ipath_eeprom_read(struct ipath_devdata *dd, u8 eeprom_offset,
-			void *buff, int len)
-{
-	int ret;
-
-	ret = mutex_lock_interruptible(&dd->ipath_eep_lock);
-	if (!ret) {
-		ret = ipath_eeprom_internal_read(dd, eeprom_offset, buff, len);
-		mutex_unlock(&dd->ipath_eep_lock);
-	}
-
-	return ret;
-}
-
-/**
- * ipath_eeprom_write - writes data to the eeprom via I2C
- * @dd: the infinipath device
- * @eeprom_offset: where to place data
- * @buffer: data to write
- * @len: number of bytes to write
- */
-int ipath_eeprom_write(struct ipath_devdata *dd, u8 eeprom_offset,
-			const void *buff, int len)
-{
-	int ret;
-
-	ret = mutex_lock_interruptible(&dd->ipath_eep_lock);
-	if (!ret) {
-		ret = ipath_eeprom_internal_write(dd, eeprom_offset, buff, len);
-		mutex_unlock(&dd->ipath_eep_lock);
-	}
-
-	return ret;
-}
-
-static u8 flash_csum(struct ipath_flash *ifp, int adjust)
-{
-	u8 *ip = (u8 *) ifp;
-	u8 csum = 0, len;
-
-	/*
-	 * Limit length checksummed to max length of actual data.
-	 * Checksum of erased eeprom will still be bad, but we avoid
-	 * reading past the end of the buffer we were passed.
-	 */
-	len = ifp->if_length;
-	if (len > sizeof(struct ipath_flash))
-		len = sizeof(struct ipath_flash);
-	while (len--)
-		csum += *ip++;
-	csum -= ifp->if_csum;
-	csum = ~csum;
-	if (adjust)
-		ifp->if_csum = csum;
-
-	return csum;
-}
-
-/**
- * ipath_get_guid - get the GUID from the i2c device
- * @dd: the infinipath device
- *
- * We have the capability to use the ipath_nguid field, and get
- * the guid from the first chip's flash, to use for all of them.
- */
-void ipath_get_eeprom_info(struct ipath_devdata *dd)
-{
-	void *buf;
-	struct ipath_flash *ifp;
-	__be64 guid;
-	int len, eep_stat;
-	u8 csum, *bguid;
-	int t = dd->ipath_unit;
-	struct ipath_devdata *dd0 = ipath_lookup(0);
-
-	if (t && dd0->ipath_nguid > 1 && t <= dd0->ipath_nguid) {
-		u8 oguid;
-		dd->ipath_guid = dd0->ipath_guid;
-		bguid = (u8 *) & dd->ipath_guid;
-
-		oguid = bguid[7];
-		bguid[7] += t;
-		if (oguid > bguid[7]) {
-			if (bguid[6] == 0xff) {
-				if (bguid[5] == 0xff) {
-					ipath_dev_err(
-						dd,
-						"Can't set %s GUID from "
-						"base, wraps to OUI!\n",
-						ipath_get_unit_name(t));
-					dd->ipath_guid = 0;
-					goto bail;
-				}
-				bguid[5]++;
-			}
-			bguid[6]++;
-		}
-		dd->ipath_nguid = 1;
-
-		ipath_dbg("nguid %u, so adding %u to device 0 guid, "
-			  "for %llx\n",
-			  dd0->ipath_nguid, t,
-			  (unsigned long long) be64_to_cpu(dd->ipath_guid));
-		goto bail;
-	}
-
-	/*
-	 * read full flash, not just currently used part, since it may have
-	 * been written with a newer definition
-	 * */
-	len = sizeof(struct ipath_flash);
-	buf = vmalloc(len);
-	if (!buf) {
-		ipath_dev_err(dd, "Couldn't allocate memory to read %u "
-			      "bytes from eeprom for GUID\n", len);
-		goto bail;
-	}
-
-	mutex_lock(&dd->ipath_eep_lock);
-	eep_stat = ipath_eeprom_internal_read(dd, 0, buf, len);
-	mutex_unlock(&dd->ipath_eep_lock);
-
-	if (eep_stat) {
-		ipath_dev_err(dd, "Failed reading GUID from eeprom\n");
-		goto done;
-	}
-	ifp = (struct ipath_flash *)buf;
-
-	csum = flash_csum(ifp, 0);
-	if (csum != ifp->if_csum) {
-		dev_info(&dd->pcidev->dev, "Bad I2C flash checksum: "
-			 "0x%x, not 0x%x\n", csum, ifp->if_csum);
-		goto done;
-	}
-	if (*(__be64 *) ifp->if_guid == cpu_to_be64(0) ||
-	    *(__be64 *) ifp->if_guid == ~cpu_to_be64(0)) {
-		ipath_dev_err(dd, "Invalid GUID %llx from flash; "
-			      "ignoring\n",
-			      *(unsigned long long *) ifp->if_guid);
-		/* don't allow GUID if all 0 or all 1's */
-		goto done;
-	}
-
-	/* complain, but allow it */
-	if (*(u64 *) ifp->if_guid == 0x100007511000000ULL)
-		dev_info(&dd->pcidev->dev, "Warning, GUID %llx is "
-			 "default, probably not correct!\n",
-			 *(unsigned long long *) ifp->if_guid);
-
-	bguid = ifp->if_guid;
-	if (!bguid[0] && !bguid[1] && !bguid[2]) {
-		/* original incorrect GUID format in flash; fix in
-		 * core copy, by shifting up 2 octets; don't need to
-		 * change top octet, since both it and shifted are
-		 * 0.. */
-		bguid[1] = bguid[3];
-		bguid[2] = bguid[4];
-		bguid[3] = bguid[4] = 0;
-		guid = *(__be64 *) ifp->if_guid;
-		ipath_cdbg(VERBOSE, "Old GUID format in flash, top 3 zero, "
-			   "shifting 2 octets\n");
-	} else
-		guid = *(__be64 *) ifp->if_guid;
-	dd->ipath_guid = guid;
-	dd->ipath_nguid = ifp->if_numguid;
-	/*
-	 * Things are slightly complicated by the desire to transparently
-	 * support both the Pathscale 10-digit serial number and the QLogic
-	 * 13-character version.
-	 */
-	if ((ifp->if_fversion > 1) && ifp->if_sprefix[0]
-		&& ((u8 *)ifp->if_sprefix)[0] != 0xFF) {
-		/* This board has a Serial-prefix, which is stored
-		 * elsewhere for backward-compatibility.
-		 */
-		char *snp = dd->ipath_serial;
-		memcpy(snp, ifp->if_sprefix, sizeof ifp->if_sprefix);
-		snp[sizeof ifp->if_sprefix] = '\0';
-		len = strlen(snp);
-		snp += len;
-		len = (sizeof dd->ipath_serial) - len;
-		if (len > sizeof ifp->if_serial) {
-			len = sizeof ifp->if_serial;
-		}
-		memcpy(snp, ifp->if_serial, len);
-	} else
-		memcpy(dd->ipath_serial, ifp->if_serial,
-		       sizeof ifp->if_serial);
-	if (!strstr(ifp->if_comment, "Tested successfully"))
-		ipath_dev_err(dd, "Board SN %s did not pass functional "
-			"test: %s\n", dd->ipath_serial,
-			ifp->if_comment);
-
-	ipath_cdbg(VERBOSE, "Initted GUID to %llx from eeprom\n",
-		   (unsigned long long) be64_to_cpu(dd->ipath_guid));
-
-	memcpy(&dd->ipath_eep_st_errs, &ifp->if_errcntp, IPATH_EEP_LOG_CNT);
-	/*
-	 * Power-on (actually "active") hours are kept as little-endian value
-	 * in EEPROM, but as seconds in a (possibly as small as 24-bit)
-	 * atomic_t while running.
-	 */
-	atomic_set(&dd->ipath_active_time, 0);
-	dd->ipath_eep_hrs = ifp->if_powerhour[0] | (ifp->if_powerhour[1] << 8);
-
-done:
-	vfree(buf);
-
-bail:;
-}
-
-/**
- * ipath_update_eeprom_log - copy active-time and error counters to eeprom
- * @dd: the infinipath device
- *
- * Although the time is kept as seconds in the ipath_devdata struct, it is
- * rounded to hours for re-write, as we have only 16 bits in EEPROM.
- * First-cut code reads whole (expected) struct ipath_flash, modifies,
- * re-writes. Future direction: read/write only what we need, assuming
- * that the EEPROM had to have been "good enough" for driver init, and
- * if not, we aren't making it worse.
- *
- */
-
-int ipath_update_eeprom_log(struct ipath_devdata *dd)
-{
-	void *buf;
-	struct ipath_flash *ifp;
-	int len, hi_water;
-	uint32_t new_time, new_hrs;
-	u8 csum;
-	int ret, idx;
-	unsigned long flags;
-
-	/* first, check if we actually need to do anything. */
-	ret = 0;
-	for (idx = 0; idx < IPATH_EEP_LOG_CNT; ++idx) {
-		if (dd->ipath_eep_st_new_errs[idx]) {
-			ret = 1;
-			break;
-		}
-	}
-	new_time = atomic_read(&dd->ipath_active_time);
-
-	if (ret == 0 && new_time < 3600)
-		return 0;
-
-	/*
-	 * The quick-check above determined that there is something worthy
-	 * of logging, so get current contents and do a more detailed idea.
-	 * read full flash, not just currently used part, since it may have
-	 * been written with a newer definition
-	 */
-	len = sizeof(struct ipath_flash);
-	buf = vmalloc(len);
-	ret = 1;
-	if (!buf) {
-		ipath_dev_err(dd, "Couldn't allocate memory to read %u "
-				"bytes from eeprom for logging\n", len);
-		goto bail;
-	}
-
-	/* Grab semaphore and read current EEPROM. If we get an
-	 * error, let go, but if not, keep it until we finish write.
-	 */
-	ret = mutex_lock_interruptible(&dd->ipath_eep_lock);
-	if (ret) {
-		ipath_dev_err(dd, "Unable to acquire EEPROM for logging\n");
-		goto free_bail;
-	}
-	ret = ipath_eeprom_internal_read(dd, 0, buf, len);
-	if (ret) {
-		mutex_unlock(&dd->ipath_eep_lock);
-		ipath_dev_err(dd, "Unable read EEPROM for logging\n");
-		goto free_bail;
-	}
-	ifp = (struct ipath_flash *)buf;
-
-	csum = flash_csum(ifp, 0);
-	if (csum != ifp->if_csum) {
-		mutex_unlock(&dd->ipath_eep_lock);
-		ipath_dev_err(dd, "EEPROM cks err (0x%02X, S/B 0x%02X)\n",
-				csum, ifp->if_csum);
-		ret = 1;
-		goto free_bail;
-	}
-	hi_water = 0;
-	spin_lock_irqsave(&dd->ipath_eep_st_lock, flags);
-	for (idx = 0; idx < IPATH_EEP_LOG_CNT; ++idx) {
-		int new_val = dd->ipath_eep_st_new_errs[idx];
-		if (new_val) {
-			/*
-			 * If we have seen any errors, add to EEPROM values
-			 * We need to saturate at 0xFF (255) and we also
-			 * would need to adjust the checksum if we were
-			 * trying to minimize EEPROM traffic
-			 * Note that we add to actual current count in EEPROM,
-			 * in case it was altered while we were running.
-			 */
-			new_val += ifp->if_errcntp[idx];
-			if (new_val > 0xFF)
-				new_val = 0xFF;
-			if (ifp->if_errcntp[idx] != new_val) {
-				ifp->if_errcntp[idx] = new_val;
-				hi_water = offsetof(struct ipath_flash,
-						if_errcntp) + idx;
-			}
-			/*
-			 * update our shadow (used to minimize EEPROM
-			 * traffic), to match what we are about to write.
-			 */
-			dd->ipath_eep_st_errs[idx] = new_val;
-			dd->ipath_eep_st_new_errs[idx] = 0;
-		}
-	}
-	/*
-	 * now update active-time. We would like to round to the nearest hour
-	 * but unless atomic_t are sure to be proper signed ints we cannot,
-	 * because we need to account for what we "transfer" to EEPROM and
-	 * if we log an hour at 31 minutes, then we would need to set
-	 * active_time to -29 to accurately count the _next_ hour.
-	 */
-	if (new_time >= 3600) {
-		new_hrs = new_time / 3600;
-		atomic_sub((new_hrs * 3600), &dd->ipath_active_time);
-		new_hrs += dd->ipath_eep_hrs;
-		if (new_hrs > 0xFFFF)
-			new_hrs = 0xFFFF;
-		dd->ipath_eep_hrs = new_hrs;
-		if ((new_hrs & 0xFF) != ifp->if_powerhour[0]) {
-			ifp->if_powerhour[0] = new_hrs & 0xFF;
-			hi_water = offsetof(struct ipath_flash, if_powerhour);
-		}
-		if ((new_hrs >> 8) != ifp->if_powerhour[1]) {
-			ifp->if_powerhour[1] = new_hrs >> 8;
-			hi_water = offsetof(struct ipath_flash, if_powerhour)
-					+ 1;
-		}
-	}
-	/*
-	 * There is a tiny possibility that we could somehow fail to write
-	 * the EEPROM after updating our shadows, but problems from holding
-	 * the spinlock too long are a much bigger issue.
-	 */
-	spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags);
-	if (hi_water) {
-		/* we made some change to the data, uopdate cksum and write */
-		csum = flash_csum(ifp, 1);
-		ret = ipath_eeprom_internal_write(dd, 0, buf, hi_water + 1);
-	}
-	mutex_unlock(&dd->ipath_eep_lock);
-	if (ret)
-		ipath_dev_err(dd, "Failed updating EEPROM\n");
-
-free_bail:
-	vfree(buf);
-bail:
-	return ret;
-
-}
-
-/**
- * ipath_inc_eeprom_err - increment one of the four error counters
- * that are logged to EEPROM.
- * @dd: the infinipath device
- * @eidx: 0..3, the counter to increment
- * @incr: how much to add
- *
- * Each counter is 8-bits, and saturates at 255 (0xFF). They
- * are copied to the EEPROM (aka flash) whenever ipath_update_eeprom_log()
- * is called, but it can only be called in a context that allows sleep.
- * This function can be called even at interrupt level.
- */
-
-void ipath_inc_eeprom_err(struct ipath_devdata *dd, u32 eidx, u32 incr)
-{
-	uint new_val;
-	unsigned long flags;
-
-	spin_lock_irqsave(&dd->ipath_eep_st_lock, flags);
-	new_val = dd->ipath_eep_st_new_errs[eidx] + incr;
-	if (new_val > 255)
-		new_val = 255;
-	dd->ipath_eep_st_new_errs[eidx] = new_val;
-	spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags);
-	return;
-}
-
-static int ipath_tempsense_internal_read(struct ipath_devdata *dd, u8 regnum)
-{
-	int ret;
-	struct i2c_chain_desc *icd;
-
-	ret = -ENOENT;
-
-	icd = ipath_i2c_type(dd);
-	if (!icd)
-		goto bail;
-
-	if (icd->temp_dev == IPATH_NO_DEV) {
-		/* tempsense only exists on new, real-I2C boards */
-		ret = -ENXIO;
-		goto bail;
-	}
-
-	if (i2c_startcmd(dd, icd->temp_dev | WRITE_CMD)) {
-		ipath_dbg("Failed tempsense startcmd\n");
-		stop_cmd(dd);
-		ret = -ENXIO;
-		goto bail;
-	}
-	ret = wr_byte(dd, regnum);
-	stop_cmd(dd);
-	if (ret) {
-		ipath_dev_err(dd, "Failed tempsense WR command %02X\n",
-			      regnum);
-		ret = -ENXIO;
-		goto bail;
-	}
-	if (i2c_startcmd(dd, icd->temp_dev | READ_CMD)) {
-		ipath_dbg("Failed tempsense RD startcmd\n");
-		stop_cmd(dd);
-		ret = -ENXIO;
-		goto bail;
-	}
-	/*
-	 * We can only clock out one byte per command, sensibly
-	 */
-	ret = rd_byte(dd);
-	stop_cmd(dd);
-
-bail:
-	return ret;
-}
-
-#define VALID_TS_RD_REG_MASK 0xBF
-
-/**
- * ipath_tempsense_read - read register of temp sensor via I2C
- * @dd: the infinipath device
- * @regnum: register to read from
- *
- * returns reg contents (0..255) or < 0 for error
- */
-int ipath_tempsense_read(struct ipath_devdata *dd, u8 regnum)
-{
-	int ret;
-
-	if (regnum > 7)
-		return -EINVAL;
-
-	/* return a bogus value for (the one) register we do not have */
-	if (!((1 << regnum) & VALID_TS_RD_REG_MASK))
-		return 0;
-
-	ret = mutex_lock_interruptible(&dd->ipath_eep_lock);
-	if (!ret) {
-		ret = ipath_tempsense_internal_read(dd, regnum);
-		mutex_unlock(&dd->ipath_eep_lock);
-	}
-
-	/*
-	 * There are three possibilities here:
-	 * ret is actual value (0..255)
-	 * ret is -ENXIO or -EINVAL from code in this file
-	 * ret is -EINTR from mutex_lock_interruptible.
-	 */
-	return ret;
-}
-
-static int ipath_tempsense_internal_write(struct ipath_devdata *dd,
-					  u8 regnum, u8 data)
-{
-	int ret = -ENOENT;
-	struct i2c_chain_desc *icd;
-
-	icd = ipath_i2c_type(dd);
-	if (!icd)
-		goto bail;
-
-	if (icd->temp_dev == IPATH_NO_DEV) {
-		/* tempsense only exists on new, real-I2C boards */
-		ret = -ENXIO;
-		goto bail;
-	}
-	if (i2c_startcmd(dd, icd->temp_dev | WRITE_CMD)) {
-		ipath_dbg("Failed tempsense startcmd\n");
-		stop_cmd(dd);
-		ret = -ENXIO;
-		goto bail;
-	}
-	ret = wr_byte(dd, regnum);
-	if (ret) {
-		stop_cmd(dd);
-		ipath_dev_err(dd, "Failed to write tempsense command %02X\n",
-			      regnum);
-		ret = -ENXIO;
-		goto bail;
-	}
-	ret = wr_byte(dd, data);
-	stop_cmd(dd);
-	ret = i2c_startcmd(dd, icd->temp_dev | READ_CMD);
-	if (ret) {
-		ipath_dev_err(dd, "Failed tempsense data wrt to %02X\n",
-			      regnum);
-		ret = -ENXIO;
-	}
-
-bail:
-	return ret;
-}
-
-#define VALID_TS_WR_REG_MASK ((1 << 9) | (1 << 0xB) | (1 << 0xD))
-
-/**
- * ipath_tempsense_write - write register of temp sensor via I2C
- * @dd: the infinipath device
- * @regnum: register to write
- * @data: data to write
- *
- * returns 0 for success or < 0 for error
- */
-int ipath_tempsense_write(struct ipath_devdata *dd, u8 regnum, u8 data)
-{
-	int ret;
-
-	if (regnum > 15 || !((1 << regnum) & VALID_TS_WR_REG_MASK))
-		return -EINVAL;
-
-	ret = mutex_lock_interruptible(&dd->ipath_eep_lock);
-	if (!ret) {
-		ret = ipath_tempsense_internal_write(dd, regnum, data);
-		mutex_unlock(&dd->ipath_eep_lock);
-	}
-
-	/*
-	 * There are three possibilities here:
-	 * ret is 0 for success
-	 * ret is -ENXIO or -EINVAL from code in this file
-	 * ret is -EINTR from mutex_lock_interruptible.
-	 */
-	return ret;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_file_ops.c b/drivers/staging/rdma/ipath/ipath_file_ops.c
deleted file mode 100644
index 6187b84..0000000
--- a/drivers/staging/rdma/ipath/ipath_file_ops.c
+++ /dev/null
@@ -1,2619 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/pci.h>
-#include <linux/poll.h>
-#include <linux/cdev.h>
-#include <linux/swap.h>
-#include <linux/export.h>
-#include <linux/vmalloc.h>
-#include <linux/slab.h>
-#include <linux/highmem.h>
-#include <linux/io.h>
-#include <linux/jiffies.h>
-#include <linux/cpu.h>
-#include <linux/uio.h>
-#include <asm/pgtable.h>
-
-#include "ipath_kernel.h"
-#include "ipath_common.h"
-#include "ipath_user_sdma.h"
-
-static int ipath_open(struct inode *, struct file *);
-static int ipath_close(struct inode *, struct file *);
-static ssize_t ipath_write(struct file *, const char __user *, size_t,
-			   loff_t *);
-static ssize_t ipath_write_iter(struct kiocb *, struct iov_iter *from);
-static unsigned int ipath_poll(struct file *, struct poll_table_struct *);
-static int ipath_mmap(struct file *, struct vm_area_struct *);
-
-/*
- * This is really, really weird shit - write() and writev() here
- * have completely unrelated semantics.  Sucky userland ABI,
- * film at 11.
- */
-static const struct file_operations ipath_file_ops = {
-	.owner = THIS_MODULE,
-	.write = ipath_write,
-	.write_iter = ipath_write_iter,
-	.open = ipath_open,
-	.release = ipath_close,
-	.poll = ipath_poll,
-	.mmap = ipath_mmap,
-	.llseek = noop_llseek,
-};
-
-/*
- * Convert kernel virtual addresses to physical addresses so they don't
- * potentially conflict with the chip addresses used as mmap offsets.
- * It doesn't really matter what mmap offset we use as long as we can
- * interpret it correctly.
- */
-static u64 cvt_kvaddr(void *p)
-{
-	struct page *page;
-	u64 paddr = 0;
-
-	page = vmalloc_to_page(p);
-	if (page)
-		paddr = page_to_pfn(page) << PAGE_SHIFT;
-
-	return paddr;
-}
-
-static int ipath_get_base_info(struct file *fp,
-			       void __user *ubase, size_t ubase_size)
-{
-	struct ipath_portdata *pd = port_fp(fp);
-	int ret = 0;
-	struct ipath_base_info *kinfo = NULL;
-	struct ipath_devdata *dd = pd->port_dd;
-	unsigned subport_cnt;
-	int shared, master;
-	size_t sz;
-
-	subport_cnt = pd->port_subport_cnt;
-	if (!subport_cnt) {
-		shared = 0;
-		master = 0;
-		subport_cnt = 1;
-	} else {
-		shared = 1;
-		master = !subport_fp(fp);
-	}
-
-	sz = sizeof(*kinfo);
-	/* If port sharing is not requested, allow the old size structure */
-	if (!shared)
-		sz -= 7 * sizeof(u64);
-	if (ubase_size < sz) {
-		ipath_cdbg(PROC,
-			   "Base size %zu, need %zu (version mismatch?)\n",
-			   ubase_size, sz);
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	kinfo = kzalloc(sizeof(*kinfo), GFP_KERNEL);
-	if (kinfo == NULL) {
-		ret = -ENOMEM;
-		goto bail;
-	}
-
-	ret = dd->ipath_f_get_base_info(pd, kinfo);
-	if (ret < 0)
-		goto bail;
-
-	kinfo->spi_rcvhdr_cnt = dd->ipath_rcvhdrcnt;
-	kinfo->spi_rcvhdrent_size = dd->ipath_rcvhdrentsize;
-	kinfo->spi_tidegrcnt = dd->ipath_rcvegrcnt;
-	kinfo->spi_rcv_egrbufsize = dd->ipath_rcvegrbufsize;
-	/*
-	 * have to mmap whole thing
-	 */
-	kinfo->spi_rcv_egrbuftotlen =
-		pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size;
-	kinfo->spi_rcv_egrperchunk = pd->port_rcvegrbufs_perchunk;
-	kinfo->spi_rcv_egrchunksize = kinfo->spi_rcv_egrbuftotlen /
-		pd->port_rcvegrbuf_chunks;
-	kinfo->spi_tidcnt = dd->ipath_rcvtidcnt / subport_cnt;
-	if (master)
-		kinfo->spi_tidcnt += dd->ipath_rcvtidcnt % subport_cnt;
-	/*
-	 * for this use, may be ipath_cfgports summed over all chips that
-	 * are are configured and present
-	 */
-	kinfo->spi_nports = dd->ipath_cfgports;
-	/* unit (chip/board) our port is on */
-	kinfo->spi_unit = dd->ipath_unit;
-	/* for now, only a single page */
-	kinfo->spi_tid_maxsize = PAGE_SIZE;
-
-	/*
-	 * Doing this per port, and based on the skip value, etc.  This has
-	 * to be the actual buffer size, since the protocol code treats it
-	 * as an array.
-	 *
-	 * These have to be set to user addresses in the user code via mmap.
-	 * These values are used on return to user code for the mmap target
-	 * addresses only.  For 32 bit, same 44 bit address problem, so use
-	 * the physical address, not virtual.  Before 2.6.11, using the
-	 * page_address() macro worked, but in 2.6.11, even that returns the
-	 * full 64 bit address (upper bits all 1's).  So far, using the
-	 * physical addresses (or chip offsets, for chip mapping) works, but
-	 * no doubt some future kernel release will change that, and we'll be
-	 * on to yet another method of dealing with this.
-	 */
-	kinfo->spi_rcvhdr_base = (u64) pd->port_rcvhdrq_phys;
-	kinfo->spi_rcvhdr_tailaddr = (u64) pd->port_rcvhdrqtailaddr_phys;
-	kinfo->spi_rcv_egrbufs = (u64) pd->port_rcvegr_phys;
-	kinfo->spi_pioavailaddr = (u64) dd->ipath_pioavailregs_phys;
-	kinfo->spi_status = (u64) kinfo->spi_pioavailaddr +
-		(void *) dd->ipath_statusp -
-		(void *) dd->ipath_pioavailregs_dma;
-	if (!shared) {
-		kinfo->spi_piocnt = pd->port_piocnt;
-		kinfo->spi_piobufbase = (u64) pd->port_piobufs;
-		kinfo->__spi_uregbase = (u64) dd->ipath_uregbase +
-			dd->ipath_ureg_align * pd->port_port;
-	} else if (master) {
-		kinfo->spi_piocnt = (pd->port_piocnt / subport_cnt) +
-				    (pd->port_piocnt % subport_cnt);
-		/* Master's PIO buffers are after all the slave's */
-		kinfo->spi_piobufbase = (u64) pd->port_piobufs +
-			dd->ipath_palign *
-			(pd->port_piocnt - kinfo->spi_piocnt);
-	} else {
-		unsigned slave = subport_fp(fp) - 1;
-
-		kinfo->spi_piocnt = pd->port_piocnt / subport_cnt;
-		kinfo->spi_piobufbase = (u64) pd->port_piobufs +
-			dd->ipath_palign * kinfo->spi_piocnt * slave;
-	}
-
-	if (shared) {
-		kinfo->spi_port_uregbase = (u64) dd->ipath_uregbase +
-			dd->ipath_ureg_align * pd->port_port;
-		kinfo->spi_port_rcvegrbuf = kinfo->spi_rcv_egrbufs;
-		kinfo->spi_port_rcvhdr_base = kinfo->spi_rcvhdr_base;
-		kinfo->spi_port_rcvhdr_tailaddr = kinfo->spi_rcvhdr_tailaddr;
-
-		kinfo->__spi_uregbase = cvt_kvaddr(pd->subport_uregbase +
-			PAGE_SIZE * subport_fp(fp));
-
-		kinfo->spi_rcvhdr_base = cvt_kvaddr(pd->subport_rcvhdr_base +
-			pd->port_rcvhdrq_size * subport_fp(fp));
-		kinfo->spi_rcvhdr_tailaddr = 0;
-		kinfo->spi_rcv_egrbufs = cvt_kvaddr(pd->subport_rcvegrbuf +
-			pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size *
-			subport_fp(fp));
-
-		kinfo->spi_subport_uregbase =
-			cvt_kvaddr(pd->subport_uregbase);
-		kinfo->spi_subport_rcvegrbuf =
-			cvt_kvaddr(pd->subport_rcvegrbuf);
-		kinfo->spi_subport_rcvhdr_base =
-			cvt_kvaddr(pd->subport_rcvhdr_base);
-		ipath_cdbg(PROC, "port %u flags %x %llx %llx %llx\n",
-			kinfo->spi_port, kinfo->spi_runtime_flags,
-			(unsigned long long) kinfo->spi_subport_uregbase,
-			(unsigned long long) kinfo->spi_subport_rcvegrbuf,
-			(unsigned long long) kinfo->spi_subport_rcvhdr_base);
-	}
-
-	/*
-	 * All user buffers are 2KB buffers.  If we ever support
-	 * giving 4KB buffers to user processes, this will need some
-	 * work.
-	 */
-	kinfo->spi_pioindex = (kinfo->spi_piobufbase -
-		(dd->ipath_piobufbase & 0xffffffff)) / dd->ipath_palign;
-	kinfo->spi_pioalign = dd->ipath_palign;
-
-	kinfo->spi_qpair = IPATH_KD_QP;
-	/*
-	 * user mode PIO buffers are always 2KB, even when 4KB can
-	 * be received, and sent via the kernel; this is ibmaxlen
-	 * for 2K MTU.
-	 */
-	kinfo->spi_piosize = dd->ipath_piosize2k - 2 * sizeof(u32);
-	kinfo->spi_mtu = dd->ipath_ibmaxlen;	/* maxlen, not ibmtu */
-	kinfo->spi_port = pd->port_port;
-	kinfo->spi_subport = subport_fp(fp);
-	kinfo->spi_sw_version = IPATH_KERN_SWVERSION;
-	kinfo->spi_hw_version = dd->ipath_revision;
-
-	if (master) {
-		kinfo->spi_runtime_flags |= IPATH_RUNTIME_MASTER;
-	}
-
-	sz = (ubase_size < sizeof(*kinfo)) ? ubase_size : sizeof(*kinfo);
-	if (copy_to_user(ubase, kinfo, sz))
-		ret = -EFAULT;
-
-bail:
-	kfree(kinfo);
-	return ret;
-}
-
-/**
- * ipath_tid_update - update a port TID
- * @pd: the port
- * @fp: the ipath device file
- * @ti: the TID information
- *
- * The new implementation as of Oct 2004 is that the driver assigns
- * the tid and returns it to the caller.   To make it easier to
- * catch bugs, and to reduce search time, we keep a cursor for
- * each port, walking the shadow tid array to find one that's not
- * in use.
- *
- * For now, if we can't allocate the full list, we fail, although
- * in the long run, we'll allocate as many as we can, and the
- * caller will deal with that by trying the remaining pages later.
- * That means that when we fail, we have to mark the tids as not in
- * use again, in our shadow copy.
- *
- * It's up to the caller to free the tids when they are done.
- * We'll unlock the pages as they free them.
- *
- * Also, right now we are locking one page at a time, but since
- * the intended use of this routine is for a single group of
- * virtually contiguous pages, that should change to improve
- * performance.
- */
-static int ipath_tid_update(struct ipath_portdata *pd, struct file *fp,
-			    const struct ipath_tid_info *ti)
-{
-	int ret = 0, ntids;
-	u32 tid, porttid, cnt, i, tidcnt, tidoff;
-	u16 *tidlist;
-	struct ipath_devdata *dd = pd->port_dd;
-	u64 physaddr;
-	unsigned long vaddr;
-	u64 __iomem *tidbase;
-	unsigned long tidmap[8];
-	struct page **pagep = NULL;
-	unsigned subport = subport_fp(fp);
-
-	if (!dd->ipath_pageshadow) {
-		ret = -ENOMEM;
-		goto done;
-	}
-
-	cnt = ti->tidcnt;
-	if (!cnt) {
-		ipath_dbg("After copyin, tidcnt 0, tidlist %llx\n",
-			  (unsigned long long) ti->tidlist);
-		/*
-		 * Should we treat as success?  likely a bug
-		 */
-		ret = -EFAULT;
-		goto done;
-	}
-	porttid = pd->port_port * dd->ipath_rcvtidcnt;
-	if (!pd->port_subport_cnt) {
-		tidcnt = dd->ipath_rcvtidcnt;
-		tid = pd->port_tidcursor;
-		tidoff = 0;
-	} else if (!subport) {
-		tidcnt = (dd->ipath_rcvtidcnt / pd->port_subport_cnt) +
-			 (dd->ipath_rcvtidcnt % pd->port_subport_cnt);
-		tidoff = dd->ipath_rcvtidcnt - tidcnt;
-		porttid += tidoff;
-		tid = tidcursor_fp(fp);
-	} else {
-		tidcnt = dd->ipath_rcvtidcnt / pd->port_subport_cnt;
-		tidoff = tidcnt * (subport - 1);
-		porttid += tidoff;
-		tid = tidcursor_fp(fp);
-	}
-	if (cnt > tidcnt) {
-		/* make sure it all fits in port_tid_pg_list */
-		dev_info(&dd->pcidev->dev, "Process tried to allocate %u "
-			 "TIDs, only trying max (%u)\n", cnt, tidcnt);
-		cnt = tidcnt;
-	}
-	pagep = &((struct page **) pd->port_tid_pg_list)[tidoff];
-	tidlist = &((u16 *) &pagep[dd->ipath_rcvtidcnt])[tidoff];
-
-	memset(tidmap, 0, sizeof(tidmap));
-	/* before decrement; chip actual # */
-	ntids = tidcnt;
-	tidbase = (u64 __iomem *) (((char __iomem *) dd->ipath_kregbase) +
-				   dd->ipath_rcvtidbase +
-				   porttid * sizeof(*tidbase));
-
-	ipath_cdbg(VERBOSE, "Port%u %u tids, cursor %u, tidbase %p\n",
-		   pd->port_port, cnt, tid, tidbase);
-
-	/* virtual address of first page in transfer */
-	vaddr = ti->tidvaddr;
-	if (!access_ok(VERIFY_WRITE, (void __user *) vaddr,
-		       cnt * PAGE_SIZE)) {
-		ipath_dbg("Fail vaddr %p, %u pages, !access_ok\n",
-			  (void *)vaddr, cnt);
-		ret = -EFAULT;
-		goto done;
-	}
-	ret = ipath_get_user_pages(vaddr, cnt, pagep);
-	if (ret) {
-		if (ret == -EBUSY) {
-			ipath_dbg("Failed to lock addr %p, %u pages "
-				  "(already locked)\n",
-				  (void *) vaddr, cnt);
-			/*
-			 * for now, continue, and see what happens but with
-			 * the new implementation, this should never happen,
-			 * unless perhaps the user has mpin'ed the pages
-			 * themselves (something we need to test)
-			 */
-			ret = 0;
-		} else {
-			dev_info(&dd->pcidev->dev,
-				 "Failed to lock addr %p, %u pages: "
-				 "errno %d\n", (void *) vaddr, cnt, -ret);
-			goto done;
-		}
-	}
-	for (i = 0; i < cnt; i++, vaddr += PAGE_SIZE) {
-		for (; ntids--; tid++) {
-			if (tid == tidcnt)
-				tid = 0;
-			if (!dd->ipath_pageshadow[porttid + tid])
-				break;
-		}
-		if (ntids < 0) {
-			/*
-			 * oops, wrapped all the way through their TIDs,
-			 * and didn't have enough free; see comments at
-			 * start of routine
-			 */
-			ipath_dbg("Not enough free TIDs for %u pages "
-				  "(index %d), failing\n", cnt, i);
-			i--;	/* last tidlist[i] not filled in */
-			ret = -ENOMEM;
-			break;
-		}
-		tidlist[i] = tid + tidoff;
-		ipath_cdbg(VERBOSE, "Updating idx %u to TID %u, "
-			   "vaddr %lx\n", i, tid + tidoff, vaddr);
-		/* we "know" system pages and TID pages are same size */
-		dd->ipath_pageshadow[porttid + tid] = pagep[i];
-		dd->ipath_physshadow[porttid + tid] = ipath_map_page(
-			dd->pcidev, pagep[i], 0, PAGE_SIZE,
-			PCI_DMA_FROMDEVICE);
-		/*
-		 * don't need atomic or it's overhead
-		 */
-		__set_bit(tid, tidmap);
-		physaddr = dd->ipath_physshadow[porttid + tid];
-		ipath_stats.sps_pagelocks++;
-		ipath_cdbg(VERBOSE,
-			   "TID %u, vaddr %lx, physaddr %llx pgp %p\n",
-			   tid, vaddr, (unsigned long long) physaddr,
-			   pagep[i]);
-		dd->ipath_f_put_tid(dd, &tidbase[tid], RCVHQ_RCV_TYPE_EXPECTED,
-				    physaddr);
-		/*
-		 * don't check this tid in ipath_portshadow, since we
-		 * just filled it in; start with the next one.
-		 */
-		tid++;
-	}
-
-	if (ret) {
-		u32 limit;
-	cleanup:
-		/* jump here if copy out of updated info failed... */
-		ipath_dbg("After failure (ret=%d), undo %d of %d entries\n",
-			  -ret, i, cnt);
-		/* same code that's in ipath_free_tid() */
-		limit = sizeof(tidmap) * BITS_PER_BYTE;
-		if (limit > tidcnt)
-			/* just in case size changes in future */
-			limit = tidcnt;
-		tid = find_first_bit((const unsigned long *)tidmap, limit);
-		for (; tid < limit; tid++) {
-			if (!test_bit(tid, tidmap))
-				continue;
-			if (dd->ipath_pageshadow[porttid + tid]) {
-				ipath_cdbg(VERBOSE, "Freeing TID %u\n",
-					   tid);
-				dd->ipath_f_put_tid(dd, &tidbase[tid],
-						    RCVHQ_RCV_TYPE_EXPECTED,
-						    dd->ipath_tidinvalid);
-				pci_unmap_page(dd->pcidev,
-					dd->ipath_physshadow[porttid + tid],
-					PAGE_SIZE, PCI_DMA_FROMDEVICE);
-				dd->ipath_pageshadow[porttid + tid] = NULL;
-				ipath_stats.sps_pageunlocks++;
-			}
-		}
-		ipath_release_user_pages(pagep, cnt);
-	} else {
-		/*
-		 * Copy the updated array, with ipath_tid's filled in, back
-		 * to user.  Since we did the copy in already, this "should
-		 * never fail" If it does, we have to clean up...
-		 */
-		if (copy_to_user((void __user *)
-				 (unsigned long) ti->tidlist,
-				 tidlist, cnt * sizeof(*tidlist))) {
-			ret = -EFAULT;
-			goto cleanup;
-		}
-		if (copy_to_user((void __user *) (unsigned long) ti->tidmap,
-				 tidmap, sizeof tidmap)) {
-			ret = -EFAULT;
-			goto cleanup;
-		}
-		if (tid == tidcnt)
-			tid = 0;
-		if (!pd->port_subport_cnt)
-			pd->port_tidcursor = tid;
-		else
-			tidcursor_fp(fp) = tid;
-	}
-
-done:
-	if (ret)
-		ipath_dbg("Failed to map %u TID pages, failing with %d\n",
-			  ti->tidcnt, -ret);
-	return ret;
-}
-
-/**
- * ipath_tid_free - free a port TID
- * @pd: the port
- * @subport: the subport
- * @ti: the TID info
- *
- * right now we are unlocking one page at a time, but since
- * the intended use of this routine is for a single group of
- * virtually contiguous pages, that should change to improve
- * performance.  We check that the TID is in range for this port
- * but otherwise don't check validity; if user has an error and
- * frees the wrong tid, it's only their own data that can thereby
- * be corrupted.  We do check that the TID was in use, for sanity
- * We always use our idea of the saved address, not the address that
- * they pass in to us.
- */
-
-static int ipath_tid_free(struct ipath_portdata *pd, unsigned subport,
-			  const struct ipath_tid_info *ti)
-{
-	int ret = 0;
-	u32 tid, porttid, cnt, limit, tidcnt;
-	struct ipath_devdata *dd = pd->port_dd;
-	u64 __iomem *tidbase;
-	unsigned long tidmap[8];
-
-	if (!dd->ipath_pageshadow) {
-		ret = -ENOMEM;
-		goto done;
-	}
-
-	if (copy_from_user(tidmap, (void __user *)(unsigned long)ti->tidmap,
-			   sizeof tidmap)) {
-		ret = -EFAULT;
-		goto done;
-	}
-
-	porttid = pd->port_port * dd->ipath_rcvtidcnt;
-	if (!pd->port_subport_cnt)
-		tidcnt = dd->ipath_rcvtidcnt;
-	else if (!subport) {
-		tidcnt = (dd->ipath_rcvtidcnt / pd->port_subport_cnt) +
-			 (dd->ipath_rcvtidcnt % pd->port_subport_cnt);
-		porttid += dd->ipath_rcvtidcnt - tidcnt;
-	} else {
-		tidcnt = dd->ipath_rcvtidcnt / pd->port_subport_cnt;
-		porttid += tidcnt * (subport - 1);
-	}
-	tidbase = (u64 __iomem *) ((char __iomem *)(dd->ipath_kregbase) +
-				   dd->ipath_rcvtidbase +
-				   porttid * sizeof(*tidbase));
-
-	limit = sizeof(tidmap) * BITS_PER_BYTE;
-	if (limit > tidcnt)
-		/* just in case size changes in future */
-		limit = tidcnt;
-	tid = find_first_bit(tidmap, limit);
-	ipath_cdbg(VERBOSE, "Port%u free %u tids; first bit (max=%d) "
-		   "set is %d, porttid %u\n", pd->port_port, ti->tidcnt,
-		   limit, tid, porttid);
-	for (cnt = 0; tid < limit; tid++) {
-		/*
-		 * small optimization; if we detect a run of 3 or so without
-		 * any set, use find_first_bit again.  That's mainly to
-		 * accelerate the case where we wrapped, so we have some at
-		 * the beginning, and some at the end, and a big gap
-		 * in the middle.
-		 */
-		if (!test_bit(tid, tidmap))
-			continue;
-		cnt++;
-		if (dd->ipath_pageshadow[porttid + tid]) {
-			struct page *p;
-			p = dd->ipath_pageshadow[porttid + tid];
-			dd->ipath_pageshadow[porttid + tid] = NULL;
-			ipath_cdbg(VERBOSE, "PID %u freeing TID %u\n",
-				   pid_nr(pd->port_pid), tid);
-			dd->ipath_f_put_tid(dd, &tidbase[tid],
-					    RCVHQ_RCV_TYPE_EXPECTED,
-					    dd->ipath_tidinvalid);
-			pci_unmap_page(dd->pcidev,
-				dd->ipath_physshadow[porttid + tid],
-				PAGE_SIZE, PCI_DMA_FROMDEVICE);
-			ipath_release_user_pages(&p, 1);
-			ipath_stats.sps_pageunlocks++;
-		} else
-			ipath_dbg("Unused tid %u, ignoring\n", tid);
-	}
-	if (cnt != ti->tidcnt)
-		ipath_dbg("passed in tidcnt %d, only %d bits set in map\n",
-			  ti->tidcnt, cnt);
-done:
-	if (ret)
-		ipath_dbg("Failed to unmap %u TID pages, failing with %d\n",
-			  ti->tidcnt, -ret);
-	return ret;
-}
-
-/**
- * ipath_set_part_key - set a partition key
- * @pd: the port
- * @key: the key
- *
- * We can have up to 4 active at a time (other than the default, which is
- * always allowed).  This is somewhat tricky, since multiple ports may set
- * the same key, so we reference count them, and clean up at exit.  All 4
- * partition keys are packed into a single infinipath register.  It's an
- * error for a process to set the same pkey multiple times.  We provide no
- * mechanism to de-allocate a pkey at this time, we may eventually need to
- * do that.  I've used the atomic operations, and no locking, and only make
- * a single pass through what's available.  This should be more than
- * adequate for some time. I'll think about spinlocks or the like if and as
- * it's necessary.
- */
-static int ipath_set_part_key(struct ipath_portdata *pd, u16 key)
-{
-	struct ipath_devdata *dd = pd->port_dd;
-	int i, any = 0, pidx = -1;
-	u16 lkey = key & 0x7FFF;
-	int ret;
-
-	if (lkey == (IPATH_DEFAULT_P_KEY & 0x7FFF)) {
-		/* nothing to do; this key always valid */
-		ret = 0;
-		goto bail;
-	}
-
-	ipath_cdbg(VERBOSE, "p%u try to set pkey %hx, current keys "
-		   "%hx:%x %hx:%x %hx:%x %hx:%x\n",
-		   pd->port_port, key, dd->ipath_pkeys[0],
-		   atomic_read(&dd->ipath_pkeyrefs[0]), dd->ipath_pkeys[1],
-		   atomic_read(&dd->ipath_pkeyrefs[1]), dd->ipath_pkeys[2],
-		   atomic_read(&dd->ipath_pkeyrefs[2]), dd->ipath_pkeys[3],
-		   atomic_read(&dd->ipath_pkeyrefs[3]));
-
-	if (!lkey) {
-		ipath_cdbg(PROC, "p%u tries to set key 0, not allowed\n",
-			   pd->port_port);
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	/*
-	 * Set the full membership bit, because it has to be
-	 * set in the register or the packet, and it seems
-	 * cleaner to set in the register than to force all
-	 * callers to set it. (see bug 4331)
-	 */
-	key |= 0x8000;
-
-	for (i = 0; i < ARRAY_SIZE(pd->port_pkeys); i++) {
-		if (!pd->port_pkeys[i] && pidx == -1)
-			pidx = i;
-		if (pd->port_pkeys[i] == key) {
-			ipath_cdbg(VERBOSE, "p%u tries to set same pkey "
-				   "(%x) more than once\n",
-				   pd->port_port, key);
-			ret = -EEXIST;
-			goto bail;
-		}
-	}
-	if (pidx == -1) {
-		ipath_dbg("All pkeys for port %u already in use, "
-			  "can't set %x\n", pd->port_port, key);
-		ret = -EBUSY;
-		goto bail;
-	}
-	for (any = i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
-		if (!dd->ipath_pkeys[i]) {
-			any++;
-			continue;
-		}
-		if (dd->ipath_pkeys[i] == key) {
-			atomic_t *pkrefs = &dd->ipath_pkeyrefs[i];
-
-			if (atomic_inc_return(pkrefs) > 1) {
-				pd->port_pkeys[pidx] = key;
-				ipath_cdbg(VERBOSE, "p%u set key %x "
-					   "matches #%d, count now %d\n",
-					   pd->port_port, key, i,
-					   atomic_read(pkrefs));
-				ret = 0;
-				goto bail;
-			} else {
-				/*
-				 * lost race, decrement count, catch below
-				 */
-				atomic_dec(pkrefs);
-				ipath_cdbg(VERBOSE, "Lost race, count was "
-					   "0, after dec, it's %d\n",
-					   atomic_read(pkrefs));
-				any++;
-			}
-		}
-		if ((dd->ipath_pkeys[i] & 0x7FFF) == lkey) {
-			/*
-			 * It makes no sense to have both the limited and
-			 * full membership PKEY set at the same time since
-			 * the unlimited one will disable the limited one.
-			 */
-			ret = -EEXIST;
-			goto bail;
-		}
-	}
-	if (!any) {
-		ipath_dbg("port %u, all pkeys already in use, "
-			  "can't set %x\n", pd->port_port, key);
-		ret = -EBUSY;
-		goto bail;
-	}
-	for (any = i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
-		if (!dd->ipath_pkeys[i] &&
-		    atomic_inc_return(&dd->ipath_pkeyrefs[i]) == 1) {
-			u64 pkey;
-
-			/* for ipathstats, etc. */
-			ipath_stats.sps_pkeys[i] = lkey;
-			pd->port_pkeys[pidx] = dd->ipath_pkeys[i] = key;
-			pkey =
-				(u64) dd->ipath_pkeys[0] |
-				((u64) dd->ipath_pkeys[1] << 16) |
-				((u64) dd->ipath_pkeys[2] << 32) |
-				((u64) dd->ipath_pkeys[3] << 48);
-			ipath_cdbg(PROC, "p%u set key %x in #%d, "
-				   "portidx %d, new pkey reg %llx\n",
-				   pd->port_port, key, i, pidx,
-				   (unsigned long long) pkey);
-			ipath_write_kreg(
-				dd, dd->ipath_kregs->kr_partitionkey, pkey);
-
-			ret = 0;
-			goto bail;
-		}
-	}
-	ipath_dbg("port %u, all pkeys already in use 2nd pass, "
-		  "can't set %x\n", pd->port_port, key);
-	ret = -EBUSY;
-
-bail:
-	return ret;
-}
-
-/**
- * ipath_manage_rcvq - manage a port's receive queue
- * @pd: the port
- * @subport: the subport
- * @start_stop: action to carry out
- *
- * start_stop == 0 disables receive on the port, for use in queue
- * overflow conditions.  start_stop==1 re-enables, to be used to
- * re-init the software copy of the head register
- */
-static int ipath_manage_rcvq(struct ipath_portdata *pd, unsigned subport,
-			     int start_stop)
-{
-	struct ipath_devdata *dd = pd->port_dd;
-
-	ipath_cdbg(PROC, "%sabling rcv for unit %u port %u:%u\n",
-		   start_stop ? "en" : "dis", dd->ipath_unit,
-		   pd->port_port, subport);
-	if (subport)
-		goto bail;
-	/* atomically clear receive enable port. */
-	if (start_stop) {
-		/*
-		 * On enable, force in-memory copy of the tail register to
-		 * 0, so that protocol code doesn't have to worry about
-		 * whether or not the chip has yet updated the in-memory
-		 * copy or not on return from the system call. The chip
-		 * always resets it's tail register back to 0 on a
-		 * transition from disabled to enabled.  This could cause a
-		 * problem if software was broken, and did the enable w/o
-		 * the disable, but eventually the in-memory copy will be
-		 * updated and correct itself, even in the face of software
-		 * bugs.
-		 */
-		if (pd->port_rcvhdrtail_kvaddr)
-			ipath_clear_rcvhdrtail(pd);
-		set_bit(dd->ipath_r_portenable_shift + pd->port_port,
-			&dd->ipath_rcvctrl);
-	} else
-		clear_bit(dd->ipath_r_portenable_shift + pd->port_port,
-			  &dd->ipath_rcvctrl);
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
-			 dd->ipath_rcvctrl);
-	/* now be sure chip saw it before we return */
-	ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-	if (start_stop) {
-		/*
-		 * And try to be sure that tail reg update has happened too.
-		 * This should in theory interlock with the RXE changes to
-		 * the tail register.  Don't assign it to the tail register
-		 * in memory copy, since we could overwrite an update by the
-		 * chip if we did.
-		 */
-		ipath_read_ureg32(dd, ur_rcvhdrtail, pd->port_port);
-	}
-	/* always; new head should be equal to new tail; see above */
-bail:
-	return 0;
-}
-
-static void ipath_clean_part_key(struct ipath_portdata *pd,
-				 struct ipath_devdata *dd)
-{
-	int i, j, pchanged = 0;
-	u64 oldpkey;
-
-	/* for debugging only */
-	oldpkey = (u64) dd->ipath_pkeys[0] |
-		((u64) dd->ipath_pkeys[1] << 16) |
-		((u64) dd->ipath_pkeys[2] << 32) |
-		((u64) dd->ipath_pkeys[3] << 48);
-
-	for (i = 0; i < ARRAY_SIZE(pd->port_pkeys); i++) {
-		if (!pd->port_pkeys[i])
-			continue;
-		ipath_cdbg(VERBOSE, "look for key[%d] %hx in pkeys\n", i,
-			   pd->port_pkeys[i]);
-		for (j = 0; j < ARRAY_SIZE(dd->ipath_pkeys); j++) {
-			/* check for match independent of the global bit */
-			if ((dd->ipath_pkeys[j] & 0x7fff) !=
-			    (pd->port_pkeys[i] & 0x7fff))
-				continue;
-			if (atomic_dec_and_test(&dd->ipath_pkeyrefs[j])) {
-				ipath_cdbg(VERBOSE, "p%u clear key "
-					   "%x matches #%d\n",
-					   pd->port_port,
-					   pd->port_pkeys[i], j);
-				ipath_stats.sps_pkeys[j] =
-					dd->ipath_pkeys[j] = 0;
-				pchanged++;
-			} else {
-				ipath_cdbg(VERBOSE, "p%u key %x matches #%d, "
-					   "but ref still %d\n", pd->port_port,
-					   pd->port_pkeys[i], j,
-					   atomic_read(&dd->ipath_pkeyrefs[j]));
-				break;
-			}
-		}
-		pd->port_pkeys[i] = 0;
-	}
-	if (pchanged) {
-		u64 pkey = (u64) dd->ipath_pkeys[0] |
-			((u64) dd->ipath_pkeys[1] << 16) |
-			((u64) dd->ipath_pkeys[2] << 32) |
-			((u64) dd->ipath_pkeys[3] << 48);
-		ipath_cdbg(VERBOSE, "p%u old pkey reg %llx, "
-			   "new pkey reg %llx\n", pd->port_port,
-			   (unsigned long long) oldpkey,
-			   (unsigned long long) pkey);
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_partitionkey,
-				 pkey);
-	}
-}
-
-/*
- * Initialize the port data with the receive buffer sizes
- * so this can be done while the master port is locked.
- * Otherwise, there is a race with a slave opening the port
- * and seeing these fields uninitialized.
- */
-static void init_user_egr_sizes(struct ipath_portdata *pd)
-{
-	struct ipath_devdata *dd = pd->port_dd;
-	unsigned egrperchunk, egrcnt, size;
-
-	/*
-	 * to avoid wasting a lot of memory, we allocate 32KB chunks of
-	 * physically contiguous memory, advance through it until used up
-	 * and then allocate more.  Of course, we need memory to store those
-	 * extra pointers, now.  Started out with 256KB, but under heavy
-	 * memory pressure (creating large files and then copying them over
-	 * NFS while doing lots of MPI jobs), we hit some allocation
-	 * failures, even though we can sleep...  (2.6.10) Still get
-	 * failures at 64K.  32K is the lowest we can go without wasting
-	 * additional memory.
-	 */
-	size = 0x8000;
-	egrperchunk = size / dd->ipath_rcvegrbufsize;
-	egrcnt = dd->ipath_rcvegrcnt;
-	pd->port_rcvegrbuf_chunks = (egrcnt + egrperchunk - 1) / egrperchunk;
-	pd->port_rcvegrbufs_perchunk = egrperchunk;
-	pd->port_rcvegrbuf_size = size;
-}
-
-/**
- * ipath_create_user_egr - allocate eager TID buffers
- * @pd: the port to allocate TID buffers for
- *
- * This routine is now quite different for user and kernel, because
- * the kernel uses skb's, for the accelerated network performance
- * This is the user port version
- *
- * Allocate the eager TID buffers and program them into infinipath
- * They are no longer completely contiguous, we do multiple allocation
- * calls.
- */
-static int ipath_create_user_egr(struct ipath_portdata *pd)
-{
-	struct ipath_devdata *dd = pd->port_dd;
-	unsigned e, egrcnt, egrperchunk, chunk, egrsize, egroff;
-	size_t size;
-	int ret;
-	gfp_t gfp_flags;
-
-	/*
-	 * GFP_USER, but without GFP_FS, so buffer cache can be
-	 * coalesced (we hope); otherwise, even at order 4,
-	 * heavy filesystem activity makes these fail, and we can
-	 * use compound pages.
-	 */
-	gfp_flags = __GFP_RECLAIM | __GFP_IO | __GFP_COMP;
-
-	egrcnt = dd->ipath_rcvegrcnt;
-	/* TID number offset for this port */
-	egroff = (pd->port_port - 1) * egrcnt + dd->ipath_p0_rcvegrcnt;
-	egrsize = dd->ipath_rcvegrbufsize;
-	ipath_cdbg(VERBOSE, "Allocating %d egr buffers, at egrtid "
-		   "offset %x, egrsize %u\n", egrcnt, egroff, egrsize);
-
-	chunk = pd->port_rcvegrbuf_chunks;
-	egrperchunk = pd->port_rcvegrbufs_perchunk;
-	size = pd->port_rcvegrbuf_size;
-	pd->port_rcvegrbuf = kmalloc_array(chunk, sizeof(pd->port_rcvegrbuf[0]),
-					   GFP_KERNEL);
-	if (!pd->port_rcvegrbuf) {
-		ret = -ENOMEM;
-		goto bail;
-	}
-	pd->port_rcvegrbuf_phys =
-		kmalloc_array(chunk, sizeof(pd->port_rcvegrbuf_phys[0]),
-			      GFP_KERNEL);
-	if (!pd->port_rcvegrbuf_phys) {
-		ret = -ENOMEM;
-		goto bail_rcvegrbuf;
-	}
-	for (e = 0; e < pd->port_rcvegrbuf_chunks; e++) {
-
-		pd->port_rcvegrbuf[e] = dma_alloc_coherent(
-			&dd->pcidev->dev, size, &pd->port_rcvegrbuf_phys[e],
-			gfp_flags);
-
-		if (!pd->port_rcvegrbuf[e]) {
-			ret = -ENOMEM;
-			goto bail_rcvegrbuf_phys;
-		}
-	}
-
-	pd->port_rcvegr_phys = pd->port_rcvegrbuf_phys[0];
-
-	for (e = chunk = 0; chunk < pd->port_rcvegrbuf_chunks; chunk++) {
-		dma_addr_t pa = pd->port_rcvegrbuf_phys[chunk];
-		unsigned i;
-
-		for (i = 0; e < egrcnt && i < egrperchunk; e++, i++) {
-			dd->ipath_f_put_tid(dd, e + egroff +
-					    (u64 __iomem *)
-					    ((char __iomem *)
-					     dd->ipath_kregbase +
-					     dd->ipath_rcvegrbase),
-					    RCVHQ_RCV_TYPE_EAGER, pa);
-			pa += egrsize;
-		}
-		cond_resched();	/* don't hog the cpu */
-	}
-
-	ret = 0;
-	goto bail;
-
-bail_rcvegrbuf_phys:
-	for (e = 0; e < pd->port_rcvegrbuf_chunks &&
-		pd->port_rcvegrbuf[e]; e++) {
-		dma_free_coherent(&dd->pcidev->dev, size,
-				  pd->port_rcvegrbuf[e],
-				  pd->port_rcvegrbuf_phys[e]);
-
-	}
-	kfree(pd->port_rcvegrbuf_phys);
-	pd->port_rcvegrbuf_phys = NULL;
-bail_rcvegrbuf:
-	kfree(pd->port_rcvegrbuf);
-	pd->port_rcvegrbuf = NULL;
-bail:
-	return ret;
-}
-
-
-/* common code for the mappings on dma_alloc_coherent mem */
-static int ipath_mmap_mem(struct vm_area_struct *vma,
-	struct ipath_portdata *pd, unsigned len, int write_ok,
-	void *kvaddr, char *what)
-{
-	struct ipath_devdata *dd = pd->port_dd;
-	unsigned long pfn;
-	int ret;
-
-	if ((vma->vm_end - vma->vm_start) > len) {
-		dev_info(&dd->pcidev->dev,
-		         "FAIL on %s: len %lx > %x\n", what,
-			 vma->vm_end - vma->vm_start, len);
-		ret = -EFAULT;
-		goto bail;
-	}
-
-	if (!write_ok) {
-		if (vma->vm_flags & VM_WRITE) {
-			dev_info(&dd->pcidev->dev,
-				 "%s must be mapped readonly\n", what);
-			ret = -EPERM;
-			goto bail;
-		}
-
-		/* don't allow them to later change with mprotect */
-		vma->vm_flags &= ~VM_MAYWRITE;
-	}
-
-	pfn = virt_to_phys(kvaddr) >> PAGE_SHIFT;
-	ret = remap_pfn_range(vma, vma->vm_start, pfn,
-			      len, vma->vm_page_prot);
-	if (ret)
-		dev_info(&dd->pcidev->dev, "%s port%u mmap of %lx, %x "
-			 "bytes r%c failed: %d\n", what, pd->port_port,
-			 pfn, len, write_ok?'w':'o', ret);
-	else
-		ipath_cdbg(VERBOSE, "%s port%u mmaped %lx, %x bytes "
-			   "r%c\n", what, pd->port_port, pfn, len,
-			   write_ok?'w':'o');
-bail:
-	return ret;
-}
-
-static int mmap_ureg(struct vm_area_struct *vma, struct ipath_devdata *dd,
-		     u64 ureg)
-{
-	unsigned long phys;
-	int ret;
-
-	/*
-	 * This is real hardware, so use io_remap.  This is the mechanism
-	 * for the user process to update the head registers for their port
-	 * in the chip.
-	 */
-	if ((vma->vm_end - vma->vm_start) > PAGE_SIZE) {
-		dev_info(&dd->pcidev->dev, "FAIL mmap userreg: reqlen "
-			 "%lx > PAGE\n", vma->vm_end - vma->vm_start);
-		ret = -EFAULT;
-	} else {
-		phys = dd->ipath_physaddr + ureg;
-		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-
-		vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND;
-		ret = io_remap_pfn_range(vma, vma->vm_start,
-					 phys >> PAGE_SHIFT,
-					 vma->vm_end - vma->vm_start,
-					 vma->vm_page_prot);
-	}
-	return ret;
-}
-
-static int mmap_piobufs(struct vm_area_struct *vma,
-			struct ipath_devdata *dd,
-			struct ipath_portdata *pd,
-			unsigned piobufs, unsigned piocnt)
-{
-	unsigned long phys;
-	int ret;
-
-	/*
-	 * When we map the PIO buffers in the chip, we want to map them as
-	 * writeonly, no read possible.   This prevents access to previous
-	 * process data, and catches users who might try to read the i/o
-	 * space due to a bug.
-	 */
-	if ((vma->vm_end - vma->vm_start) > (piocnt * dd->ipath_palign)) {
-		dev_info(&dd->pcidev->dev, "FAIL mmap piobufs: "
-			 "reqlen %lx > PAGE\n",
-			 vma->vm_end - vma->vm_start);
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	phys = dd->ipath_physaddr + piobufs;
-
-#if defined(__powerpc__)
-	/* There isn't a generic way to specify writethrough mappings */
-	pgprot_val(vma->vm_page_prot) |= _PAGE_NO_CACHE;
-	pgprot_val(vma->vm_page_prot) |= _PAGE_WRITETHRU;
-	pgprot_val(vma->vm_page_prot) &= ~_PAGE_GUARDED;
-#endif
-
-	/*
-	 * don't allow them to later change to readable with mprotect (for when
-	 * not initially mapped readable, as is normally the case)
-	 */
-	vma->vm_flags &= ~VM_MAYREAD;
-	vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND;
-
-	ret = io_remap_pfn_range(vma, vma->vm_start, phys >> PAGE_SHIFT,
-				 vma->vm_end - vma->vm_start,
-				 vma->vm_page_prot);
-bail:
-	return ret;
-}
-
-static int mmap_rcvegrbufs(struct vm_area_struct *vma,
-			   struct ipath_portdata *pd)
-{
-	struct ipath_devdata *dd = pd->port_dd;
-	unsigned long start, size;
-	size_t total_size, i;
-	unsigned long pfn;
-	int ret;
-
-	size = pd->port_rcvegrbuf_size;
-	total_size = pd->port_rcvegrbuf_chunks * size;
-	if ((vma->vm_end - vma->vm_start) > total_size) {
-		dev_info(&dd->pcidev->dev, "FAIL on egr bufs: "
-			 "reqlen %lx > actual %lx\n",
-			 vma->vm_end - vma->vm_start,
-			 (unsigned long) total_size);
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	if (vma->vm_flags & VM_WRITE) {
-		dev_info(&dd->pcidev->dev, "Can't map eager buffers as "
-			 "writable (flags=%lx)\n", vma->vm_flags);
-		ret = -EPERM;
-		goto bail;
-	}
-	/* don't allow them to later change to writeable with mprotect */
-	vma->vm_flags &= ~VM_MAYWRITE;
-
-	start = vma->vm_start;
-
-	for (i = 0; i < pd->port_rcvegrbuf_chunks; i++, start += size) {
-		pfn = virt_to_phys(pd->port_rcvegrbuf[i]) >> PAGE_SHIFT;
-		ret = remap_pfn_range(vma, start, pfn, size,
-				      vma->vm_page_prot);
-		if (ret < 0)
-			goto bail;
-	}
-	ret = 0;
-
-bail:
-	return ret;
-}
-
-/*
- * ipath_file_vma_fault - handle a VMA page fault.
- */
-static int ipath_file_vma_fault(struct vm_area_struct *vma,
-					struct vm_fault *vmf)
-{
-	struct page *page;
-
-	page = vmalloc_to_page((void *)(vmf->pgoff << PAGE_SHIFT));
-	if (!page)
-		return VM_FAULT_SIGBUS;
-	get_page(page);
-	vmf->page = page;
-
-	return 0;
-}
-
-static const struct vm_operations_struct ipath_file_vm_ops = {
-	.fault = ipath_file_vma_fault,
-};
-
-static int mmap_kvaddr(struct vm_area_struct *vma, u64 pgaddr,
-		       struct ipath_portdata *pd, unsigned subport)
-{
-	unsigned long len;
-	struct ipath_devdata *dd;
-	void *addr;
-	size_t size;
-	int ret = 0;
-
-	/* If the port is not shared, all addresses should be physical */
-	if (!pd->port_subport_cnt)
-		goto bail;
-
-	dd = pd->port_dd;
-	size = pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size;
-
-	/*
-	 * Each process has all the subport uregbase, rcvhdrq, and
-	 * rcvegrbufs mmapped - as an array for all the processes,
-	 * and also separately for this process.
-	 */
-	if (pgaddr == cvt_kvaddr(pd->subport_uregbase)) {
-		addr = pd->subport_uregbase;
-		size = PAGE_SIZE * pd->port_subport_cnt;
-	} else if (pgaddr == cvt_kvaddr(pd->subport_rcvhdr_base)) {
-		addr = pd->subport_rcvhdr_base;
-		size = pd->port_rcvhdrq_size * pd->port_subport_cnt;
-	} else if (pgaddr == cvt_kvaddr(pd->subport_rcvegrbuf)) {
-		addr = pd->subport_rcvegrbuf;
-		size *= pd->port_subport_cnt;
-        } else if (pgaddr == cvt_kvaddr(pd->subport_uregbase +
-                                        PAGE_SIZE * subport)) {
-                addr = pd->subport_uregbase + PAGE_SIZE * subport;
-                size = PAGE_SIZE;
-        } else if (pgaddr == cvt_kvaddr(pd->subport_rcvhdr_base +
-                                pd->port_rcvhdrq_size * subport)) {
-                addr = pd->subport_rcvhdr_base +
-                        pd->port_rcvhdrq_size * subport;
-                size = pd->port_rcvhdrq_size;
-        } else if (pgaddr == cvt_kvaddr(pd->subport_rcvegrbuf +
-                               size * subport)) {
-                addr = pd->subport_rcvegrbuf + size * subport;
-                /* rcvegrbufs are read-only on the slave */
-                if (vma->vm_flags & VM_WRITE) {
-                        dev_info(&dd->pcidev->dev,
-                                 "Can't map eager buffers as "
-                                 "writable (flags=%lx)\n", vma->vm_flags);
-                        ret = -EPERM;
-                        goto bail;
-                }
-                /*
-                 * Don't allow permission to later change to writeable
-                 * with mprotect.
-                 */
-                vma->vm_flags &= ~VM_MAYWRITE;
-	} else {
-		goto bail;
-	}
-	len = vma->vm_end - vma->vm_start;
-	if (len > size) {
-		ipath_cdbg(MM, "FAIL: reqlen %lx > %zx\n", len, size);
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	vma->vm_pgoff = (unsigned long) addr >> PAGE_SHIFT;
-	vma->vm_ops = &ipath_file_vm_ops;
-	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
-	ret = 1;
-
-bail:
-	return ret;
-}
-
-/**
- * ipath_mmap - mmap various structures into user space
- * @fp: the file pointer
- * @vma: the VM area
- *
- * We use this to have a shared buffer between the kernel and the user code
- * for the rcvhdr queue, egr buffers, and the per-port user regs and pio
- * buffers in the chip.  We have the open and close entries so we can bump
- * the ref count and keep the driver from being unloaded while still mapped.
- */
-static int ipath_mmap(struct file *fp, struct vm_area_struct *vma)
-{
-	struct ipath_portdata *pd;
-	struct ipath_devdata *dd;
-	u64 pgaddr, ureg;
-	unsigned piobufs, piocnt;
-	int ret;
-
-	pd = port_fp(fp);
-	if (!pd) {
-		ret = -EINVAL;
-		goto bail;
-	}
-	dd = pd->port_dd;
-
-	/*
-	 * This is the ipath_do_user_init() code, mapping the shared buffers
-	 * into the user process. The address referred to by vm_pgoff is the
-	 * file offset passed via mmap().  For shared ports, this is the
-	 * kernel vmalloc() address of the pages to share with the master.
-	 * For non-shared or master ports, this is a physical address.
-	 * We only do one mmap for each space mapped.
-	 */
-	pgaddr = vma->vm_pgoff << PAGE_SHIFT;
-
-	/*
-	 * Check for 0 in case one of the allocations failed, but user
-	 * called mmap anyway.
-	 */
-	if (!pgaddr)  {
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	ipath_cdbg(MM, "pgaddr %llx vm_start=%lx len %lx port %u:%u:%u\n",
-		   (unsigned long long) pgaddr, vma->vm_start,
-		   vma->vm_end - vma->vm_start, dd->ipath_unit,
-		   pd->port_port, subport_fp(fp));
-
-	/*
-	 * Physical addresses must fit in 40 bits for our hardware.
-	 * Check for kernel virtual addresses first, anything else must
-	 * match a HW or memory address.
-	 */
-	ret = mmap_kvaddr(vma, pgaddr, pd, subport_fp(fp));
-	if (ret) {
-		if (ret > 0)
-			ret = 0;
-		goto bail;
-	}
-
-	ureg = dd->ipath_uregbase + dd->ipath_ureg_align * pd->port_port;
-	if (!pd->port_subport_cnt) {
-		/* port is not shared */
-		piocnt = pd->port_piocnt;
-		piobufs = pd->port_piobufs;
-	} else if (!subport_fp(fp)) {
-		/* caller is the master */
-		piocnt = (pd->port_piocnt / pd->port_subport_cnt) +
-			 (pd->port_piocnt % pd->port_subport_cnt);
-		piobufs = pd->port_piobufs +
-			dd->ipath_palign * (pd->port_piocnt - piocnt);
-	} else {
-		unsigned slave = subport_fp(fp) - 1;
-
-		/* caller is a slave */
-		piocnt = pd->port_piocnt / pd->port_subport_cnt;
-		piobufs = pd->port_piobufs + dd->ipath_palign * piocnt * slave;
-	}
-
-	if (pgaddr == ureg)
-		ret = mmap_ureg(vma, dd, ureg);
-	else if (pgaddr == piobufs)
-		ret = mmap_piobufs(vma, dd, pd, piobufs, piocnt);
-	else if (pgaddr == dd->ipath_pioavailregs_phys)
-		/* in-memory copy of pioavail registers */
-		ret = ipath_mmap_mem(vma, pd, PAGE_SIZE, 0,
-			      	     (void *) dd->ipath_pioavailregs_dma,
-				     "pioavail registers");
-	else if (pgaddr == pd->port_rcvegr_phys)
-		ret = mmap_rcvegrbufs(vma, pd);
-	else if (pgaddr == (u64) pd->port_rcvhdrq_phys)
-		/*
-		 * The rcvhdrq itself; readonly except on HT (so have
-		 * to allow writable mapping), multiple pages, contiguous
-		 * from an i/o perspective.
-		 */
-		ret = ipath_mmap_mem(vma, pd, pd->port_rcvhdrq_size, 1,
-				     pd->port_rcvhdrq,
-				     "rcvhdrq");
-	else if (pgaddr == (u64) pd->port_rcvhdrqtailaddr_phys)
-		/* in-memory copy of rcvhdrq tail register */
-		ret = ipath_mmap_mem(vma, pd, PAGE_SIZE, 0,
-				     pd->port_rcvhdrtail_kvaddr,
-				     "rcvhdrq tail");
-	else
-		ret = -EINVAL;
-
-	vma->vm_private_data = NULL;
-
-	if (ret < 0)
-		dev_info(&dd->pcidev->dev,
-			 "Failure %d on off %llx len %lx\n",
-			 -ret, (unsigned long long)pgaddr,
-			 vma->vm_end - vma->vm_start);
-bail:
-	return ret;
-}
-
-static unsigned ipath_poll_hdrqfull(struct ipath_portdata *pd)
-{
-	unsigned pollflag = 0;
-
-	if ((pd->poll_type & IPATH_POLL_TYPE_OVERFLOW) &&
-	    pd->port_hdrqfull != pd->port_hdrqfull_poll) {
-		pollflag |= POLLIN | POLLRDNORM;
-		pd->port_hdrqfull_poll = pd->port_hdrqfull;
-	}
-
-	return pollflag;
-}
-
-static unsigned int ipath_poll_urgent(struct ipath_portdata *pd,
-				      struct file *fp,
-				      struct poll_table_struct *pt)
-{
-	unsigned pollflag = 0;
-	struct ipath_devdata *dd;
-
-	dd = pd->port_dd;
-
-	/* variable access in ipath_poll_hdrqfull() needs this */
-	rmb();
-	pollflag = ipath_poll_hdrqfull(pd);
-
-	if (pd->port_urgent != pd->port_urgent_poll) {
-		pollflag |= POLLIN | POLLRDNORM;
-		pd->port_urgent_poll = pd->port_urgent;
-	}
-
-	if (!pollflag) {
-		/* this saves a spin_lock/unlock in interrupt handler... */
-		set_bit(IPATH_PORT_WAITING_URG, &pd->port_flag);
-		/* flush waiting flag so don't miss an event... */
-		wmb();
-		poll_wait(fp, &pd->port_wait, pt);
-	}
-
-	return pollflag;
-}
-
-static unsigned int ipath_poll_next(struct ipath_portdata *pd,
-				    struct file *fp,
-				    struct poll_table_struct *pt)
-{
-	u32 head;
-	u32 tail;
-	unsigned pollflag = 0;
-	struct ipath_devdata *dd;
-
-	dd = pd->port_dd;
-
-	/* variable access in ipath_poll_hdrqfull() needs this */
-	rmb();
-	pollflag = ipath_poll_hdrqfull(pd);
-
-	head = ipath_read_ureg32(dd, ur_rcvhdrhead, pd->port_port);
-	if (pd->port_rcvhdrtail_kvaddr)
-		tail = ipath_get_rcvhdrtail(pd);
-	else
-		tail = ipath_read_ureg32(dd, ur_rcvhdrtail, pd->port_port);
-
-	if (head != tail)
-		pollflag |= POLLIN | POLLRDNORM;
-	else {
-		/* this saves a spin_lock/unlock in interrupt handler */
-		set_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag);
-		/* flush waiting flag so we don't miss an event */
-		wmb();
-
-		set_bit(pd->port_port + dd->ipath_r_intravail_shift,
-			&dd->ipath_rcvctrl);
-
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
-				 dd->ipath_rcvctrl);
-
-		if (dd->ipath_rhdrhead_intr_off) /* arm rcv interrupt */
-			ipath_write_ureg(dd, ur_rcvhdrhead,
-					 dd->ipath_rhdrhead_intr_off | head,
-					 pd->port_port);
-
-		poll_wait(fp, &pd->port_wait, pt);
-	}
-
-	return pollflag;
-}
-
-static unsigned int ipath_poll(struct file *fp,
-			       struct poll_table_struct *pt)
-{
-	struct ipath_portdata *pd;
-	unsigned pollflag;
-
-	pd = port_fp(fp);
-	if (!pd)
-		pollflag = 0;
-	else if (pd->poll_type & IPATH_POLL_TYPE_URGENT)
-		pollflag = ipath_poll_urgent(pd, fp, pt);
-	else
-		pollflag = ipath_poll_next(pd, fp, pt);
-
-	return pollflag;
-}
-
-static int ipath_supports_subports(int user_swmajor, int user_swminor)
-{
-	/* no subport implementation prior to software version 1.3 */
-	return (user_swmajor > 1) || (user_swminor >= 3);
-}
-
-static int ipath_compatible_subports(int user_swmajor, int user_swminor)
-{
-	/* this code is written long-hand for clarity */
-	if (IPATH_USER_SWMAJOR != user_swmajor) {
-		/* no promise of compatibility if major mismatch */
-		return 0;
-	}
-	if (IPATH_USER_SWMAJOR == 1) {
-		switch (IPATH_USER_SWMINOR) {
-		case 0:
-		case 1:
-		case 2:
-			/* no subport implementation so cannot be compatible */
-			return 0;
-		case 3:
-			/* 3 is only compatible with itself */
-			return user_swminor == 3;
-		default:
-			/* >= 4 are compatible (or are expected to be) */
-			return user_swminor >= 4;
-		}
-	}
-	/* make no promises yet for future major versions */
-	return 0;
-}
-
-static int init_subports(struct ipath_devdata *dd,
-			 struct ipath_portdata *pd,
-			 const struct ipath_user_info *uinfo)
-{
-	int ret = 0;
-	unsigned num_subports;
-	size_t size;
-
-	/*
-	 * If the user is requesting zero subports,
-	 * skip the subport allocation.
-	 */
-	if (uinfo->spu_subport_cnt <= 0)
-		goto bail;
-
-	/* Self-consistency check for ipath_compatible_subports() */
-	if (ipath_supports_subports(IPATH_USER_SWMAJOR, IPATH_USER_SWMINOR) &&
-	    !ipath_compatible_subports(IPATH_USER_SWMAJOR,
-				       IPATH_USER_SWMINOR)) {
-		dev_info(&dd->pcidev->dev,
-			 "Inconsistent ipath_compatible_subports()\n");
-		goto bail;
-	}
-
-	/* Check for subport compatibility */
-	if (!ipath_compatible_subports(uinfo->spu_userversion >> 16,
-				       uinfo->spu_userversion & 0xffff)) {
-		dev_info(&dd->pcidev->dev,
-			 "Mismatched user version (%d.%d) and driver "
-			 "version (%d.%d) while port sharing. Ensure "
-                         "that driver and library are from the same "
-                         "release.\n",
-			 (int) (uinfo->spu_userversion >> 16),
-                         (int) (uinfo->spu_userversion & 0xffff),
-			 IPATH_USER_SWMAJOR,
-	                 IPATH_USER_SWMINOR);
-		goto bail;
-	}
-	if (uinfo->spu_subport_cnt > INFINIPATH_MAX_SUBPORT) {
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	num_subports = uinfo->spu_subport_cnt;
-	pd->subport_uregbase = vzalloc(PAGE_SIZE * num_subports);
-	if (!pd->subport_uregbase) {
-		ret = -ENOMEM;
-		goto bail;
-	}
-	/* Note: pd->port_rcvhdrq_size isn't initialized yet. */
-	size = ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize *
-		     sizeof(u32), PAGE_SIZE) * num_subports;
-	pd->subport_rcvhdr_base = vzalloc(size);
-	if (!pd->subport_rcvhdr_base) {
-		ret = -ENOMEM;
-		goto bail_ureg;
-	}
-
-	pd->subport_rcvegrbuf = vzalloc(pd->port_rcvegrbuf_chunks *
-					pd->port_rcvegrbuf_size *
-					num_subports);
-	if (!pd->subport_rcvegrbuf) {
-		ret = -ENOMEM;
-		goto bail_rhdr;
-	}
-
-	pd->port_subport_cnt = uinfo->spu_subport_cnt;
-	pd->port_subport_id = uinfo->spu_subport_id;
-	pd->active_slaves = 1;
-	set_bit(IPATH_PORT_MASTER_UNINIT, &pd->port_flag);
-	goto bail;
-
-bail_rhdr:
-	vfree(pd->subport_rcvhdr_base);
-bail_ureg:
-	vfree(pd->subport_uregbase);
-	pd->subport_uregbase = NULL;
-bail:
-	return ret;
-}
-
-static int try_alloc_port(struct ipath_devdata *dd, int port,
-			  struct file *fp,
-			  const struct ipath_user_info *uinfo)
-{
-	struct ipath_portdata *pd;
-	int ret;
-
-	if (!(pd = dd->ipath_pd[port])) {
-		void *ptmp;
-
-		pd = kzalloc(sizeof(struct ipath_portdata), GFP_KERNEL);
-
-		/*
-		 * Allocate memory for use in ipath_tid_update() just once
-		 * at open, not per call.  Reduces cost of expected send
-		 * setup.
-		 */
-		ptmp = kmalloc(dd->ipath_rcvtidcnt * sizeof(u16) +
-			       dd->ipath_rcvtidcnt * sizeof(struct page **),
-			       GFP_KERNEL);
-		if (!pd || !ptmp) {
-			ipath_dev_err(dd, "Unable to allocate portdata "
-				      "memory, failing open\n");
-			ret = -ENOMEM;
-			kfree(pd);
-			kfree(ptmp);
-			goto bail;
-		}
-		dd->ipath_pd[port] = pd;
-		dd->ipath_pd[port]->port_port = port;
-		dd->ipath_pd[port]->port_dd = dd;
-		dd->ipath_pd[port]->port_tid_pg_list = ptmp;
-		init_waitqueue_head(&dd->ipath_pd[port]->port_wait);
-	}
-	if (!pd->port_cnt) {
-		pd->userversion = uinfo->spu_userversion;
-		init_user_egr_sizes(pd);
-		if ((ret = init_subports(dd, pd, uinfo)) != 0)
-			goto bail;
-		ipath_cdbg(PROC, "%s[%u] opened unit:port %u:%u\n",
-			   current->comm, current->pid, dd->ipath_unit,
-			   port);
-		pd->port_cnt = 1;
-		port_fp(fp) = pd;
-		pd->port_pid = get_pid(task_pid(current));
-		strlcpy(pd->port_comm, current->comm, sizeof(pd->port_comm));
-		ipath_stats.sps_ports++;
-		ret = 0;
-	} else
-		ret = -EBUSY;
-
-bail:
-	return ret;
-}
-
-static inline int usable(struct ipath_devdata *dd)
-{
-	return dd &&
-		(dd->ipath_flags & IPATH_PRESENT) &&
-		dd->ipath_kregbase &&
-		dd->ipath_lid &&
-		!(dd->ipath_flags & (IPATH_LINKDOWN | IPATH_DISABLED
-				     | IPATH_LINKUNK));
-}
-
-static int find_free_port(int unit, struct file *fp,
-			  const struct ipath_user_info *uinfo)
-{
-	struct ipath_devdata *dd = ipath_lookup(unit);
-	int ret, i;
-
-	if (!dd) {
-		ret = -ENODEV;
-		goto bail;
-	}
-
-	if (!usable(dd)) {
-		ret = -ENETDOWN;
-		goto bail;
-	}
-
-	for (i = 1; i < dd->ipath_cfgports; i++) {
-		ret = try_alloc_port(dd, i, fp, uinfo);
-		if (ret != -EBUSY)
-			goto bail;
-	}
-	ret = -EBUSY;
-
-bail:
-	return ret;
-}
-
-static int find_best_unit(struct file *fp,
-			  const struct ipath_user_info *uinfo)
-{
-	int ret = 0, i, prefunit = -1, devmax;
-	int maxofallports, npresent, nup;
-	int ndev;
-
-	devmax = ipath_count_units(&npresent, &nup, &maxofallports);
-
-	/*
-	 * This code is present to allow a knowledgeable person to
-	 * specify the layout of processes to processors before opening
-	 * this driver, and then we'll assign the process to the "closest"
-	 * InfiniPath chip to that processor (we assume reasonable connectivity,
-	 * for now).  This code assumes that if affinity has been set
-	 * before this point, that at most one cpu is set; for now this
-	 * is reasonable.  I check for both cpumask_empty() and cpumask_full(),
-	 * in case some kernel variant sets none of the bits when no
-	 * affinity is set.  2.6.11 and 12 kernels have all present
-	 * cpus set.  Some day we'll have to fix it up further to handle
-	 * a cpu subset.  This algorithm fails for two HT chips connected
-	 * in tunnel fashion.  Eventually this needs real topology
-	 * information.  There may be some issues with dual core numbering
-	 * as well.  This needs more work prior to release.
-	 */
-	if (!cpumask_empty(tsk_cpus_allowed(current)) &&
-	    !cpumask_full(tsk_cpus_allowed(current))) {
-		int ncpus = num_online_cpus(), curcpu = -1, nset = 0;
-		get_online_cpus();
-		for_each_online_cpu(i)
-			if (cpumask_test_cpu(i, tsk_cpus_allowed(current))) {
-				ipath_cdbg(PROC, "%s[%u] affinity set for "
-					   "cpu %d/%d\n", current->comm,
-					   current->pid, i, ncpus);
-				curcpu = i;
-				nset++;
-			}
-		put_online_cpus();
-		if (curcpu != -1 && nset != ncpus) {
-			if (npresent) {
-				prefunit = curcpu / (ncpus / npresent);
-				ipath_cdbg(PROC,"%s[%u] %d chips, %d cpus, "
-					  "%d cpus/chip, select unit %d\n",
-					  current->comm, current->pid,
-					  npresent, ncpus, ncpus / npresent,
-					  prefunit);
-			}
-		}
-	}
-
-	/*
-	 * user ports start at 1, kernel port is 0
-	 * For now, we do round-robin access across all chips
-	 */
-
-	if (prefunit != -1)
-		devmax = prefunit + 1;
-recheck:
-	for (i = 1; i < maxofallports; i++) {
-		for (ndev = prefunit != -1 ? prefunit : 0; ndev < devmax;
-		     ndev++) {
-			struct ipath_devdata *dd = ipath_lookup(ndev);
-
-			if (!usable(dd))
-				continue; /* can't use this unit */
-			if (i >= dd->ipath_cfgports)
-				/*
-				 * Maxed out on users of this unit. Try
-				 * next.
-				 */
-				continue;
-			ret = try_alloc_port(dd, i, fp, uinfo);
-			if (!ret)
-				goto done;
-		}
-	}
-
-	if (npresent) {
-		if (nup == 0) {
-			ret = -ENETDOWN;
-			ipath_dbg("No ports available (none initialized "
-				  "and ready)\n");
-		} else {
-			if (prefunit > 0) {
-				/* if started above 0, retry from 0 */
-				ipath_cdbg(PROC,
-					   "%s[%u] no ports on prefunit "
-					   "%d, clear and re-check\n",
-					   current->comm, current->pid,
-					   prefunit);
-				devmax = ipath_count_units(NULL, NULL,
-							   NULL);
-				prefunit = -1;
-				goto recheck;
-			}
-			ret = -EBUSY;
-			ipath_dbg("No ports available\n");
-		}
-	} else {
-		ret = -ENXIO;
-		ipath_dbg("No boards found\n");
-	}
-
-done:
-	return ret;
-}
-
-static int find_shared_port(struct file *fp,
-			    const struct ipath_user_info *uinfo)
-{
-	int devmax, ndev, i;
-	int ret = 0;
-
-	devmax = ipath_count_units(NULL, NULL, NULL);
-
-	for (ndev = 0; ndev < devmax; ndev++) {
-		struct ipath_devdata *dd = ipath_lookup(ndev);
-
-		if (!usable(dd))
-			continue;
-		for (i = 1; i < dd->ipath_cfgports; i++) {
-			struct ipath_portdata *pd = dd->ipath_pd[i];
-
-			/* Skip ports which are not yet open */
-			if (!pd || !pd->port_cnt)
-				continue;
-			/* Skip port if it doesn't match the requested one */
-			if (pd->port_subport_id != uinfo->spu_subport_id)
-				continue;
-			/* Verify the sharing process matches the master */
-			if (pd->port_subport_cnt != uinfo->spu_subport_cnt ||
-			    pd->userversion != uinfo->spu_userversion ||
-			    pd->port_cnt >= pd->port_subport_cnt) {
-				ret = -EINVAL;
-				goto done;
-			}
-			port_fp(fp) = pd;
-			subport_fp(fp) = pd->port_cnt++;
-			pd->port_subpid[subport_fp(fp)] =
-				get_pid(task_pid(current));
-			tidcursor_fp(fp) = 0;
-			pd->active_slaves |= 1 << subport_fp(fp);
-			ipath_cdbg(PROC,
-				   "%s[%u] %u sharing %s[%u] unit:port %u:%u\n",
-				   current->comm, current->pid,
-				   subport_fp(fp),
-				   pd->port_comm, pid_nr(pd->port_pid),
-				   dd->ipath_unit, pd->port_port);
-			ret = 1;
-			goto done;
-		}
-	}
-
-done:
-	return ret;
-}
-
-static int ipath_open(struct inode *in, struct file *fp)
-{
-	/* The real work is performed later in ipath_assign_port() */
-	fp->private_data = kzalloc(sizeof(struct ipath_filedata), GFP_KERNEL);
-	return fp->private_data ? 0 : -ENOMEM;
-}
-
-/* Get port early, so can set affinity prior to memory allocation */
-static int ipath_assign_port(struct file *fp,
-			      const struct ipath_user_info *uinfo)
-{
-	int ret;
-	int i_minor;
-	unsigned swmajor, swminor;
-
-	/* Check to be sure we haven't already initialized this file */
-	if (port_fp(fp)) {
-		ret = -EINVAL;
-		goto done;
-	}
-
-	/* for now, if major version is different, bail */
-	swmajor = uinfo->spu_userversion >> 16;
-	if (swmajor != IPATH_USER_SWMAJOR) {
-		ipath_dbg("User major version %d not same as driver "
-			  "major %d\n", uinfo->spu_userversion >> 16,
-			  IPATH_USER_SWMAJOR);
-		ret = -ENODEV;
-		goto done;
-	}
-
-	swminor = uinfo->spu_userversion & 0xffff;
-	if (swminor != IPATH_USER_SWMINOR)
-		ipath_dbg("User minor version %d not same as driver "
-			  "minor %d\n", swminor, IPATH_USER_SWMINOR);
-
-	mutex_lock(&ipath_mutex);
-
-	if (ipath_compatible_subports(swmajor, swminor) &&
-	    uinfo->spu_subport_cnt &&
-	    (ret = find_shared_port(fp, uinfo))) {
-		if (ret > 0)
-			ret = 0;
-		goto done_chk_sdma;
-	}
-
-	i_minor = iminor(file_inode(fp)) - IPATH_USER_MINOR_BASE;
-	ipath_cdbg(VERBOSE, "open on dev %lx (minor %d)\n",
-		   (long)file_inode(fp)->i_rdev, i_minor);
-
-	if (i_minor)
-		ret = find_free_port(i_minor - 1, fp, uinfo);
-	else
-		ret = find_best_unit(fp, uinfo);
-
-done_chk_sdma:
-	if (!ret) {
-		struct ipath_filedata *fd = fp->private_data;
-		const struct ipath_portdata *pd = fd->pd;
-		const struct ipath_devdata *dd = pd->port_dd;
-
-		fd->pq = ipath_user_sdma_queue_create(&dd->pcidev->dev,
-						      dd->ipath_unit,
-						      pd->port_port,
-						      fd->subport);
-
-		if (!fd->pq)
-			ret = -ENOMEM;
-	}
-
-	mutex_unlock(&ipath_mutex);
-
-done:
-	return ret;
-}
-
-
-static int ipath_do_user_init(struct file *fp,
-			      const struct ipath_user_info *uinfo)
-{
-	int ret;
-	struct ipath_portdata *pd = port_fp(fp);
-	struct ipath_devdata *dd;
-	u32 head32;
-
-	/* Subports don't need to initialize anything since master did it. */
-	if (subport_fp(fp)) {
-		ret = wait_event_interruptible(pd->port_wait,
-			!test_bit(IPATH_PORT_MASTER_UNINIT, &pd->port_flag));
-		goto done;
-	}
-
-	dd = pd->port_dd;
-
-	if (uinfo->spu_rcvhdrsize) {
-		ret = ipath_setrcvhdrsize(dd, uinfo->spu_rcvhdrsize);
-		if (ret)
-			goto done;
-	}
-
-	/* for now we do nothing with rcvhdrcnt: uinfo->spu_rcvhdrcnt */
-
-	/* some ports may get extra buffers, calculate that here */
-	if (pd->port_port <= dd->ipath_ports_extrabuf)
-		pd->port_piocnt = dd->ipath_pbufsport + 1;
-	else
-		pd->port_piocnt = dd->ipath_pbufsport;
-
-	/* for right now, kernel piobufs are at end, so port 1 is at 0 */
-	if (pd->port_port <= dd->ipath_ports_extrabuf)
-		pd->port_pio_base = (dd->ipath_pbufsport + 1)
-			* (pd->port_port - 1);
-	else
-		pd->port_pio_base = dd->ipath_ports_extrabuf +
-			dd->ipath_pbufsport * (pd->port_port - 1);
-	pd->port_piobufs = dd->ipath_piobufbase +
-		pd->port_pio_base * dd->ipath_palign;
-	ipath_cdbg(VERBOSE, "piobuf base for port %u is 0x%x, piocnt %u,"
-		" first pio %u\n", pd->port_port, pd->port_piobufs,
-		pd->port_piocnt, pd->port_pio_base);
-	ipath_chg_pioavailkernel(dd, pd->port_pio_base, pd->port_piocnt, 0);
-
-	/*
-	 * Now allocate the rcvhdr Q and eager TIDs; skip the TID
-	 * array for time being.  If pd->port_port > chip-supported,
-	 * we need to do extra stuff here to handle by handling overflow
-	 * through port 0, someday
-	 */
-	ret = ipath_create_rcvhdrq(dd, pd);
-	if (!ret)
-		ret = ipath_create_user_egr(pd);
-	if (ret)
-		goto done;
-
-	/*
-	 * set the eager head register for this port to the current values
-	 * of the tail pointers, since we don't know if they were
-	 * updated on last use of the port.
-	 */
-	head32 = ipath_read_ureg32(dd, ur_rcvegrindextail, pd->port_port);
-	ipath_write_ureg(dd, ur_rcvegrindexhead, head32, pd->port_port);
-	pd->port_lastrcvhdrqtail = -1;
-	ipath_cdbg(VERBOSE, "Wrote port%d egrhead %x from tail regs\n",
-		pd->port_port, head32);
-	pd->port_tidcursor = 0;	/* start at beginning after open */
-
-	/* initialize poll variables... */
-	pd->port_urgent = 0;
-	pd->port_urgent_poll = 0;
-	pd->port_hdrqfull_poll = pd->port_hdrqfull;
-
-	/*
-	 * Now enable the port for receive.
-	 * For chips that are set to DMA the tail register to memory
-	 * when they change (and when the update bit transitions from
-	 * 0 to 1.  So for those chips, we turn it off and then back on.
-	 * This will (very briefly) affect any other open ports, but the
-	 * duration is very short, and therefore isn't an issue.  We
-	 * explicitly set the in-memory tail copy to 0 beforehand, so we
-	 * don't have to wait to be sure the DMA update has happened
-	 * (chip resets head/tail to 0 on transition to enable).
-	 */
-	set_bit(dd->ipath_r_portenable_shift + pd->port_port,
-		&dd->ipath_rcvctrl);
-	if (!(dd->ipath_flags & IPATH_NODMA_RTAIL)) {
-		if (pd->port_rcvhdrtail_kvaddr)
-			ipath_clear_rcvhdrtail(pd);
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
-			dd->ipath_rcvctrl &
-			~(1ULL << dd->ipath_r_tailupd_shift));
-	}
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
-			 dd->ipath_rcvctrl);
-	/* Notify any waiting slaves */
-	if (pd->port_subport_cnt) {
-		clear_bit(IPATH_PORT_MASTER_UNINIT, &pd->port_flag);
-		wake_up(&pd->port_wait);
-	}
-done:
-	return ret;
-}
-
-/**
- * unlock_exptid - unlock any expected TID entries port still had in use
- * @pd: port
- *
- * We don't actually update the chip here, because we do a bulk update
- * below, using ipath_f_clear_tids.
- */
-static void unlock_expected_tids(struct ipath_portdata *pd)
-{
-	struct ipath_devdata *dd = pd->port_dd;
-	int port_tidbase = pd->port_port * dd->ipath_rcvtidcnt;
-	int i, cnt = 0, maxtid = port_tidbase + dd->ipath_rcvtidcnt;
-
-	ipath_cdbg(VERBOSE, "Port %u unlocking any locked expTID pages\n",
-		   pd->port_port);
-	for (i = port_tidbase; i < maxtid; i++) {
-		struct page *ps = dd->ipath_pageshadow[i];
-
-		if (!ps)
-			continue;
-
-		dd->ipath_pageshadow[i] = NULL;
-		pci_unmap_page(dd->pcidev, dd->ipath_physshadow[i],
-			PAGE_SIZE, PCI_DMA_FROMDEVICE);
-		ipath_release_user_pages_on_close(&ps, 1);
-		cnt++;
-		ipath_stats.sps_pageunlocks++;
-	}
-	if (cnt)
-		ipath_cdbg(VERBOSE, "Port %u locked %u expTID entries\n",
-			   pd->port_port, cnt);
-
-	if (ipath_stats.sps_pagelocks || ipath_stats.sps_pageunlocks)
-		ipath_cdbg(VERBOSE, "%llu pages locked, %llu unlocked\n",
-			   (unsigned long long) ipath_stats.sps_pagelocks,
-			   (unsigned long long)
-			   ipath_stats.sps_pageunlocks);
-}
-
-static int ipath_close(struct inode *in, struct file *fp)
-{
-	struct ipath_filedata *fd;
-	struct ipath_portdata *pd;
-	struct ipath_devdata *dd;
-	unsigned long flags;
-	unsigned port;
-	struct pid *pid;
-
-	ipath_cdbg(VERBOSE, "close on dev %lx, private data %p\n",
-		   (long)in->i_rdev, fp->private_data);
-
-	mutex_lock(&ipath_mutex);
-
-	fd = fp->private_data;
-	fp->private_data = NULL;
-	pd = fd->pd;
-	if (!pd) {
-		mutex_unlock(&ipath_mutex);
-		goto bail;
-	}
-
-	dd = pd->port_dd;
-
-	/* drain user sdma queue */
-	ipath_user_sdma_queue_drain(dd, fd->pq);
-	ipath_user_sdma_queue_destroy(fd->pq);
-
-	if (--pd->port_cnt) {
-		/*
-		 * XXX If the master closes the port before the slave(s),
-		 * revoke the mmap for the eager receive queue so
-		 * the slave(s) don't wait for receive data forever.
-		 */
-		pd->active_slaves &= ~(1 << fd->subport);
-		put_pid(pd->port_subpid[fd->subport]);
-		pd->port_subpid[fd->subport] = NULL;
-		mutex_unlock(&ipath_mutex);
-		goto bail;
-	}
-	/* early; no interrupt users after this */
-	spin_lock_irqsave(&dd->ipath_uctxt_lock, flags);
-	port = pd->port_port;
-	dd->ipath_pd[port] = NULL;
-	pid = pd->port_pid;
-	pd->port_pid = NULL;
-	spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags);
-
-	if (pd->port_rcvwait_to || pd->port_piowait_to
-	    || pd->port_rcvnowait || pd->port_pionowait) {
-		ipath_cdbg(VERBOSE, "port%u, %u rcv, %u pio wait timeo; "
-			   "%u rcv %u, pio already\n",
-			   pd->port_port, pd->port_rcvwait_to,
-			   pd->port_piowait_to, pd->port_rcvnowait,
-			   pd->port_pionowait);
-		pd->port_rcvwait_to = pd->port_piowait_to =
-			pd->port_rcvnowait = pd->port_pionowait = 0;
-	}
-	if (pd->port_flag) {
-		ipath_cdbg(PROC, "port %u port_flag set: 0x%lx\n",
-			  pd->port_port, pd->port_flag);
-		pd->port_flag = 0;
-	}
-
-	if (dd->ipath_kregbase) {
-		/* atomically clear receive enable port and intr avail. */
-		clear_bit(dd->ipath_r_portenable_shift + port,
-			  &dd->ipath_rcvctrl);
-		clear_bit(pd->port_port + dd->ipath_r_intravail_shift,
-			  &dd->ipath_rcvctrl);
-		ipath_write_kreg( dd, dd->ipath_kregs->kr_rcvctrl,
-			dd->ipath_rcvctrl);
-		/* and read back from chip to be sure that nothing
-		 * else is in flight when we do the rest */
-		(void)ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-
-		/* clean up the pkeys for this port user */
-		ipath_clean_part_key(pd, dd);
-		/*
-		 * be paranoid, and never write 0's to these, just use an
-		 * unused part of the port 0 tail page.  Of course,
-		 * rcvhdraddr points to a large chunk of memory, so this
-		 * could still trash things, but at least it won't trash
-		 * page 0, and by disabling the port, it should stop "soon",
-		 * even if a packet or two is in already in flight after we
-		 * disabled the port.
-		 */
-		ipath_write_kreg_port(dd,
-		        dd->ipath_kregs->kr_rcvhdrtailaddr, port,
-			dd->ipath_dummy_hdrq_phys);
-		ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdraddr,
-			pd->port_port, dd->ipath_dummy_hdrq_phys);
-
-		ipath_disarm_piobufs(dd, pd->port_pio_base, pd->port_piocnt);
-		ipath_chg_pioavailkernel(dd, pd->port_pio_base,
-			pd->port_piocnt, 1);
-
-		dd->ipath_f_clear_tids(dd, pd->port_port);
-
-		if (dd->ipath_pageshadow)
-			unlock_expected_tids(pd);
-		ipath_stats.sps_ports--;
-		ipath_cdbg(PROC, "%s[%u] closed port %u:%u\n",
-			   pd->port_comm, pid_nr(pid),
-			   dd->ipath_unit, port);
-	}
-
-	put_pid(pid);
-	mutex_unlock(&ipath_mutex);
-	ipath_free_pddata(dd, pd); /* after releasing the mutex */
-
-bail:
-	kfree(fd);
-	return 0;
-}
-
-static int ipath_port_info(struct ipath_portdata *pd, u16 subport,
-			   struct ipath_port_info __user *uinfo)
-{
-	struct ipath_port_info info;
-	int nup;
-	int ret;
-	size_t sz;
-
-	(void) ipath_count_units(NULL, &nup, NULL);
-	info.num_active = nup;
-	info.unit = pd->port_dd->ipath_unit;
-	info.port = pd->port_port;
-	info.subport = subport;
-	/* Don't return new fields if old library opened the port. */
-	if (ipath_supports_subports(pd->userversion >> 16,
-				    pd->userversion & 0xffff)) {
-		/* Number of user ports available for this device. */
-		info.num_ports = pd->port_dd->ipath_cfgports - 1;
-		info.num_subports = pd->port_subport_cnt;
-		sz = sizeof(info);
-	} else
-		sz = sizeof(info) - 2 * sizeof(u16);
-
-	if (copy_to_user(uinfo, &info, sz)) {
-		ret = -EFAULT;
-		goto bail;
-	}
-	ret = 0;
-
-bail:
-	return ret;
-}
-
-static int ipath_get_slave_info(struct ipath_portdata *pd,
-				void __user *slave_mask_addr)
-{
-	int ret = 0;
-
-	if (copy_to_user(slave_mask_addr, &pd->active_slaves, sizeof(u32)))
-		ret = -EFAULT;
-	return ret;
-}
-
-static int ipath_sdma_get_inflight(struct ipath_user_sdma_queue *pq,
-				   u32 __user *inflightp)
-{
-	const u32 val = ipath_user_sdma_inflight_counter(pq);
-
-	if (put_user(val, inflightp))
-		return -EFAULT;
-
-	return 0;
-}
-
-static int ipath_sdma_get_complete(struct ipath_devdata *dd,
-				   struct ipath_user_sdma_queue *pq,
-				   u32 __user *completep)
-{
-	u32 val;
-	int err;
-
-	err = ipath_user_sdma_make_progress(dd, pq);
-	if (err < 0)
-		return err;
-
-	val = ipath_user_sdma_complete_counter(pq);
-	if (put_user(val, completep))
-		return -EFAULT;
-
-	return 0;
-}
-
-static ssize_t ipath_write(struct file *fp, const char __user *data,
-			   size_t count, loff_t *off)
-{
-	const struct ipath_cmd __user *ucmd;
-	struct ipath_portdata *pd;
-	const void __user *src;
-	size_t consumed, copy;
-	struct ipath_cmd cmd;
-	ssize_t ret = 0;
-	void *dest;
-
-	if (count < sizeof(cmd.type)) {
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	ucmd = (const struct ipath_cmd __user *) data;
-
-	if (copy_from_user(&cmd.type, &ucmd->type, sizeof(cmd.type))) {
-		ret = -EFAULT;
-		goto bail;
-	}
-
-	consumed = sizeof(cmd.type);
-
-	switch (cmd.type) {
-	case IPATH_CMD_ASSIGN_PORT:
-	case __IPATH_CMD_USER_INIT:
-	case IPATH_CMD_USER_INIT:
-		copy = sizeof(cmd.cmd.user_info);
-		dest = &cmd.cmd.user_info;
-		src = &ucmd->cmd.user_info;
-		break;
-	case IPATH_CMD_RECV_CTRL:
-		copy = sizeof(cmd.cmd.recv_ctrl);
-		dest = &cmd.cmd.recv_ctrl;
-		src = &ucmd->cmd.recv_ctrl;
-		break;
-	case IPATH_CMD_PORT_INFO:
-		copy = sizeof(cmd.cmd.port_info);
-		dest = &cmd.cmd.port_info;
-		src = &ucmd->cmd.port_info;
-		break;
-	case IPATH_CMD_TID_UPDATE:
-	case IPATH_CMD_TID_FREE:
-		copy = sizeof(cmd.cmd.tid_info);
-		dest = &cmd.cmd.tid_info;
-		src = &ucmd->cmd.tid_info;
-		break;
-	case IPATH_CMD_SET_PART_KEY:
-		copy = sizeof(cmd.cmd.part_key);
-		dest = &cmd.cmd.part_key;
-		src = &ucmd->cmd.part_key;
-		break;
-	case __IPATH_CMD_SLAVE_INFO:
-		copy = sizeof(cmd.cmd.slave_mask_addr);
-		dest = &cmd.cmd.slave_mask_addr;
-		src = &ucmd->cmd.slave_mask_addr;
-		break;
-	case IPATH_CMD_PIOAVAILUPD:	// force an update of PIOAvail reg
-		copy = 0;
-		src = NULL;
-		dest = NULL;
-		break;
-	case IPATH_CMD_POLL_TYPE:
-		copy = sizeof(cmd.cmd.poll_type);
-		dest = &cmd.cmd.poll_type;
-		src = &ucmd->cmd.poll_type;
-		break;
-	case IPATH_CMD_ARMLAUNCH_CTRL:
-		copy = sizeof(cmd.cmd.armlaunch_ctrl);
-		dest = &cmd.cmd.armlaunch_ctrl;
-		src = &ucmd->cmd.armlaunch_ctrl;
-		break;
-	case IPATH_CMD_SDMA_INFLIGHT:
-		copy = sizeof(cmd.cmd.sdma_inflight);
-		dest = &cmd.cmd.sdma_inflight;
-		src = &ucmd->cmd.sdma_inflight;
-		break;
-	case IPATH_CMD_SDMA_COMPLETE:
-		copy = sizeof(cmd.cmd.sdma_complete);
-		dest = &cmd.cmd.sdma_complete;
-		src = &ucmd->cmd.sdma_complete;
-		break;
-	default:
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	if (copy) {
-		if ((count - consumed) < copy) {
-			ret = -EINVAL;
-			goto bail;
-		}
-
-		if (copy_from_user(dest, src, copy)) {
-			ret = -EFAULT;
-			goto bail;
-		}
-
-		consumed += copy;
-	}
-
-	pd = port_fp(fp);
-	if (!pd && cmd.type != __IPATH_CMD_USER_INIT &&
-		cmd.type != IPATH_CMD_ASSIGN_PORT) {
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	switch (cmd.type) {
-	case IPATH_CMD_ASSIGN_PORT:
-		ret = ipath_assign_port(fp, &cmd.cmd.user_info);
-		if (ret)
-			goto bail;
-		break;
-	case __IPATH_CMD_USER_INIT:
-		/* backwards compatibility, get port first */
-		ret = ipath_assign_port(fp, &cmd.cmd.user_info);
-		if (ret)
-			goto bail;
-		/* and fall through to current version. */
-	case IPATH_CMD_USER_INIT:
-		ret = ipath_do_user_init(fp, &cmd.cmd.user_info);
-		if (ret)
-			goto bail;
-		ret = ipath_get_base_info(
-			fp, (void __user *) (unsigned long)
-			cmd.cmd.user_info.spu_base_info,
-			cmd.cmd.user_info.spu_base_info_size);
-		break;
-	case IPATH_CMD_RECV_CTRL:
-		ret = ipath_manage_rcvq(pd, subport_fp(fp), cmd.cmd.recv_ctrl);
-		break;
-	case IPATH_CMD_PORT_INFO:
-		ret = ipath_port_info(pd, subport_fp(fp),
-				      (struct ipath_port_info __user *)
-				      (unsigned long) cmd.cmd.port_info);
-		break;
-	case IPATH_CMD_TID_UPDATE:
-		ret = ipath_tid_update(pd, fp, &cmd.cmd.tid_info);
-		break;
-	case IPATH_CMD_TID_FREE:
-		ret = ipath_tid_free(pd, subport_fp(fp), &cmd.cmd.tid_info);
-		break;
-	case IPATH_CMD_SET_PART_KEY:
-		ret = ipath_set_part_key(pd, cmd.cmd.part_key);
-		break;
-	case __IPATH_CMD_SLAVE_INFO:
-		ret = ipath_get_slave_info(pd,
-					   (void __user *) (unsigned long)
-					   cmd.cmd.slave_mask_addr);
-		break;
-	case IPATH_CMD_PIOAVAILUPD:
-		ipath_force_pio_avail_update(pd->port_dd);
-		break;
-	case IPATH_CMD_POLL_TYPE:
-		pd->poll_type = cmd.cmd.poll_type;
-		break;
-	case IPATH_CMD_ARMLAUNCH_CTRL:
-		if (cmd.cmd.armlaunch_ctrl)
-			ipath_enable_armlaunch(pd->port_dd);
-		else
-			ipath_disable_armlaunch(pd->port_dd);
-		break;
-	case IPATH_CMD_SDMA_INFLIGHT:
-		ret = ipath_sdma_get_inflight(user_sdma_queue_fp(fp),
-					      (u32 __user *) (unsigned long)
-					      cmd.cmd.sdma_inflight);
-		break;
-	case IPATH_CMD_SDMA_COMPLETE:
-		ret = ipath_sdma_get_complete(pd->port_dd,
-					      user_sdma_queue_fp(fp),
-					      (u32 __user *) (unsigned long)
-					      cmd.cmd.sdma_complete);
-		break;
-	}
-
-	if (ret >= 0)
-		ret = consumed;
-
-bail:
-	return ret;
-}
-
-static ssize_t ipath_write_iter(struct kiocb *iocb, struct iov_iter *from)
-{
-	struct file *filp = iocb->ki_filp;
-	struct ipath_filedata *fp = filp->private_data;
-	struct ipath_portdata *pd = port_fp(filp);
-	struct ipath_user_sdma_queue *pq = fp->pq;
-
-	if (!iter_is_iovec(from) || !from->nr_segs)
-		return -EINVAL;
-
-	return ipath_user_sdma_writev(pd->port_dd, pq, from->iov, from->nr_segs);
-}
-
-static struct class *ipath_class;
-
-static int init_cdev(int minor, char *name, const struct file_operations *fops,
-		     struct cdev **cdevp, struct device **devp)
-{
-	const dev_t dev = MKDEV(IPATH_MAJOR, minor);
-	struct cdev *cdev = NULL;
-	struct device *device = NULL;
-	int ret;
-
-	cdev = cdev_alloc();
-	if (!cdev) {
-		printk(KERN_ERR IPATH_DRV_NAME
-		       ": Could not allocate cdev for minor %d, %s\n",
-		       minor, name);
-		ret = -ENOMEM;
-		goto done;
-	}
-
-	cdev->owner = THIS_MODULE;
-	cdev->ops = fops;
-	kobject_set_name(&cdev->kobj, name);
-
-	ret = cdev_add(cdev, dev, 1);
-	if (ret < 0) {
-		printk(KERN_ERR IPATH_DRV_NAME
-		       ": Could not add cdev for minor %d, %s (err %d)\n",
-		       minor, name, -ret);
-		goto err_cdev;
-	}
-
-	device = device_create(ipath_class, NULL, dev, NULL, name);
-
-	if (IS_ERR(device)) {
-		ret = PTR_ERR(device);
-		printk(KERN_ERR IPATH_DRV_NAME ": Could not create "
-		       "device for minor %d, %s (err %d)\n",
-		       minor, name, -ret);
-		goto err_cdev;
-	}
-
-	goto done;
-
-err_cdev:
-	cdev_del(cdev);
-	cdev = NULL;
-
-done:
-	if (ret >= 0) {
-		*cdevp = cdev;
-		*devp = device;
-	} else {
-		*cdevp = NULL;
-		*devp = NULL;
-	}
-
-	return ret;
-}
-
-int ipath_cdev_init(int minor, char *name, const struct file_operations *fops,
-		    struct cdev **cdevp, struct device **devp)
-{
-	return init_cdev(minor, name, fops, cdevp, devp);
-}
-
-static void cleanup_cdev(struct cdev **cdevp,
-			 struct device **devp)
-{
-	struct device *dev = *devp;
-
-	if (dev) {
-		device_unregister(dev);
-		*devp = NULL;
-	}
-
-	if (*cdevp) {
-		cdev_del(*cdevp);
-		*cdevp = NULL;
-	}
-}
-
-void ipath_cdev_cleanup(struct cdev **cdevp,
-			struct device **devp)
-{
-	cleanup_cdev(cdevp, devp);
-}
-
-static struct cdev *wildcard_cdev;
-static struct device *wildcard_dev;
-
-static const dev_t dev = MKDEV(IPATH_MAJOR, 0);
-
-static int user_init(void)
-{
-	int ret;
-
-	ret = register_chrdev_region(dev, IPATH_NMINORS, IPATH_DRV_NAME);
-	if (ret < 0) {
-		printk(KERN_ERR IPATH_DRV_NAME ": Could not register "
-		       "chrdev region (err %d)\n", -ret);
-		goto done;
-	}
-
-	ipath_class = class_create(THIS_MODULE, IPATH_DRV_NAME);
-
-	if (IS_ERR(ipath_class)) {
-		ret = PTR_ERR(ipath_class);
-		printk(KERN_ERR IPATH_DRV_NAME ": Could not create "
-		       "device class (err %d)\n", -ret);
-		goto bail;
-	}
-
-	goto done;
-bail:
-	unregister_chrdev_region(dev, IPATH_NMINORS);
-done:
-	return ret;
-}
-
-static void user_cleanup(void)
-{
-	if (ipath_class) {
-		class_destroy(ipath_class);
-		ipath_class = NULL;
-	}
-
-	unregister_chrdev_region(dev, IPATH_NMINORS);
-}
-
-static atomic_t user_count = ATOMIC_INIT(0);
-static atomic_t user_setup = ATOMIC_INIT(0);
-
-int ipath_user_add(struct ipath_devdata *dd)
-{
-	char name[10];
-	int ret;
-
-	if (atomic_inc_return(&user_count) == 1) {
-		ret = user_init();
-		if (ret < 0) {
-			ipath_dev_err(dd, "Unable to set up user support: "
-				      "error %d\n", -ret);
-			goto bail;
-		}
-		ret = init_cdev(0, "ipath", &ipath_file_ops, &wildcard_cdev,
-				&wildcard_dev);
-		if (ret < 0) {
-			ipath_dev_err(dd, "Could not create wildcard "
-				      "minor: error %d\n", -ret);
-			goto bail_user;
-		}
-
-		atomic_set(&user_setup, 1);
-	}
-
-	snprintf(name, sizeof(name), "ipath%d", dd->ipath_unit);
-
-	ret = init_cdev(dd->ipath_unit + 1, name, &ipath_file_ops,
-			&dd->user_cdev, &dd->user_dev);
-	if (ret < 0)
-		ipath_dev_err(dd, "Could not create user minor %d, %s\n",
-			      dd->ipath_unit + 1, name);
-
-	goto bail;
-
-bail_user:
-	user_cleanup();
-bail:
-	return ret;
-}
-
-void ipath_user_remove(struct ipath_devdata *dd)
-{
-	cleanup_cdev(&dd->user_cdev, &dd->user_dev);
-
-	if (atomic_dec_return(&user_count) == 0) {
-		if (atomic_read(&user_setup) == 0)
-			goto bail;
-
-		cleanup_cdev(&wildcard_cdev, &wildcard_dev);
-		user_cleanup();
-
-		atomic_set(&user_setup, 0);
-	}
-bail:
-	return;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_fs.c b/drivers/staging/rdma/ipath/ipath_fs.c
deleted file mode 100644
index 476fcdf..0000000
--- a/drivers/staging/rdma/ipath/ipath_fs.c
+++ /dev/null
@@ -1,415 +0,0 @@
-/*
- * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
- * Copyright (c) 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/module.h>
-#include <linux/fs.h>
-#include <linux/mount.h>
-#include <linux/pagemap.h>
-#include <linux/init.h>
-#include <linux/namei.h>
-#include <linux/slab.h>
-
-#include "ipath_kernel.h"
-
-#define IPATHFS_MAGIC 0x726a77
-
-static struct super_block *ipath_super;
-
-static int ipathfs_mknod(struct inode *dir, struct dentry *dentry,
-			 umode_t mode, const struct file_operations *fops,
-			 void *data)
-{
-	int error;
-	struct inode *inode = new_inode(dir->i_sb);
-
-	if (!inode) {
-		error = -EPERM;
-		goto bail;
-	}
-
-	inode->i_ino = get_next_ino();
-	inode->i_mode = mode;
-	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
-	inode->i_private = data;
-	if (S_ISDIR(mode)) {
-		inode->i_op = &simple_dir_inode_operations;
-		inc_nlink(inode);
-		inc_nlink(dir);
-	}
-
-	inode->i_fop = fops;
-
-	d_instantiate(dentry, inode);
-	error = 0;
-
-bail:
-	return error;
-}
-
-static int create_file(const char *name, umode_t mode,
-		       struct dentry *parent, struct dentry **dentry,
-		       const struct file_operations *fops, void *data)
-{
-	int error;
-
-	inode_lock(d_inode(parent));
-	*dentry = lookup_one_len(name, parent, strlen(name));
-	if (!IS_ERR(*dentry))
-		error = ipathfs_mknod(d_inode(parent), *dentry,
-				      mode, fops, data);
-	else
-		error = PTR_ERR(*dentry);
-	inode_unlock(d_inode(parent));
-
-	return error;
-}
-
-static ssize_t atomic_stats_read(struct file *file, char __user *buf,
-				 size_t count, loff_t *ppos)
-{
-	return simple_read_from_buffer(buf, count, ppos, &ipath_stats,
-				       sizeof ipath_stats);
-}
-
-static const struct file_operations atomic_stats_ops = {
-	.read = atomic_stats_read,
-	.llseek = default_llseek,
-};
-
-static ssize_t atomic_counters_read(struct file *file, char __user *buf,
-				    size_t count, loff_t *ppos)
-{
-	struct infinipath_counters counters;
-	struct ipath_devdata *dd;
-
-	dd = file_inode(file)->i_private;
-	dd->ipath_f_read_counters(dd, &counters);
-
-	return simple_read_from_buffer(buf, count, ppos, &counters,
-				       sizeof counters);
-}
-
-static const struct file_operations atomic_counters_ops = {
-	.read = atomic_counters_read,
-	.llseek = default_llseek,
-};
-
-static ssize_t flash_read(struct file *file, char __user *buf,
-			  size_t count, loff_t *ppos)
-{
-	struct ipath_devdata *dd;
-	ssize_t ret;
-	loff_t pos;
-	char *tmp;
-
-	pos = *ppos;
-
-	if ( pos < 0) {
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	if (pos >= sizeof(struct ipath_flash)) {
-		ret = 0;
-		goto bail;
-	}
-
-	if (count > sizeof(struct ipath_flash) - pos)
-		count = sizeof(struct ipath_flash) - pos;
-
-	tmp = kmalloc(count, GFP_KERNEL);
-	if (!tmp) {
-		ret = -ENOMEM;
-		goto bail;
-	}
-
-	dd = file_inode(file)->i_private;
-	if (ipath_eeprom_read(dd, pos, tmp, count)) {
-		ipath_dev_err(dd, "failed to read from flash\n");
-		ret = -ENXIO;
-		goto bail_tmp;
-	}
-
-	if (copy_to_user(buf, tmp, count)) {
-		ret = -EFAULT;
-		goto bail_tmp;
-	}
-
-	*ppos = pos + count;
-	ret = count;
-
-bail_tmp:
-	kfree(tmp);
-
-bail:
-	return ret;
-}
-
-static ssize_t flash_write(struct file *file, const char __user *buf,
-			   size_t count, loff_t *ppos)
-{
-	struct ipath_devdata *dd;
-	ssize_t ret;
-	loff_t pos;
-	char *tmp;
-
-	pos = *ppos;
-
-	if (pos != 0) {
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	if (count != sizeof(struct ipath_flash)) {
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	tmp = memdup_user(buf, count);
-	if (IS_ERR(tmp))
-		return PTR_ERR(tmp);
-
-	dd = file_inode(file)->i_private;
-	if (ipath_eeprom_write(dd, pos, tmp, count)) {
-		ret = -ENXIO;
-		ipath_dev_err(dd, "failed to write to flash\n");
-		goto bail_tmp;
-	}
-
-	*ppos = pos + count;
-	ret = count;
-
-bail_tmp:
-	kfree(tmp);
-
-bail:
-	return ret;
-}
-
-static const struct file_operations flash_ops = {
-	.read = flash_read,
-	.write = flash_write,
-	.llseek = default_llseek,
-};
-
-static int create_device_files(struct super_block *sb,
-			       struct ipath_devdata *dd)
-{
-	struct dentry *dir, *tmp;
-	char unit[10];
-	int ret;
-
-	snprintf(unit, sizeof unit, "%02d", dd->ipath_unit);
-	ret = create_file(unit, S_IFDIR|S_IRUGO|S_IXUGO, sb->s_root, &dir,
-			  &simple_dir_operations, dd);
-	if (ret) {
-		printk(KERN_ERR "create_file(%s) failed: %d\n", unit, ret);
-		goto bail;
-	}
-
-	ret = create_file("atomic_counters", S_IFREG|S_IRUGO, dir, &tmp,
-			  &atomic_counters_ops, dd);
-	if (ret) {
-		printk(KERN_ERR "create_file(%s/atomic_counters) "
-		       "failed: %d\n", unit, ret);
-		goto bail;
-	}
-
-	ret = create_file("flash", S_IFREG|S_IWUSR|S_IRUGO, dir, &tmp,
-			  &flash_ops, dd);
-	if (ret) {
-		printk(KERN_ERR "create_file(%s/flash) "
-		       "failed: %d\n", unit, ret);
-		goto bail;
-	}
-
-bail:
-	return ret;
-}
-
-static int remove_file(struct dentry *parent, char *name)
-{
-	struct dentry *tmp;
-	int ret;
-
-	tmp = lookup_one_len(name, parent, strlen(name));
-
-	if (IS_ERR(tmp)) {
-		ret = PTR_ERR(tmp);
-		goto bail;
-	}
-
-	spin_lock(&tmp->d_lock);
-	if (simple_positive(tmp)) {
-		dget_dlock(tmp);
-		__d_drop(tmp);
-		spin_unlock(&tmp->d_lock);
-		simple_unlink(d_inode(parent), tmp);
-	} else
-		spin_unlock(&tmp->d_lock);
-
-	ret = 0;
-bail:
-	/*
-	 * We don't expect clients to care about the return value, but
-	 * it's there if they need it.
-	 */
-	return ret;
-}
-
-static int remove_device_files(struct super_block *sb,
-			       struct ipath_devdata *dd)
-{
-	struct dentry *dir, *root;
-	char unit[10];
-	int ret;
-
-	root = dget(sb->s_root);
-	inode_lock(d_inode(root));
-	snprintf(unit, sizeof unit, "%02d", dd->ipath_unit);
-	dir = lookup_one_len(unit, root, strlen(unit));
-
-	if (IS_ERR(dir)) {
-		ret = PTR_ERR(dir);
-		printk(KERN_ERR "Lookup of %s failed\n", unit);
-		goto bail;
-	}
-
-	remove_file(dir, "flash");
-	remove_file(dir, "atomic_counters");
-	d_delete(dir);
-	ret = simple_rmdir(d_inode(root), dir);
-
-bail:
-	inode_unlock(d_inode(root));
-	dput(root);
-	return ret;
-}
-
-static int ipathfs_fill_super(struct super_block *sb, void *data,
-			      int silent)
-{
-	struct ipath_devdata *dd, *tmp;
-	unsigned long flags;
-	int ret;
-
-	static struct tree_descr files[] = {
-		[2] = {"atomic_stats", &atomic_stats_ops, S_IRUGO},
-		{""},
-	};
-
-	ret = simple_fill_super(sb, IPATHFS_MAGIC, files);
-	if (ret) {
-		printk(KERN_ERR "simple_fill_super failed: %d\n", ret);
-		goto bail;
-	}
-
-	spin_lock_irqsave(&ipath_devs_lock, flags);
-
-	list_for_each_entry_safe(dd, tmp, &ipath_dev_list, ipath_list) {
-		spin_unlock_irqrestore(&ipath_devs_lock, flags);
-		ret = create_device_files(sb, dd);
-		if (ret)
-			goto bail;
-		spin_lock_irqsave(&ipath_devs_lock, flags);
-	}
-
-	spin_unlock_irqrestore(&ipath_devs_lock, flags);
-
-bail:
-	return ret;
-}
-
-static struct dentry *ipathfs_mount(struct file_system_type *fs_type,
-			int flags, const char *dev_name, void *data)
-{
-	struct dentry *ret;
-	ret = mount_single(fs_type, flags, data, ipathfs_fill_super);
-	if (!IS_ERR(ret))
-		ipath_super = ret->d_sb;
-	return ret;
-}
-
-static void ipathfs_kill_super(struct super_block *s)
-{
-	kill_litter_super(s);
-	ipath_super = NULL;
-}
-
-int ipathfs_add_device(struct ipath_devdata *dd)
-{
-	int ret;
-
-	if (ipath_super == NULL) {
-		ret = 0;
-		goto bail;
-	}
-
-	ret = create_device_files(ipath_super, dd);
-
-bail:
-	return ret;
-}
-
-int ipathfs_remove_device(struct ipath_devdata *dd)
-{
-	int ret;
-
-	if (ipath_super == NULL) {
-		ret = 0;
-		goto bail;
-	}
-
-	ret = remove_device_files(ipath_super, dd);
-
-bail:
-	return ret;
-}
-
-static struct file_system_type ipathfs_fs_type = {
-	.owner =	THIS_MODULE,
-	.name =		"ipathfs",
-	.mount =	ipathfs_mount,
-	.kill_sb =	ipathfs_kill_super,
-};
-MODULE_ALIAS_FS("ipathfs");
-
-int __init ipath_init_ipathfs(void)
-{
-	return register_filesystem(&ipathfs_fs_type);
-}
-
-void __exit ipath_exit_ipathfs(void)
-{
-	unregister_filesystem(&ipathfs_fs_type);
-}
diff --git a/drivers/staging/rdma/ipath/ipath_iba6110.c b/drivers/staging/rdma/ipath/ipath_iba6110.c
deleted file mode 100644
index 5f13572..0000000
--- a/drivers/staging/rdma/ipath/ipath_iba6110.c
+++ /dev/null
@@ -1,1939 +0,0 @@
-/*
- * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-/*
- * This file contains all of the code that is specific to the InfiniPath
- * HT chip.
- */
-
-#include <linux/vmalloc.h>
-#include <linux/pci.h>
-#include <linux/delay.h>
-#include <linux/htirq.h>
-#include <rdma/ib_verbs.h>
-
-#include "ipath_kernel.h"
-#include "ipath_registers.h"
-
-static void ipath_setup_ht_setextled(struct ipath_devdata *, u64, u64);
-
-
-/*
- * This lists the InfiniPath registers, in the actual chip layout.
- * This structure should never be directly accessed.
- *
- * The names are in InterCap form because they're taken straight from
- * the chip specification.  Since they're only used in this file, they
- * don't pollute the rest of the source.
-*/
-
-struct _infinipath_do_not_use_kernel_regs {
-	unsigned long long Revision;
-	unsigned long long Control;
-	unsigned long long PageAlign;
-	unsigned long long PortCnt;
-	unsigned long long DebugPortSelect;
-	unsigned long long DebugPort;
-	unsigned long long SendRegBase;
-	unsigned long long UserRegBase;
-	unsigned long long CounterRegBase;
-	unsigned long long Scratch;
-	unsigned long long ReservedMisc1;
-	unsigned long long InterruptConfig;
-	unsigned long long IntBlocked;
-	unsigned long long IntMask;
-	unsigned long long IntStatus;
-	unsigned long long IntClear;
-	unsigned long long ErrorMask;
-	unsigned long long ErrorStatus;
-	unsigned long long ErrorClear;
-	unsigned long long HwErrMask;
-	unsigned long long HwErrStatus;
-	unsigned long long HwErrClear;
-	unsigned long long HwDiagCtrl;
-	unsigned long long MDIO;
-	unsigned long long IBCStatus;
-	unsigned long long IBCCtrl;
-	unsigned long long ExtStatus;
-	unsigned long long ExtCtrl;
-	unsigned long long GPIOOut;
-	unsigned long long GPIOMask;
-	unsigned long long GPIOStatus;
-	unsigned long long GPIOClear;
-	unsigned long long RcvCtrl;
-	unsigned long long RcvBTHQP;
-	unsigned long long RcvHdrSize;
-	unsigned long long RcvHdrCnt;
-	unsigned long long RcvHdrEntSize;
-	unsigned long long RcvTIDBase;
-	unsigned long long RcvTIDCnt;
-	unsigned long long RcvEgrBase;
-	unsigned long long RcvEgrCnt;
-	unsigned long long RcvBufBase;
-	unsigned long long RcvBufSize;
-	unsigned long long RxIntMemBase;
-	unsigned long long RxIntMemSize;
-	unsigned long long RcvPartitionKey;
-	unsigned long long ReservedRcv[10];
-	unsigned long long SendCtrl;
-	unsigned long long SendPIOBufBase;
-	unsigned long long SendPIOSize;
-	unsigned long long SendPIOBufCnt;
-	unsigned long long SendPIOAvailAddr;
-	unsigned long long TxIntMemBase;
-	unsigned long long TxIntMemSize;
-	unsigned long long ReservedSend[9];
-	unsigned long long SendBufferError;
-	unsigned long long SendBufferErrorCONT1;
-	unsigned long long SendBufferErrorCONT2;
-	unsigned long long SendBufferErrorCONT3;
-	unsigned long long ReservedSBE[4];
-	unsigned long long RcvHdrAddr0;
-	unsigned long long RcvHdrAddr1;
-	unsigned long long RcvHdrAddr2;
-	unsigned long long RcvHdrAddr3;
-	unsigned long long RcvHdrAddr4;
-	unsigned long long RcvHdrAddr5;
-	unsigned long long RcvHdrAddr6;
-	unsigned long long RcvHdrAddr7;
-	unsigned long long RcvHdrAddr8;
-	unsigned long long ReservedRHA[7];
-	unsigned long long RcvHdrTailAddr0;
-	unsigned long long RcvHdrTailAddr1;
-	unsigned long long RcvHdrTailAddr2;
-	unsigned long long RcvHdrTailAddr3;
-	unsigned long long RcvHdrTailAddr4;
-	unsigned long long RcvHdrTailAddr5;
-	unsigned long long RcvHdrTailAddr6;
-	unsigned long long RcvHdrTailAddr7;
-	unsigned long long RcvHdrTailAddr8;
-	unsigned long long ReservedRHTA[7];
-	unsigned long long Sync;	/* Software only */
-	unsigned long long Dump;	/* Software only */
-	unsigned long long SimVer;	/* Software only */
-	unsigned long long ReservedSW[5];
-	unsigned long long SerdesConfig0;
-	unsigned long long SerdesConfig1;
-	unsigned long long SerdesStatus;
-	unsigned long long XGXSConfig;
-	unsigned long long ReservedSW2[4];
-};
-
-struct _infinipath_do_not_use_counters {
-	__u64 LBIntCnt;
-	__u64 LBFlowStallCnt;
-	__u64 Reserved1;
-	__u64 TxUnsupVLErrCnt;
-	__u64 TxDataPktCnt;
-	__u64 TxFlowPktCnt;
-	__u64 TxDwordCnt;
-	__u64 TxLenErrCnt;
-	__u64 TxMaxMinLenErrCnt;
-	__u64 TxUnderrunCnt;
-	__u64 TxFlowStallCnt;
-	__u64 TxDroppedPktCnt;
-	__u64 RxDroppedPktCnt;
-	__u64 RxDataPktCnt;
-	__u64 RxFlowPktCnt;
-	__u64 RxDwordCnt;
-	__u64 RxLenErrCnt;
-	__u64 RxMaxMinLenErrCnt;
-	__u64 RxICRCErrCnt;
-	__u64 RxVCRCErrCnt;
-	__u64 RxFlowCtrlErrCnt;
-	__u64 RxBadFormatCnt;
-	__u64 RxLinkProblemCnt;
-	__u64 RxEBPCnt;
-	__u64 RxLPCRCErrCnt;
-	__u64 RxBufOvflCnt;
-	__u64 RxTIDFullErrCnt;
-	__u64 RxTIDValidErrCnt;
-	__u64 RxPKeyMismatchCnt;
-	__u64 RxP0HdrEgrOvflCnt;
-	__u64 RxP1HdrEgrOvflCnt;
-	__u64 RxP2HdrEgrOvflCnt;
-	__u64 RxP3HdrEgrOvflCnt;
-	__u64 RxP4HdrEgrOvflCnt;
-	__u64 RxP5HdrEgrOvflCnt;
-	__u64 RxP6HdrEgrOvflCnt;
-	__u64 RxP7HdrEgrOvflCnt;
-	__u64 RxP8HdrEgrOvflCnt;
-	__u64 Reserved6;
-	__u64 Reserved7;
-	__u64 IBStatusChangeCnt;
-	__u64 IBLinkErrRecoveryCnt;
-	__u64 IBLinkDownedCnt;
-	__u64 IBSymbolErrCnt;
-};
-
-#define IPATH_KREG_OFFSET(field) (offsetof( \
-	struct _infinipath_do_not_use_kernel_regs, field) / sizeof(u64))
-#define IPATH_CREG_OFFSET(field) (offsetof( \
-	struct _infinipath_do_not_use_counters, field) / sizeof(u64))
-
-static const struct ipath_kregs ipath_ht_kregs = {
-	.kr_control = IPATH_KREG_OFFSET(Control),
-	.kr_counterregbase = IPATH_KREG_OFFSET(CounterRegBase),
-	.kr_debugport = IPATH_KREG_OFFSET(DebugPort),
-	.kr_debugportselect = IPATH_KREG_OFFSET(DebugPortSelect),
-	.kr_errorclear = IPATH_KREG_OFFSET(ErrorClear),
-	.kr_errormask = IPATH_KREG_OFFSET(ErrorMask),
-	.kr_errorstatus = IPATH_KREG_OFFSET(ErrorStatus),
-	.kr_extctrl = IPATH_KREG_OFFSET(ExtCtrl),
-	.kr_extstatus = IPATH_KREG_OFFSET(ExtStatus),
-	.kr_gpio_clear = IPATH_KREG_OFFSET(GPIOClear),
-	.kr_gpio_mask = IPATH_KREG_OFFSET(GPIOMask),
-	.kr_gpio_out = IPATH_KREG_OFFSET(GPIOOut),
-	.kr_gpio_status = IPATH_KREG_OFFSET(GPIOStatus),
-	.kr_hwdiagctrl = IPATH_KREG_OFFSET(HwDiagCtrl),
-	.kr_hwerrclear = IPATH_KREG_OFFSET(HwErrClear),
-	.kr_hwerrmask = IPATH_KREG_OFFSET(HwErrMask),
-	.kr_hwerrstatus = IPATH_KREG_OFFSET(HwErrStatus),
-	.kr_ibcctrl = IPATH_KREG_OFFSET(IBCCtrl),
-	.kr_ibcstatus = IPATH_KREG_OFFSET(IBCStatus),
-	.kr_intblocked = IPATH_KREG_OFFSET(IntBlocked),
-	.kr_intclear = IPATH_KREG_OFFSET(IntClear),
-	.kr_interruptconfig = IPATH_KREG_OFFSET(InterruptConfig),
-	.kr_intmask = IPATH_KREG_OFFSET(IntMask),
-	.kr_intstatus = IPATH_KREG_OFFSET(IntStatus),
-	.kr_mdio = IPATH_KREG_OFFSET(MDIO),
-	.kr_pagealign = IPATH_KREG_OFFSET(PageAlign),
-	.kr_partitionkey = IPATH_KREG_OFFSET(RcvPartitionKey),
-	.kr_portcnt = IPATH_KREG_OFFSET(PortCnt),
-	.kr_rcvbthqp = IPATH_KREG_OFFSET(RcvBTHQP),
-	.kr_rcvbufbase = IPATH_KREG_OFFSET(RcvBufBase),
-	.kr_rcvbufsize = IPATH_KREG_OFFSET(RcvBufSize),
-	.kr_rcvctrl = IPATH_KREG_OFFSET(RcvCtrl),
-	.kr_rcvegrbase = IPATH_KREG_OFFSET(RcvEgrBase),
-	.kr_rcvegrcnt = IPATH_KREG_OFFSET(RcvEgrCnt),
-	.kr_rcvhdrcnt = IPATH_KREG_OFFSET(RcvHdrCnt),
-	.kr_rcvhdrentsize = IPATH_KREG_OFFSET(RcvHdrEntSize),
-	.kr_rcvhdrsize = IPATH_KREG_OFFSET(RcvHdrSize),
-	.kr_rcvintmembase = IPATH_KREG_OFFSET(RxIntMemBase),
-	.kr_rcvintmemsize = IPATH_KREG_OFFSET(RxIntMemSize),
-	.kr_rcvtidbase = IPATH_KREG_OFFSET(RcvTIDBase),
-	.kr_rcvtidcnt = IPATH_KREG_OFFSET(RcvTIDCnt),
-	.kr_revision = IPATH_KREG_OFFSET(Revision),
-	.kr_scratch = IPATH_KREG_OFFSET(Scratch),
-	.kr_sendbuffererror = IPATH_KREG_OFFSET(SendBufferError),
-	.kr_sendctrl = IPATH_KREG_OFFSET(SendCtrl),
-	.kr_sendpioavailaddr = IPATH_KREG_OFFSET(SendPIOAvailAddr),
-	.kr_sendpiobufbase = IPATH_KREG_OFFSET(SendPIOBufBase),
-	.kr_sendpiobufcnt = IPATH_KREG_OFFSET(SendPIOBufCnt),
-	.kr_sendpiosize = IPATH_KREG_OFFSET(SendPIOSize),
-	.kr_sendregbase = IPATH_KREG_OFFSET(SendRegBase),
-	.kr_txintmembase = IPATH_KREG_OFFSET(TxIntMemBase),
-	.kr_txintmemsize = IPATH_KREG_OFFSET(TxIntMemSize),
-	.kr_userregbase = IPATH_KREG_OFFSET(UserRegBase),
-	.kr_serdesconfig0 = IPATH_KREG_OFFSET(SerdesConfig0),
-	.kr_serdesconfig1 = IPATH_KREG_OFFSET(SerdesConfig1),
-	.kr_serdesstatus = IPATH_KREG_OFFSET(SerdesStatus),
-	.kr_xgxsconfig = IPATH_KREG_OFFSET(XGXSConfig),
-	/*
-	 * These should not be used directly via ipath_write_kreg64(),
-	 * use them with ipath_write_kreg64_port(),
-	 */
-	.kr_rcvhdraddr = IPATH_KREG_OFFSET(RcvHdrAddr0),
-	.kr_rcvhdrtailaddr = IPATH_KREG_OFFSET(RcvHdrTailAddr0)
-};
-
-static const struct ipath_cregs ipath_ht_cregs = {
-	.cr_badformatcnt = IPATH_CREG_OFFSET(RxBadFormatCnt),
-	.cr_erricrccnt = IPATH_CREG_OFFSET(RxICRCErrCnt),
-	.cr_errlinkcnt = IPATH_CREG_OFFSET(RxLinkProblemCnt),
-	.cr_errlpcrccnt = IPATH_CREG_OFFSET(RxLPCRCErrCnt),
-	.cr_errpkey = IPATH_CREG_OFFSET(RxPKeyMismatchCnt),
-	.cr_errrcvflowctrlcnt = IPATH_CREG_OFFSET(RxFlowCtrlErrCnt),
-	.cr_err_rlencnt = IPATH_CREG_OFFSET(RxLenErrCnt),
-	.cr_errslencnt = IPATH_CREG_OFFSET(TxLenErrCnt),
-	.cr_errtidfull = IPATH_CREG_OFFSET(RxTIDFullErrCnt),
-	.cr_errtidvalid = IPATH_CREG_OFFSET(RxTIDValidErrCnt),
-	.cr_errvcrccnt = IPATH_CREG_OFFSET(RxVCRCErrCnt),
-	.cr_ibstatuschange = IPATH_CREG_OFFSET(IBStatusChangeCnt),
-	/* calc from Reg_CounterRegBase + offset */
-	.cr_intcnt = IPATH_CREG_OFFSET(LBIntCnt),
-	.cr_invalidrlencnt = IPATH_CREG_OFFSET(RxMaxMinLenErrCnt),
-	.cr_invalidslencnt = IPATH_CREG_OFFSET(TxMaxMinLenErrCnt),
-	.cr_lbflowstallcnt = IPATH_CREG_OFFSET(LBFlowStallCnt),
-	.cr_pktrcvcnt = IPATH_CREG_OFFSET(RxDataPktCnt),
-	.cr_pktrcvflowctrlcnt = IPATH_CREG_OFFSET(RxFlowPktCnt),
-	.cr_pktsendcnt = IPATH_CREG_OFFSET(TxDataPktCnt),
-	.cr_pktsendflowcnt = IPATH_CREG_OFFSET(TxFlowPktCnt),
-	.cr_portovflcnt = IPATH_CREG_OFFSET(RxP0HdrEgrOvflCnt),
-	.cr_rcvebpcnt = IPATH_CREG_OFFSET(RxEBPCnt),
-	.cr_rcvovflcnt = IPATH_CREG_OFFSET(RxBufOvflCnt),
-	.cr_senddropped = IPATH_CREG_OFFSET(TxDroppedPktCnt),
-	.cr_sendstallcnt = IPATH_CREG_OFFSET(TxFlowStallCnt),
-	.cr_sendunderruncnt = IPATH_CREG_OFFSET(TxUnderrunCnt),
-	.cr_wordrcvcnt = IPATH_CREG_OFFSET(RxDwordCnt),
-	.cr_wordsendcnt = IPATH_CREG_OFFSET(TxDwordCnt),
-	.cr_unsupvlcnt = IPATH_CREG_OFFSET(TxUnsupVLErrCnt),
-	.cr_rxdroppktcnt = IPATH_CREG_OFFSET(RxDroppedPktCnt),
-	.cr_iblinkerrrecovcnt = IPATH_CREG_OFFSET(IBLinkErrRecoveryCnt),
-	.cr_iblinkdowncnt = IPATH_CREG_OFFSET(IBLinkDownedCnt),
-	.cr_ibsymbolerrcnt = IPATH_CREG_OFFSET(IBSymbolErrCnt)
-};
-
-/* kr_intstatus, kr_intclear, kr_intmask bits */
-#define INFINIPATH_I_RCVURG_MASK ((1U<<9)-1)
-#define INFINIPATH_I_RCVURG_SHIFT 0
-#define INFINIPATH_I_RCVAVAIL_MASK ((1U<<9)-1)
-#define INFINIPATH_I_RCVAVAIL_SHIFT 12
-
-/* kr_hwerrclear, kr_hwerrmask, kr_hwerrstatus, bits */
-#define INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT 0
-#define INFINIPATH_HWE_HTCMEMPARITYERR_MASK 0x3FFFFFULL
-#define INFINIPATH_HWE_HTCLNKABYTE0CRCERR   0x0000000000800000ULL
-#define INFINIPATH_HWE_HTCLNKABYTE1CRCERR   0x0000000001000000ULL
-#define INFINIPATH_HWE_HTCLNKBBYTE0CRCERR   0x0000000002000000ULL
-#define INFINIPATH_HWE_HTCLNKBBYTE1CRCERR   0x0000000004000000ULL
-#define INFINIPATH_HWE_HTCMISCERR4          0x0000000008000000ULL
-#define INFINIPATH_HWE_HTCMISCERR5          0x0000000010000000ULL
-#define INFINIPATH_HWE_HTCMISCERR6          0x0000000020000000ULL
-#define INFINIPATH_HWE_HTCMISCERR7          0x0000000040000000ULL
-#define INFINIPATH_HWE_HTCBUSTREQPARITYERR  0x0000000080000000ULL
-#define INFINIPATH_HWE_HTCBUSTRESPPARITYERR 0x0000000100000000ULL
-#define INFINIPATH_HWE_HTCBUSIREQPARITYERR  0x0000000200000000ULL
-#define INFINIPATH_HWE_COREPLL_FBSLIP       0x0080000000000000ULL
-#define INFINIPATH_HWE_COREPLL_RFSLIP       0x0100000000000000ULL
-#define INFINIPATH_HWE_HTBPLL_FBSLIP        0x0200000000000000ULL
-#define INFINIPATH_HWE_HTBPLL_RFSLIP        0x0400000000000000ULL
-#define INFINIPATH_HWE_HTAPLL_FBSLIP        0x0800000000000000ULL
-#define INFINIPATH_HWE_HTAPLL_RFSLIP        0x1000000000000000ULL
-#define INFINIPATH_HWE_SERDESPLLFAILED      0x2000000000000000ULL
-
-#define IBA6110_IBCS_LINKTRAININGSTATE_MASK 0xf
-#define IBA6110_IBCS_LINKSTATE_SHIFT 4
-
-/* kr_extstatus bits */
-#define INFINIPATH_EXTS_FREQSEL 0x2
-#define INFINIPATH_EXTS_SERDESSEL 0x4
-#define INFINIPATH_EXTS_MEMBIST_ENDTEST     0x0000000000004000
-#define INFINIPATH_EXTS_MEMBIST_CORRECT     0x0000000000008000
-
-
-/* TID entries (memory), HT-only */
-#define INFINIPATH_RT_ADDR_MASK 0xFFFFFFFFFFULL	/* 40 bits valid */
-#define INFINIPATH_RT_VALID 0x8000000000000000ULL
-#define INFINIPATH_RT_ADDR_SHIFT 0
-#define INFINIPATH_RT_BUFSIZE_MASK 0x3FFFULL
-#define INFINIPATH_RT_BUFSIZE_SHIFT 48
-
-#define INFINIPATH_R_INTRAVAIL_SHIFT 16
-#define INFINIPATH_R_TAILUPD_SHIFT 31
-
-/* kr_xgxsconfig bits */
-#define INFINIPATH_XGXS_RESET          0x7ULL
-
-/*
- * masks and bits that are different in different chips, or present only
- * in one
- */
-static const ipath_err_t infinipath_hwe_htcmemparityerr_mask =
-    INFINIPATH_HWE_HTCMEMPARITYERR_MASK;
-static const ipath_err_t infinipath_hwe_htcmemparityerr_shift =
-    INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT;
-
-static const ipath_err_t infinipath_hwe_htclnkabyte0crcerr =
-    INFINIPATH_HWE_HTCLNKABYTE0CRCERR;
-static const ipath_err_t infinipath_hwe_htclnkabyte1crcerr =
-    INFINIPATH_HWE_HTCLNKABYTE1CRCERR;
-static const ipath_err_t infinipath_hwe_htclnkbbyte0crcerr =
-    INFINIPATH_HWE_HTCLNKBBYTE0CRCERR;
-static const ipath_err_t infinipath_hwe_htclnkbbyte1crcerr =
-    INFINIPATH_HWE_HTCLNKBBYTE1CRCERR;
-
-#define _IPATH_GPIO_SDA_NUM 1
-#define _IPATH_GPIO_SCL_NUM 0
-
-#define IPATH_GPIO_SDA \
-	(1ULL << (_IPATH_GPIO_SDA_NUM+INFINIPATH_EXTC_GPIOOE_SHIFT))
-#define IPATH_GPIO_SCL \
-	(1ULL << (_IPATH_GPIO_SCL_NUM+INFINIPATH_EXTC_GPIOOE_SHIFT))
-
-/* keep the code below somewhat more readable; not used elsewhere */
-#define _IPATH_HTLINK0_CRCBITS (infinipath_hwe_htclnkabyte0crcerr |	\
-				infinipath_hwe_htclnkabyte1crcerr)
-#define _IPATH_HTLINK1_CRCBITS (infinipath_hwe_htclnkbbyte0crcerr |	\
-				infinipath_hwe_htclnkbbyte1crcerr)
-#define _IPATH_HTLANE0_CRCBITS (infinipath_hwe_htclnkabyte0crcerr |	\
-				infinipath_hwe_htclnkbbyte0crcerr)
-#define _IPATH_HTLANE1_CRCBITS (infinipath_hwe_htclnkabyte1crcerr |	\
-				infinipath_hwe_htclnkbbyte1crcerr)
-
-static void hwerr_crcbits(struct ipath_devdata *dd, ipath_err_t hwerrs,
-			  char *msg, size_t msgl)
-{
-	char bitsmsg[64];
-	ipath_err_t crcbits = hwerrs &
-		(_IPATH_HTLINK0_CRCBITS | _IPATH_HTLINK1_CRCBITS);
-	/* don't check if 8bit HT */
-	if (dd->ipath_flags & IPATH_8BIT_IN_HT0)
-		crcbits &= ~infinipath_hwe_htclnkabyte1crcerr;
-	/* don't check if 8bit HT */
-	if (dd->ipath_flags & IPATH_8BIT_IN_HT1)
-		crcbits &= ~infinipath_hwe_htclnkbbyte1crcerr;
-	/*
-	 * we'll want to ignore link errors on link that is
-	 * not in use, if any.  For now, complain about both
-	 */
-	if (crcbits) {
-		u16 ctrl0, ctrl1;
-		snprintf(bitsmsg, sizeof bitsmsg,
-			 "[HT%s lane %s CRC (%llx); powercycle to completely clear]",
-			 !(crcbits & _IPATH_HTLINK1_CRCBITS) ?
-			 "0 (A)" : (!(crcbits & _IPATH_HTLINK0_CRCBITS)
-				    ? "1 (B)" : "0+1 (A+B)"),
-			 !(crcbits & _IPATH_HTLANE1_CRCBITS) ? "0"
-			 : (!(crcbits & _IPATH_HTLANE0_CRCBITS) ? "1" :
-			    "0+1"), (unsigned long long) crcbits);
-		strlcat(msg, bitsmsg, msgl);
-
-		/*
-		 * print extra info for debugging.  slave/primary
-		 * config word 4, 8 (link control 0, 1)
-		 */
-
-		if (pci_read_config_word(dd->pcidev,
-					 dd->ipath_ht_slave_off + 0x4,
-					 &ctrl0))
-			dev_info(&dd->pcidev->dev, "Couldn't read "
-				 "linkctrl0 of slave/primary "
-				 "config block\n");
-		else if (!(ctrl0 & 1 << 6))
-			/* not if EOC bit set */
-			ipath_dbg("HT linkctrl0 0x%x%s%s\n", ctrl0,
-				  ((ctrl0 >> 8) & 7) ? " CRC" : "",
-				  ((ctrl0 >> 4) & 1) ? "linkfail" :
-				  "");
-		if (pci_read_config_word(dd->pcidev,
-					 dd->ipath_ht_slave_off + 0x8,
-					 &ctrl1))
-			dev_info(&dd->pcidev->dev, "Couldn't read "
-				 "linkctrl1 of slave/primary "
-				 "config block\n");
-		else if (!(ctrl1 & 1 << 6))
-			/* not if EOC bit set */
-			ipath_dbg("HT linkctrl1 0x%x%s%s\n", ctrl1,
-				  ((ctrl1 >> 8) & 7) ? " CRC" : "",
-				  ((ctrl1 >> 4) & 1) ? "linkfail" :
-				  "");
-
-		/* disable until driver reloaded */
-		dd->ipath_hwerrmask &= ~crcbits;
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
-				 dd->ipath_hwerrmask);
-		ipath_dbg("HT crc errs: %s\n", msg);
-	} else
-		ipath_dbg("ignoring HT crc errors 0x%llx, "
-			  "not in use\n", (unsigned long long)
-			  (hwerrs & (_IPATH_HTLINK0_CRCBITS |
-				     _IPATH_HTLINK1_CRCBITS)));
-}
-
-/* 6110 specific hardware errors... */
-static const struct ipath_hwerror_msgs ipath_6110_hwerror_msgs[] = {
-	INFINIPATH_HWE_MSG(HTCBUSIREQPARITYERR, "HTC Ireq Parity"),
-	INFINIPATH_HWE_MSG(HTCBUSTREQPARITYERR, "HTC Treq Parity"),
-	INFINIPATH_HWE_MSG(HTCBUSTRESPPARITYERR, "HTC Tresp Parity"),
-	INFINIPATH_HWE_MSG(HTCMISCERR5, "HT core Misc5"),
-	INFINIPATH_HWE_MSG(HTCMISCERR6, "HT core Misc6"),
-	INFINIPATH_HWE_MSG(HTCMISCERR7, "HT core Misc7"),
-	INFINIPATH_HWE_MSG(RXDSYNCMEMPARITYERR, "Rx Dsync"),
-	INFINIPATH_HWE_MSG(SERDESPLLFAILED, "SerDes PLL"),
-};
-
-#define TXE_PIO_PARITY ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | \
-		        INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) \
-		        << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)
-#define RXE_EAGER_PARITY (INFINIPATH_HWE_RXEMEMPARITYERR_EAGERTID \
-			  << INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT)
-
-static void ipath_ht_txe_recover(struct ipath_devdata *dd)
-{
-	++ipath_stats.sps_txeparity;
-	dev_info(&dd->pcidev->dev,
-		"Recovering from TXE PIO parity error\n");
-}
-
-
-/**
- * ipath_ht_handle_hwerrors - display hardware errors.
- * @dd: the infinipath device
- * @msg: the output buffer
- * @msgl: the size of the output buffer
- *
- * Use same msg buffer as regular errors to avoid excessive stack
- * use.  Most hardware errors are catastrophic, but for right now,
- * we'll print them and continue.  We reuse the same message buffer as
- * ipath_handle_errors() to avoid excessive stack usage.
- */
-static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
-				     size_t msgl)
-{
-	ipath_err_t hwerrs;
-	u32 bits, ctrl;
-	int isfatal = 0;
-	char bitsmsg[64];
-	int log_idx;
-
-	hwerrs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus);
-
-	if (!hwerrs) {
-		ipath_cdbg(VERBOSE, "Called but no hardware errors set\n");
-		/*
-		 * better than printing cofusing messages
-		 * This seems to be related to clearing the crc error, or
-		 * the pll error during init.
-		 */
-		goto bail;
-	} else if (hwerrs == -1LL) {
-		ipath_dev_err(dd, "Read of hardware error status failed "
-			      "(all bits set); ignoring\n");
-		goto bail;
-	}
-	ipath_stats.sps_hwerrs++;
-
-	/* Always clear the error status register, except MEMBISTFAIL,
-	 * regardless of whether we continue or stop using the chip.
-	 * We want that set so we know it failed, even across driver reload.
-	 * We'll still ignore it in the hwerrmask.  We do this partly for
-	 * diagnostics, but also for support */
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear,
-			 hwerrs&~INFINIPATH_HWE_MEMBISTFAILED);
-
-	hwerrs &= dd->ipath_hwerrmask;
-
-	/* We log some errors to EEPROM, check if we have any of those. */
-	for (log_idx = 0; log_idx < IPATH_EEP_LOG_CNT; ++log_idx)
-		if (hwerrs & dd->ipath_eep_st_masks[log_idx].hwerrs_to_log)
-			ipath_inc_eeprom_err(dd, log_idx, 1);
-
-	/*
-	 * make sure we get this much out, unless told to be quiet,
-	 * it's a parity error we may recover from,
-	 * or it's occurred within the last 5 seconds
-	 */
-	if ((hwerrs & ~(dd->ipath_lasthwerror | TXE_PIO_PARITY |
-		RXE_EAGER_PARITY)) ||
-		(ipath_debug & __IPATH_VERBDBG))
-		dev_info(&dd->pcidev->dev, "Hardware error: hwerr=0x%llx "
-			 "(cleared)\n", (unsigned long long) hwerrs);
-	dd->ipath_lasthwerror |= hwerrs;
-
-	if (hwerrs & ~dd->ipath_hwe_bitsextant)
-		ipath_dev_err(dd, "hwerror interrupt with unknown errors "
-			      "%llx set\n", (unsigned long long)
-			      (hwerrs & ~dd->ipath_hwe_bitsextant));
-
-	ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control);
-	if ((ctrl & INFINIPATH_C_FREEZEMODE) && !ipath_diag_inuse) {
-		/*
-		 * parity errors in send memory are recoverable,
-		 * just cancel the send (if indicated in * sendbuffererror),
-		 * count the occurrence, unfreeze (if no other handled
-		 * hardware error bits are set), and continue. They can
-		 * occur if a processor speculative read is done to the PIO
-		 * buffer while we are sending a packet, for example.
-		 */
-		if (hwerrs & TXE_PIO_PARITY) {
-			ipath_ht_txe_recover(dd);
-			hwerrs &= ~TXE_PIO_PARITY;
-		}
-
-		if (!hwerrs) {
-			ipath_dbg("Clearing freezemode on ignored or "
-				  "recovered hardware error\n");
-			ipath_clear_freeze(dd);
-		}
-	}
-
-	*msg = '\0';
-
-	/*
-	 * may someday want to decode into which bits are which
-	 * functional area for parity errors, etc.
-	 */
-	if (hwerrs & (infinipath_hwe_htcmemparityerr_mask
-		      << INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT)) {
-		bits = (u32) ((hwerrs >>
-			       INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT) &
-			      INFINIPATH_HWE_HTCMEMPARITYERR_MASK);
-		snprintf(bitsmsg, sizeof bitsmsg, "[HTC Parity Errs %x] ",
-			 bits);
-		strlcat(msg, bitsmsg, msgl);
-	}
-
-	ipath_format_hwerrors(hwerrs,
-			      ipath_6110_hwerror_msgs,
-			      ARRAY_SIZE(ipath_6110_hwerror_msgs),
-			      msg, msgl);
-
-	if (hwerrs & (_IPATH_HTLINK0_CRCBITS | _IPATH_HTLINK1_CRCBITS))
-		hwerr_crcbits(dd, hwerrs, msg, msgl);
-
-	if (hwerrs & INFINIPATH_HWE_MEMBISTFAILED) {
-		strlcat(msg, "[Memory BIST test failed, InfiniPath hardware unusable]",
-			msgl);
-		/* ignore from now on, so disable until driver reloaded */
-		dd->ipath_hwerrmask &= ~INFINIPATH_HWE_MEMBISTFAILED;
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
-				 dd->ipath_hwerrmask);
-	}
-#define _IPATH_PLL_FAIL (INFINIPATH_HWE_COREPLL_FBSLIP |	\
-			 INFINIPATH_HWE_COREPLL_RFSLIP |	\
-			 INFINIPATH_HWE_HTBPLL_FBSLIP |		\
-			 INFINIPATH_HWE_HTBPLL_RFSLIP |		\
-			 INFINIPATH_HWE_HTAPLL_FBSLIP |		\
-			 INFINIPATH_HWE_HTAPLL_RFSLIP)
-
-	if (hwerrs & _IPATH_PLL_FAIL) {
-		snprintf(bitsmsg, sizeof bitsmsg,
-			 "[PLL failed (%llx), InfiniPath hardware unusable]",
-			 (unsigned long long) (hwerrs & _IPATH_PLL_FAIL));
-		strlcat(msg, bitsmsg, msgl);
-		/* ignore from now on, so disable until driver reloaded */
-		dd->ipath_hwerrmask &= ~(hwerrs & _IPATH_PLL_FAIL);
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
-				 dd->ipath_hwerrmask);
-	}
-
-	if (hwerrs & INFINIPATH_HWE_SERDESPLLFAILED) {
-		/*
-		 * If it occurs, it is left masked since the eternal
-		 * interface is unused
-		 */
-		dd->ipath_hwerrmask &= ~INFINIPATH_HWE_SERDESPLLFAILED;
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
-				 dd->ipath_hwerrmask);
-	}
-
-	if (hwerrs) {
-		/*
-		 * if any set that we aren't ignoring; only
-		 * make the complaint once, in case it's stuck
-		 * or recurring, and we get here multiple
-		 * times.
-		 * force link down, so switch knows, and
-		 * LEDs are turned off
-		 */
-		if (dd->ipath_flags & IPATH_INITTED) {
-			ipath_set_linkstate(dd, IPATH_IB_LINKDOWN);
-			ipath_setup_ht_setextled(dd,
-				INFINIPATH_IBCS_L_STATE_DOWN,
-				INFINIPATH_IBCS_LT_STATE_DISABLED);
-			ipath_dev_err(dd, "Fatal Hardware Error (freeze "
-					  "mode), no longer usable, SN %.16s\n",
-					  dd->ipath_serial);
-			isfatal = 1;
-		}
-		*dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
-		/* mark as having had error */
-		*dd->ipath_statusp |= IPATH_STATUS_HWERROR;
-		/*
-		 * mark as not usable, at a minimum until driver
-		 * is reloaded, probably until reboot, since no
-		 * other reset is possible.
-		 */
-		dd->ipath_flags &= ~IPATH_INITTED;
-	} else {
-		*msg = 0; /* recovered from all of them */
-	}
-	if (*msg)
-		ipath_dev_err(dd, "%s hardware error\n", msg);
-	if (isfatal && !ipath_diag_inuse && dd->ipath_freezemsg)
-		/*
-		 * for status file; if no trailing brace is copied,
-		 * we'll know it was truncated.
-		 */
-		snprintf(dd->ipath_freezemsg,
-			 dd->ipath_freezelen, "{%s}", msg);
-
-bail:;
-}
-
-/**
- * ipath_ht_boardname - fill in the board name
- * @dd: the infinipath device
- * @name: the output buffer
- * @namelen: the size of the output buffer
- *
- * fill in the board name, based on the board revision register
- */
-static int ipath_ht_boardname(struct ipath_devdata *dd, char *name,
-			      size_t namelen)
-{
-	char *n = NULL;
-	u8 boardrev = dd->ipath_boardrev;
-	int ret = 0;
-
-	switch (boardrev) {
-	case 5:
-		/*
-		 * original production board; two production levels, with
-		 * different serial number ranges.   See ipath_ht_early_init() for
-		 * case where we enable IPATH_GPIO_INTR for later serial # range.
-		 * Original 112* serial number is no longer supported.
-		 */
-		n = "InfiniPath_QHT7040";
-		break;
-	case 7:
-		/* small form factor production board */
-		n = "InfiniPath_QHT7140";
-		break;
-	default:		/* don't know, just print the number */
-		ipath_dev_err(dd, "Don't yet know about board "
-			      "with ID %u\n", boardrev);
-		snprintf(name, namelen, "Unknown_InfiniPath_QHT7xxx_%u",
-			 boardrev);
-		break;
-	}
-	if (n)
-		snprintf(name, namelen, "%s", n);
-
-	if (ret) {
-		ipath_dev_err(dd, "Unsupported InfiniPath board %s!\n", name);
-		goto bail;
-	}
-	if (dd->ipath_majrev != 3 || (dd->ipath_minrev < 2 ||
-		dd->ipath_minrev > 4)) {
-		/*
-		 * This version of the driver only supports Rev 3.2 - 3.4
-		 */
-		ipath_dev_err(dd,
-			      "Unsupported InfiniPath hardware revision %u.%u!\n",
-			      dd->ipath_majrev, dd->ipath_minrev);
-		ret = 1;
-		goto bail;
-	}
-	/*
-	 * pkt/word counters are 32 bit, and therefore wrap fast enough
-	 * that we snapshot them from a timer, and maintain 64 bit shadow
-	 * copies
-	 */
-	dd->ipath_flags |= IPATH_32BITCOUNTERS;
-	dd->ipath_flags |= IPATH_GPIO_INTR;
-	if (dd->ipath_lbus_speed != 800)
-		ipath_dev_err(dd,
-			      "Incorrectly configured for HT @ %uMHz\n",
-			      dd->ipath_lbus_speed);
-
-	/*
-	 * set here, not in ipath_init_*_funcs because we have to do
-	 * it after we can read chip registers.
-	 */
-	dd->ipath_ureg_align =
-		ipath_read_kreg32(dd, dd->ipath_kregs->kr_pagealign);
-
-bail:
-	return ret;
-}
-
-static void ipath_check_htlink(struct ipath_devdata *dd)
-{
-	u8 linkerr, link_off, i;
-
-	for (i = 0; i < 2; i++) {
-		link_off = dd->ipath_ht_slave_off + i * 4 + 0xd;
-		if (pci_read_config_byte(dd->pcidev, link_off, &linkerr))
-			dev_info(&dd->pcidev->dev, "Couldn't read "
-				 "linkerror%d of HT slave/primary block\n",
-				 i);
-		else if (linkerr & 0xf0) {
-			ipath_cdbg(VERBOSE, "HT linkerr%d bits 0x%x set, "
-				   "clearing\n", linkerr >> 4, i);
-			/*
-			 * writing the linkerr bits that are set should
-			 * clear them
-			 */
-			if (pci_write_config_byte(dd->pcidev, link_off,
-						  linkerr))
-				ipath_dbg("Failed write to clear HT "
-					  "linkerror%d\n", i);
-			if (pci_read_config_byte(dd->pcidev, link_off,
-						 &linkerr))
-				dev_info(&dd->pcidev->dev,
-					 "Couldn't reread linkerror%d of "
-					 "HT slave/primary block\n", i);
-			else if (linkerr & 0xf0)
-				dev_info(&dd->pcidev->dev,
-					 "HT linkerror%d bits 0x%x "
-					 "couldn't be cleared\n",
-					 i, linkerr >> 4);
-		}
-	}
-}
-
-static int ipath_setup_ht_reset(struct ipath_devdata *dd)
-{
-	ipath_dbg("No reset possible for this InfiniPath hardware\n");
-	return 0;
-}
-
-#define HT_INTR_DISC_CONFIG  0x80	/* HT interrupt and discovery cap */
-#define HT_INTR_REG_INDEX    2	/* intconfig requires indirect accesses */
-
-/*
- * Bits 13-15 of command==0 is slave/primary block.  Clear any HT CRC
- * errors.  We only bother to do this at load time, because it's OK if
- * it happened before we were loaded (first time after boot/reset),
- * but any time after that, it's fatal anyway.  Also need to not check
- * for upper byte errors if we are in 8 bit mode, so figure out
- * our width.  For now, at least, also complain if it's 8 bit.
- */
-static void slave_or_pri_blk(struct ipath_devdata *dd, struct pci_dev *pdev,
-			     int pos, u8 cap_type)
-{
-	u8 linkwidth = 0, linkerr, link_a_b_off, link_off;
-	u16 linkctrl = 0;
-	int i;
-
-	dd->ipath_ht_slave_off = pos;
-	/* command word, master_host bit */
-	/* master host || slave */
-	if ((cap_type >> 2) & 1)
-		link_a_b_off = 4;
-	else
-		link_a_b_off = 0;
-	ipath_cdbg(VERBOSE, "HT%u (Link %c) connected to processor\n",
-		   link_a_b_off ? 1 : 0,
-		   link_a_b_off ? 'B' : 'A');
-
-	link_a_b_off += pos;
-
-	/*
-	 * check both link control registers; clear both HT CRC sets if
-	 * necessary.
-	 */
-	for (i = 0; i < 2; i++) {
-		link_off = pos + i * 4 + 0x4;
-		if (pci_read_config_word(pdev, link_off, &linkctrl))
-			ipath_dev_err(dd, "Couldn't read HT link control%d "
-				      "register\n", i);
-		else if (linkctrl & (0xf << 8)) {
-			ipath_cdbg(VERBOSE, "Clear linkctrl%d CRC Error "
-				   "bits %x\n", i, linkctrl & (0xf << 8));
-			/*
-			 * now write them back to clear the error.
-			 */
-			pci_write_config_word(pdev, link_off,
-					      linkctrl & (0xf << 8));
-		}
-	}
-
-	/*
-	 * As with HT CRC bits, same for protocol errors that might occur
-	 * during boot.
-	 */
-	for (i = 0; i < 2; i++) {
-		link_off = pos + i * 4 + 0xd;
-		if (pci_read_config_byte(pdev, link_off, &linkerr))
-			dev_info(&pdev->dev, "Couldn't read linkerror%d "
-				 "of HT slave/primary block\n", i);
-		else if (linkerr & 0xf0) {
-			ipath_cdbg(VERBOSE, "HT linkerr%d bits 0x%x set, "
-				   "clearing\n", linkerr >> 4, i);
-			/*
-			 * writing the linkerr bits that are set will clear
-			 * them
-			 */
-			if (pci_write_config_byte
-			    (pdev, link_off, linkerr))
-				ipath_dbg("Failed write to clear HT "
-					  "linkerror%d\n", i);
-			if (pci_read_config_byte(pdev, link_off, &linkerr))
-				dev_info(&pdev->dev, "Couldn't reread "
-					 "linkerror%d of HT slave/primary "
-					 "block\n", i);
-			else if (linkerr & 0xf0)
-				dev_info(&pdev->dev, "HT linkerror%d bits "
-					 "0x%x couldn't be cleared\n",
-					 i, linkerr >> 4);
-		}
-	}
-
-	/*
-	 * this is just for our link to the host, not devices connected
-	 * through tunnel.
-	 */
-
-	if (pci_read_config_byte(pdev, link_a_b_off + 7, &linkwidth))
-		ipath_dev_err(dd, "Couldn't read HT link width "
-			      "config register\n");
-	else {
-		u32 width;
-		switch (linkwidth & 7) {
-		case 5:
-			width = 4;
-			break;
-		case 4:
-			width = 2;
-			break;
-		case 3:
-			width = 32;
-			break;
-		case 1:
-			width = 16;
-			break;
-		case 0:
-		default:	/* if wrong, assume 8 bit */
-			width = 8;
-			break;
-		}
-
-		dd->ipath_lbus_width = width;
-
-		if (linkwidth != 0x11) {
-			ipath_dev_err(dd, "Not configured for 16 bit HT "
-				      "(%x)\n", linkwidth);
-			if (!(linkwidth & 0xf)) {
-				ipath_dbg("Will ignore HT lane1 errors\n");
-				dd->ipath_flags |= IPATH_8BIT_IN_HT0;
-			}
-		}
-	}
-
-	/*
-	 * this is just for our link to the host, not devices connected
-	 * through tunnel.
-	 */
-	if (pci_read_config_byte(pdev, link_a_b_off + 0xd, &linkwidth))
-		ipath_dev_err(dd, "Couldn't read HT link frequency "
-			      "config register\n");
-	else {
-		u32 speed;
-		switch (linkwidth & 0xf) {
-		case 6:
-			speed = 1000;
-			break;
-		case 5:
-			speed = 800;
-			break;
-		case 4:
-			speed = 600;
-			break;
-		case 3:
-			speed = 500;
-			break;
-		case 2:
-			speed = 400;
-			break;
-		case 1:
-			speed = 300;
-			break;
-		default:
-			/*
-			 * assume reserved and vendor-specific are 200...
-			 */
-		case 0:
-			speed = 200;
-			break;
-		}
-		dd->ipath_lbus_speed = speed;
-	}
-
-	snprintf(dd->ipath_lbus_info, sizeof(dd->ipath_lbus_info),
-		"HyperTransport,%uMHz,x%u\n",
-		dd->ipath_lbus_speed,
-		dd->ipath_lbus_width);
-}
-
-static int ipath_ht_intconfig(struct ipath_devdata *dd)
-{
-	int ret;
-
-	if (dd->ipath_intconfig) {
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_interruptconfig,
-				 dd->ipath_intconfig);	/* interrupt address */
-		ret = 0;
-	} else {
-		ipath_dev_err(dd, "No interrupts enabled, couldn't setup "
-			      "interrupt address\n");
-		ret = -EINVAL;
-	}
-
-	return ret;
-}
-
-static void ipath_ht_irq_update(struct pci_dev *dev, int irq,
-				struct ht_irq_msg *msg)
-{
-	struct ipath_devdata *dd = pci_get_drvdata(dev);
-	u64 prev_intconfig = dd->ipath_intconfig;
-
-	dd->ipath_intconfig = msg->address_lo;
-	dd->ipath_intconfig |= ((u64) msg->address_hi) << 32;
-
-	/*
-	 * If the previous value of dd->ipath_intconfig is zero, we're
-	 * getting configured for the first time, and must not program the
-	 * intconfig register here (it will be programmed later, when the
-	 * hardware is ready).  Otherwise, we should.
-	 */
-	if (prev_intconfig)
-		ipath_ht_intconfig(dd);
-}
-
-/**
- * ipath_setup_ht_config - setup the interruptconfig register
- * @dd: the infinipath device
- * @pdev: the PCI device
- *
- * setup the interruptconfig register from the HT config info.
- * Also clear CRC errors in HT linkcontrol, if necessary.
- * This is done only for the real hardware.  It is done before
- * chip address space is initted, so can't touch infinipath registers
- */
-static int ipath_setup_ht_config(struct ipath_devdata *dd,
-				 struct pci_dev *pdev)
-{
-	int pos, ret;
-
-	ret = __ht_create_irq(pdev, 0, ipath_ht_irq_update);
-	if (ret < 0) {
-		ipath_dev_err(dd, "Couldn't create interrupt handler: "
-			      "err %d\n", ret);
-		goto bail;
-	}
-	dd->ipath_irq = ret;
-	ret = 0;
-
-	/*
-	 * Handle clearing CRC errors in linkctrl register if necessary.  We
-	 * do this early, before we ever enable errors or hardware errors,
-	 * mostly to avoid causing the chip to enter freeze mode.
-	 */
-	pos = pci_find_capability(pdev, PCI_CAP_ID_HT);
-	if (!pos) {
-		ipath_dev_err(dd, "Couldn't find HyperTransport "
-			      "capability; no interrupts\n");
-		ret = -ENODEV;
-		goto bail;
-	}
-	do {
-		u8 cap_type;
-
-		/*
-		 * The HT capability type byte is 3 bytes after the
-		 * capability byte.
-		 */
-		if (pci_read_config_byte(pdev, pos + 3, &cap_type)) {
-			dev_info(&pdev->dev, "Couldn't read config "
-				 "command @ %d\n", pos);
-			continue;
-		}
-		if (!(cap_type & 0xE0))
-			slave_or_pri_blk(dd, pdev, pos, cap_type);
-	} while ((pos = pci_find_next_capability(pdev, pos,
-						 PCI_CAP_ID_HT)));
-
-	dd->ipath_flags |= IPATH_SWAP_PIOBUFS;
-
-bail:
-	return ret;
-}
-
-/**
- * ipath_setup_ht_cleanup - clean up any per-chip chip-specific stuff
- * @dd: the infinipath device
- *
- * Called during driver unload.
- * This is currently a nop for the HT chip, not for all chips
- */
-static void ipath_setup_ht_cleanup(struct ipath_devdata *dd)
-{
-}
-
-/**
- * ipath_setup_ht_setextled - set the state of the two external LEDs
- * @dd: the infinipath device
- * @lst: the L state
- * @ltst: the LT state
- *
- * Set the state of the two external LEDs, to indicate physical and
- * logical state of IB link.   For this chip (at least with recommended
- * board pinouts), LED1 is Green (physical state), and LED2 is Yellow
- * (logical state)
- *
- * Note:  We try to match the Mellanox HCA LED behavior as best
- * we can.  Green indicates physical link state is OK (something is
- * plugged in, and we can train).
- * Amber indicates the link is logically up (ACTIVE).
- * Mellanox further blinks the amber LED to indicate data packet
- * activity, but we have no hardware support for that, so it would
- * require waking up every 10-20 msecs and checking the counters
- * on the chip, and then turning the LED off if appropriate.  That's
- * visible overhead, so not something we will do.
- *
- */
-static void ipath_setup_ht_setextled(struct ipath_devdata *dd,
-				     u64 lst, u64 ltst)
-{
-	u64 extctl;
-	unsigned long flags = 0;
-
-	/* the diags use the LED to indicate diag info, so we leave
-	 * the external LED alone when the diags are running */
-	if (ipath_diag_inuse)
-		return;
-
-	/* Allow override of LED display for, e.g. Locating system in rack */
-	if (dd->ipath_led_override) {
-		ltst = (dd->ipath_led_override & IPATH_LED_PHYS)
-			? INFINIPATH_IBCS_LT_STATE_LINKUP
-			: INFINIPATH_IBCS_LT_STATE_DISABLED;
-		lst = (dd->ipath_led_override & IPATH_LED_LOG)
-			? INFINIPATH_IBCS_L_STATE_ACTIVE
-			: INFINIPATH_IBCS_L_STATE_DOWN;
-	}
-
-	spin_lock_irqsave(&dd->ipath_gpio_lock, flags);
-	/*
-	 * start by setting both LED control bits to off, then turn
-	 * on the appropriate bit(s).
-	 */
-	if (dd->ipath_boardrev == 8) { /* LS/X-1 uses different pins */
-		/*
-		 * major difference is that INFINIPATH_EXTC_LEDGBLERR_OFF
-		 * is inverted,  because it is normally used to indicate
-		 * a hardware fault at reset, if there were errors
-		 */
-		extctl = (dd->ipath_extctrl & ~INFINIPATH_EXTC_LEDGBLOK_ON)
-			| INFINIPATH_EXTC_LEDGBLERR_OFF;
-		if (ltst == INFINIPATH_IBCS_LT_STATE_LINKUP)
-			extctl &= ~INFINIPATH_EXTC_LEDGBLERR_OFF;
-		if (lst == INFINIPATH_IBCS_L_STATE_ACTIVE)
-			extctl |= INFINIPATH_EXTC_LEDGBLOK_ON;
-	} else {
-		extctl = dd->ipath_extctrl &
-			~(INFINIPATH_EXTC_LED1PRIPORT_ON |
-			  INFINIPATH_EXTC_LED2PRIPORT_ON);
-		if (ltst == INFINIPATH_IBCS_LT_STATE_LINKUP)
-			extctl |= INFINIPATH_EXTC_LED1PRIPORT_ON;
-		if (lst == INFINIPATH_IBCS_L_STATE_ACTIVE)
-			extctl |= INFINIPATH_EXTC_LED2PRIPORT_ON;
-	}
-	dd->ipath_extctrl = extctl;
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, extctl);
-	spin_unlock_irqrestore(&dd->ipath_gpio_lock, flags);
-}
-
-static void ipath_init_ht_variables(struct ipath_devdata *dd)
-{
-	/*
-	 * setup the register offsets, since they are different for each
-	 * chip
-	 */
-	dd->ipath_kregs = &ipath_ht_kregs;
-	dd->ipath_cregs = &ipath_ht_cregs;
-
-	dd->ipath_gpio_sda_num = _IPATH_GPIO_SDA_NUM;
-	dd->ipath_gpio_scl_num = _IPATH_GPIO_SCL_NUM;
-	dd->ipath_gpio_sda = IPATH_GPIO_SDA;
-	dd->ipath_gpio_scl = IPATH_GPIO_SCL;
-
-	/*
-	 * Fill in data for field-values that change in newer chips.
-	 * We dynamically specify only the mask for LINKTRAININGSTATE
-	 * and only the shift for LINKSTATE, as they are the only ones
-	 * that change.  Also precalculate the 3 link states of interest
-	 * and the combined mask.
-	 */
-	dd->ibcs_ls_shift = IBA6110_IBCS_LINKSTATE_SHIFT;
-	dd->ibcs_lts_mask = IBA6110_IBCS_LINKTRAININGSTATE_MASK;
-	dd->ibcs_mask = (INFINIPATH_IBCS_LINKSTATE_MASK <<
-		dd->ibcs_ls_shift) | dd->ibcs_lts_mask;
-	dd->ib_init = (INFINIPATH_IBCS_LT_STATE_LINKUP <<
-		INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) |
-		(INFINIPATH_IBCS_L_STATE_INIT << dd->ibcs_ls_shift);
-	dd->ib_arm = (INFINIPATH_IBCS_LT_STATE_LINKUP <<
-		INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) |
-		(INFINIPATH_IBCS_L_STATE_ARM << dd->ibcs_ls_shift);
-	dd->ib_active = (INFINIPATH_IBCS_LT_STATE_LINKUP <<
-		INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) |
-		(INFINIPATH_IBCS_L_STATE_ACTIVE << dd->ibcs_ls_shift);
-
-	/*
-	 * Fill in data for ibcc field-values that change in newer chips.
-	 * We dynamically specify only the mask for LINKINITCMD
-	 * and only the shift for LINKCMD and MAXPKTLEN, as they are
-	 * the only ones that change.
-	 */
-	dd->ibcc_lic_mask = INFINIPATH_IBCC_LINKINITCMD_MASK;
-	dd->ibcc_lc_shift = INFINIPATH_IBCC_LINKCMD_SHIFT;
-	dd->ibcc_mpl_shift = INFINIPATH_IBCC_MAXPKTLEN_SHIFT;
-
-	/* Fill in shifts for RcvCtrl. */
-	dd->ipath_r_portenable_shift = INFINIPATH_R_PORTENABLE_SHIFT;
-	dd->ipath_r_intravail_shift = INFINIPATH_R_INTRAVAIL_SHIFT;
-	dd->ipath_r_tailupd_shift = INFINIPATH_R_TAILUPD_SHIFT;
-	dd->ipath_r_portcfg_shift = 0; /* Not on IBA6110 */
-
-	dd->ipath_i_bitsextant =
-		(INFINIPATH_I_RCVURG_MASK << INFINIPATH_I_RCVURG_SHIFT) |
-		(INFINIPATH_I_RCVAVAIL_MASK <<
-		 INFINIPATH_I_RCVAVAIL_SHIFT) |
-		INFINIPATH_I_ERROR | INFINIPATH_I_SPIOSENT |
-		INFINIPATH_I_SPIOBUFAVAIL | INFINIPATH_I_GPIO;
-
-	dd->ipath_e_bitsextant =
-		INFINIPATH_E_RFORMATERR | INFINIPATH_E_RVCRC |
-		INFINIPATH_E_RICRC | INFINIPATH_E_RMINPKTLEN |
-		INFINIPATH_E_RMAXPKTLEN | INFINIPATH_E_RLONGPKTLEN |
-		INFINIPATH_E_RSHORTPKTLEN | INFINIPATH_E_RUNEXPCHAR |
-		INFINIPATH_E_RUNSUPVL | INFINIPATH_E_REBP |
-		INFINIPATH_E_RIBFLOW | INFINIPATH_E_RBADVERSION |
-		INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL |
-		INFINIPATH_E_RBADTID | INFINIPATH_E_RHDRLEN |
-		INFINIPATH_E_RHDR | INFINIPATH_E_RIBLOSTLINK |
-		INFINIPATH_E_SMINPKTLEN | INFINIPATH_E_SMAXPKTLEN |
-		INFINIPATH_E_SUNDERRUN | INFINIPATH_E_SPKTLEN |
-		INFINIPATH_E_SDROPPEDSMPPKT | INFINIPATH_E_SDROPPEDDATAPKT |
-		INFINIPATH_E_SPIOARMLAUNCH | INFINIPATH_E_SUNEXPERRPKTNUM |
-		INFINIPATH_E_SUNSUPVL | INFINIPATH_E_IBSTATUSCHANGED |
-		INFINIPATH_E_INVALIDADDR | INFINIPATH_E_RESET |
-		INFINIPATH_E_HARDWARE;
-
-	dd->ipath_hwe_bitsextant =
-		(INFINIPATH_HWE_HTCMEMPARITYERR_MASK <<
-		 INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT) |
-		(INFINIPATH_HWE_TXEMEMPARITYERR_MASK <<
-		 INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) |
-		(INFINIPATH_HWE_RXEMEMPARITYERR_MASK <<
-		 INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) |
-		INFINIPATH_HWE_HTCLNKABYTE0CRCERR |
-		INFINIPATH_HWE_HTCLNKABYTE1CRCERR |
-		INFINIPATH_HWE_HTCLNKBBYTE0CRCERR |
-		INFINIPATH_HWE_HTCLNKBBYTE1CRCERR |
-		INFINIPATH_HWE_HTCMISCERR4 |
-		INFINIPATH_HWE_HTCMISCERR5 | INFINIPATH_HWE_HTCMISCERR6 |
-		INFINIPATH_HWE_HTCMISCERR7 |
-		INFINIPATH_HWE_HTCBUSTREQPARITYERR |
-		INFINIPATH_HWE_HTCBUSTRESPPARITYERR |
-		INFINIPATH_HWE_HTCBUSIREQPARITYERR |
-		INFINIPATH_HWE_RXDSYNCMEMPARITYERR |
-		INFINIPATH_HWE_MEMBISTFAILED |
-		INFINIPATH_HWE_COREPLL_FBSLIP |
-		INFINIPATH_HWE_COREPLL_RFSLIP |
-		INFINIPATH_HWE_HTBPLL_FBSLIP |
-		INFINIPATH_HWE_HTBPLL_RFSLIP |
-		INFINIPATH_HWE_HTAPLL_FBSLIP |
-		INFINIPATH_HWE_HTAPLL_RFSLIP |
-		INFINIPATH_HWE_SERDESPLLFAILED |
-		INFINIPATH_HWE_IBCBUSTOSPCPARITYERR |
-		INFINIPATH_HWE_IBCBUSFRSPCPARITYERR;
-
-	dd->ipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK;
-	dd->ipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK;
-	dd->ipath_i_rcvavail_shift = INFINIPATH_I_RCVAVAIL_SHIFT;
-	dd->ipath_i_rcvurg_shift = INFINIPATH_I_RCVURG_SHIFT;
-
-	/*
-	 * EEPROM error log 0 is TXE Parity errors. 1 is RXE Parity.
-	 * 2 is Some Misc, 3 is reserved for future.
-	 */
-	dd->ipath_eep_st_masks[0].hwerrs_to_log =
-		INFINIPATH_HWE_TXEMEMPARITYERR_MASK <<
-		INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT;
-
-	dd->ipath_eep_st_masks[1].hwerrs_to_log =
-		INFINIPATH_HWE_RXEMEMPARITYERR_MASK <<
-		INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT;
-
-	dd->ipath_eep_st_masks[2].errs_to_log = INFINIPATH_E_RESET;
-
-	dd->delay_mult = 2; /* SDR, 4X, can't change */
-
-	dd->ipath_link_width_supported = IB_WIDTH_1X | IB_WIDTH_4X;
-	dd->ipath_link_speed_supported = IPATH_IB_SDR;
-	dd->ipath_link_width_enabled = IB_WIDTH_4X;
-	dd->ipath_link_speed_enabled = dd->ipath_link_speed_supported;
-	/* these can't change for this chip, so set once */
-	dd->ipath_link_width_active = dd->ipath_link_width_enabled;
-	dd->ipath_link_speed_active = dd->ipath_link_speed_enabled;
-}
-
-/**
- * ipath_ht_init_hwerrors - enable hardware errors
- * @dd: the infinipath device
- *
- * now that we have finished initializing everything that might reasonably
- * cause a hardware error, and cleared those errors bits as they occur,
- * we can enable hardware errors in the mask (potentially enabling
- * freeze mode), and enable hardware errors as errors (along with
- * everything else) in errormask
- */
-static void ipath_ht_init_hwerrors(struct ipath_devdata *dd)
-{
-	ipath_err_t val;
-	u64 extsval;
-
-	extsval = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extstatus);
-
-	if (!(extsval & INFINIPATH_EXTS_MEMBIST_ENDTEST))
-		ipath_dev_err(dd, "MemBIST did not complete!\n");
-	if (extsval & INFINIPATH_EXTS_MEMBIST_CORRECT)
-		ipath_dbg("MemBIST corrected\n");
-
-	ipath_check_htlink(dd);
-
-	/* barring bugs, all hwerrors become interrupts, which can */
-	val = -1LL;
-	/* don't look at crc lane1 if 8 bit */
-	if (dd->ipath_flags & IPATH_8BIT_IN_HT0)
-		val &= ~infinipath_hwe_htclnkabyte1crcerr;
-	/* don't look at crc lane1 if 8 bit */
-	if (dd->ipath_flags & IPATH_8BIT_IN_HT1)
-		val &= ~infinipath_hwe_htclnkbbyte1crcerr;
-
-	/*
-	 * disable RXDSYNCMEMPARITY because external serdes is unused,
-	 * and therefore the logic will never be used or initialized,
-	 * and uninitialized state will normally result in this error
-	 * being asserted.  Similarly for the external serdess pll
-	 * lock signal.
-	 */
-	val &= ~(INFINIPATH_HWE_SERDESPLLFAILED |
-		 INFINIPATH_HWE_RXDSYNCMEMPARITYERR);
-
-	/*
-	 * Disable MISCERR4 because of an inversion in the HT core
-	 * logic checking for errors that cause this bit to be set.
-	 * The errata can also cause the protocol error bit to be set
-	 * in the HT config space linkerror register(s).
-	 */
-	val &= ~INFINIPATH_HWE_HTCMISCERR4;
-
-	/*
-	 * PLL ignored because unused MDIO interface has a logic problem
-	 */
-	if (dd->ipath_boardrev == 4 || dd->ipath_boardrev == 9)
-		val &= ~INFINIPATH_HWE_SERDESPLLFAILED;
-	dd->ipath_hwerrmask = val;
-}
-
-
-
-
-/**
- * ipath_ht_bringup_serdes - bring up the serdes
- * @dd: the infinipath device
- */
-static int ipath_ht_bringup_serdes(struct ipath_devdata *dd)
-{
-	u64 val, config1;
-	int ret = 0, change = 0;
-
-	ipath_dbg("Trying to bringup serdes\n");
-
-	if (ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus) &
-	    INFINIPATH_HWE_SERDESPLLFAILED)
-	{
-		ipath_dbg("At start, serdes PLL failed bit set in "
-			  "hwerrstatus, clearing and continuing\n");
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear,
-				 INFINIPATH_HWE_SERDESPLLFAILED);
-	}
-
-	val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig0);
-	config1 = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig1);
-
-	ipath_cdbg(VERBOSE, "Initial serdes status is config0=%llx "
-		   "config1=%llx, sstatus=%llx xgxs %llx\n",
-		   (unsigned long long) val, (unsigned long long) config1,
-		   (unsigned long long)
-		   ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesstatus),
-		   (unsigned long long)
-		   ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig));
-
-	/* force reset on */
-	val |= INFINIPATH_SERDC0_RESET_PLL
-		/* | INFINIPATH_SERDC0_RESET_MASK */
-		;
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, val);
-	udelay(15);		/* need pll reset set at least for a bit */
-
-	if (val & INFINIPATH_SERDC0_RESET_PLL) {
-		u64 val2 = val &= ~INFINIPATH_SERDC0_RESET_PLL;
-		/* set lane resets, and tx idle, during pll reset */
-		val2 |= INFINIPATH_SERDC0_RESET_MASK |
-			INFINIPATH_SERDC0_TXIDLE;
-		ipath_cdbg(VERBOSE, "Clearing serdes PLL reset (writing "
-			   "%llx)\n", (unsigned long long) val2);
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0,
-				 val2);
-		/*
-		 * be sure chip saw it
-		 */
-		val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-		/*
-		 * need pll reset clear at least 11 usec before lane
-		 * resets cleared; give it a few more
-		 */
-		udelay(15);
-		val = val2;	/* for check below */
-	}
-
-	if (val & (INFINIPATH_SERDC0_RESET_PLL |
-		   INFINIPATH_SERDC0_RESET_MASK |
-		   INFINIPATH_SERDC0_TXIDLE)) {
-		val &= ~(INFINIPATH_SERDC0_RESET_PLL |
-			 INFINIPATH_SERDC0_RESET_MASK |
-			 INFINIPATH_SERDC0_TXIDLE);
-		/* clear them */
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0,
-				 val);
-	}
-
-	val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig);
-	if (val & INFINIPATH_XGXS_RESET) {
-		/* normally true after boot */
-		val &= ~INFINIPATH_XGXS_RESET;
-		change = 1;
-	}
-	if (((val >> INFINIPATH_XGXS_RX_POL_SHIFT) &
-	     INFINIPATH_XGXS_RX_POL_MASK) != dd->ipath_rx_pol_inv ) {
-		/* need to compensate for Tx inversion in partner */
-		val &= ~(INFINIPATH_XGXS_RX_POL_MASK <<
-		         INFINIPATH_XGXS_RX_POL_SHIFT);
-		val |= dd->ipath_rx_pol_inv <<
-			INFINIPATH_XGXS_RX_POL_SHIFT;
-		change = 1;
-	}
-	if (change)
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val);
-
-	val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig0);
-
-	/* clear current and de-emphasis bits */
-	config1 &= ~0x0ffffffff00ULL;
-	/* set current to 20ma */
-	config1 |= 0x00000000000ULL;
-	/* set de-emphasis to -5.68dB */
-	config1 |= 0x0cccc000000ULL;
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig1, config1);
-
-	ipath_cdbg(VERBOSE, "After setup: serdes status is config0=%llx "
-		   "config1=%llx, sstatus=%llx xgxs %llx\n",
-		   (unsigned long long) val, (unsigned long long) config1,
-		   (unsigned long long)
-		   ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesstatus),
-		   (unsigned long long)
-		   ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig));
-
-	return ret;		/* for now, say we always succeeded */
-}
-
-/**
- * ipath_ht_quiet_serdes - set serdes to txidle
- * @dd: the infinipath device
- * driver is being unloaded
- */
-static void ipath_ht_quiet_serdes(struct ipath_devdata *dd)
-{
-	u64 val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig0);
-
-	val |= INFINIPATH_SERDC0_TXIDLE;
-	ipath_dbg("Setting TxIdleEn on serdes (config0 = %llx)\n",
-		  (unsigned long long) val);
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, val);
-}
-
-/**
- * ipath_pe_put_tid - write a TID in chip
- * @dd: the infinipath device
- * @tidptr: pointer to the expected TID (in chip) to update
- * @tidtype: RCVHQ_RCV_TYPE_EAGER (1) for eager, RCVHQ_RCV_TYPE_EXPECTED (0) for expected
- * @pa: physical address of in memory buffer; ipath_tidinvalid if freeing
- *
- * This exists as a separate routine to allow for special locking etc.
- * It's used for both the full cleanup on exit, as well as the normal
- * setup and teardown.
- */
-static void ipath_ht_put_tid(struct ipath_devdata *dd,
-			     u64 __iomem *tidptr, u32 type,
-			     unsigned long pa)
-{
-	if (!dd->ipath_kregbase)
-		return;
-
-	if (pa != dd->ipath_tidinvalid) {
-		if (unlikely((pa & ~INFINIPATH_RT_ADDR_MASK))) {
-			dev_info(&dd->pcidev->dev,
-				 "physaddr %lx has more than "
-				 "40 bits, using only 40!!!\n", pa);
-			pa &= INFINIPATH_RT_ADDR_MASK;
-		}
-		if (type == RCVHQ_RCV_TYPE_EAGER)
-			pa |= dd->ipath_tidtemplate;
-		else {
-			/* in words (fixed, full page).  */
-			u64 lenvalid = PAGE_SIZE >> 2;
-			lenvalid <<= INFINIPATH_RT_BUFSIZE_SHIFT;
-			pa |= lenvalid | INFINIPATH_RT_VALID;
-		}
-	}
-
-	writeq(pa, tidptr);
-}
-
-
-/**
- * ipath_ht_clear_tid - clear all TID entries for a port, expected and eager
- * @dd: the infinipath device
- * @port: the port
- *
- * Used from ipath_close(), and at chip initialization.
- */
-static void ipath_ht_clear_tids(struct ipath_devdata *dd, unsigned port)
-{
-	u64 __iomem *tidbase;
-	int i;
-
-	if (!dd->ipath_kregbase)
-		return;
-
-	ipath_cdbg(VERBOSE, "Invalidate TIDs for port %u\n", port);
-
-	/*
-	 * need to invalidate all of the expected TID entries for this
-	 * port, so we don't have valid entries that might somehow get
-	 * used (early in next use of this port, or through some bug)
-	 */
-	tidbase = (u64 __iomem *) ((char __iomem *)(dd->ipath_kregbase) +
-				   dd->ipath_rcvtidbase +
-				   port * dd->ipath_rcvtidcnt *
-				   sizeof(*tidbase));
-	for (i = 0; i < dd->ipath_rcvtidcnt; i++)
-		ipath_ht_put_tid(dd, &tidbase[i], RCVHQ_RCV_TYPE_EXPECTED,
-				 dd->ipath_tidinvalid);
-
-	tidbase = (u64 __iomem *) ((char __iomem *)(dd->ipath_kregbase) +
-				   dd->ipath_rcvegrbase +
-				   port * dd->ipath_rcvegrcnt *
-				   sizeof(*tidbase));
-
-	for (i = 0; i < dd->ipath_rcvegrcnt; i++)
-		ipath_ht_put_tid(dd, &tidbase[i], RCVHQ_RCV_TYPE_EAGER,
-				 dd->ipath_tidinvalid);
-}
-
-/**
- * ipath_ht_tidtemplate - setup constants for TID updates
- * @dd: the infinipath device
- *
- * We setup stuff that we use a lot, to avoid calculating each time
- */
-static void ipath_ht_tidtemplate(struct ipath_devdata *dd)
-{
-	dd->ipath_tidtemplate = dd->ipath_ibmaxlen >> 2;
-	dd->ipath_tidtemplate <<= INFINIPATH_RT_BUFSIZE_SHIFT;
-	dd->ipath_tidtemplate |= INFINIPATH_RT_VALID;
-
-	/*
-	 * work around chip errata bug 7358, by marking invalid tids
-	 * as having max length
-	 */
-	dd->ipath_tidinvalid = (-1LL & INFINIPATH_RT_BUFSIZE_MASK) <<
-		INFINIPATH_RT_BUFSIZE_SHIFT;
-}
-
-static int ipath_ht_early_init(struct ipath_devdata *dd)
-{
-	u32 __iomem *piobuf;
-	u32 pioincr, val32;
-	int i;
-
-	/*
-	 * one cache line; long IB headers will spill over into received
-	 * buffer
-	 */
-	dd->ipath_rcvhdrentsize = 16;
-	dd->ipath_rcvhdrsize = IPATH_DFLT_RCVHDRSIZE;
-
-	/*
-	 * For HT, we allocate a somewhat overly large eager buffer,
-	 * such that we can guarantee that we can receive the largest
-	 * packet that we can send out.  To truly support a 4KB MTU,
-	 * we need to bump this to a large value.  To date, other than
-	 * testing, we have never encountered an HCA that can really
-	 * send 4KB MTU packets, so we do not handle that (we'll get
-	 * errors interrupts if we ever see one).
-	 */
-	dd->ipath_rcvegrbufsize = dd->ipath_piosize2k;
-
-	/*
-	 * the min() check here is currently a nop, but it may not
-	 * always be, depending on just how we do ipath_rcvegrbufsize
-	 */
-	dd->ipath_ibmaxlen = min(dd->ipath_piosize2k,
-				 dd->ipath_rcvegrbufsize);
-	dd->ipath_init_ibmaxlen = dd->ipath_ibmaxlen;
-	ipath_ht_tidtemplate(dd);
-
-	/*
-	 * zero all the TID entries at startup.  We do this for sanity,
-	 * in case of a previous driver crash of some kind, and also
-	 * because the chip powers up with these memories in an unknown
-	 * state.  Use portcnt, not cfgports, since this is for the
-	 * full chip, not for current (possibly different) configuration
-	 * value.
-	 * Chip Errata bug 6447
-	 */
-	for (val32 = 0; val32 < dd->ipath_portcnt; val32++)
-		ipath_ht_clear_tids(dd, val32);
-
-	/*
-	 * write the pbc of each buffer, to be sure it's initialized, then
-	 * cancel all the buffers, and also abort any packets that might
-	 * have been in flight for some reason (the latter is for driver
-	 * unload/reload, but isn't a bad idea at first init).	PIO send
-	 * isn't enabled at this point, so there is no danger of sending
-	 * these out on the wire.
-	 * Chip Errata bug 6610
-	 */
-	piobuf = (u32 __iomem *) (((char __iomem *)(dd->ipath_kregbase)) +
-				  dd->ipath_piobufbase);
-	pioincr = dd->ipath_palign / sizeof(*piobuf);
-	for (i = 0; i < dd->ipath_piobcnt2k; i++) {
-		/*
-		 * reasonable word count, just to init pbc
-		 */
-		writel(16, piobuf);
-		piobuf += pioincr;
-	}
-
-	ipath_get_eeprom_info(dd);
-	if (dd->ipath_boardrev == 5) {
-		/*
-		 * Later production QHT7040 has same changes as QHT7140, so
-		 * can use GPIO interrupts.  They have serial #'s starting
-		 * with 128, rather than 112.
-		 */
-		if (dd->ipath_serial[0] == '1' &&
-		    dd->ipath_serial[1] == '2' &&
-		    dd->ipath_serial[2] == '8')
-			dd->ipath_flags |= IPATH_GPIO_INTR;
-		else {
-			ipath_dev_err(dd, "Unsupported InfiniPath board "
-				"(serial number %.16s)!\n",
-				dd->ipath_serial);
-			return 1;
-		}
-	}
-
-	if (dd->ipath_minrev >= 4) {
-		/* Rev4+ reports extra errors via internal GPIO pins */
-		dd->ipath_flags |= IPATH_GPIO_ERRINTRS;
-		dd->ipath_gpio_mask |= IPATH_GPIO_ERRINTR_MASK;
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask,
-				 dd->ipath_gpio_mask);
-	}
-
-	return 0;
-}
-
-
-/**
- * ipath_init_ht_get_base_info - set chip-specific flags for user code
- * @dd: the infinipath device
- * @kbase: ipath_base_info pointer
- *
- * We set the PCIE flag because the lower bandwidth on PCIe vs
- * HyperTransport can affect some user packet algorithms.
- */
-static int ipath_ht_get_base_info(struct ipath_portdata *pd, void *kbase)
-{
-	struct ipath_base_info *kinfo = kbase;
-
-	kinfo->spi_runtime_flags |= IPATH_RUNTIME_HT |
-		IPATH_RUNTIME_PIO_REGSWAPPED;
-
-	if (pd->port_dd->ipath_minrev < 4)
-		kinfo->spi_runtime_flags |= IPATH_RUNTIME_RCVHDR_COPY;
-
-	return 0;
-}
-
-static void ipath_ht_free_irq(struct ipath_devdata *dd)
-{
-	free_irq(dd->ipath_irq, dd);
-	ht_destroy_irq(dd->ipath_irq);
-	dd->ipath_irq = 0;
-	dd->ipath_intconfig = 0;
-}
-
-static struct ipath_message_header *
-ipath_ht_get_msgheader(struct ipath_devdata *dd, __le32 *rhf_addr)
-{
-	return (struct ipath_message_header *)
-		&rhf_addr[sizeof(u64) / sizeof(u32)];
-}
-
-static void ipath_ht_config_ports(struct ipath_devdata *dd, ushort cfgports)
-{
-	dd->ipath_portcnt =
-		ipath_read_kreg32(dd, dd->ipath_kregs->kr_portcnt);
-	dd->ipath_p0_rcvegrcnt =
-		ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvegrcnt);
-}
-
-static void ipath_ht_read_counters(struct ipath_devdata *dd,
-				   struct infinipath_counters *cntrs)
-{
-	cntrs->LBIntCnt =
-		ipath_snap_cntr(dd, IPATH_CREG_OFFSET(LBIntCnt));
-	cntrs->LBFlowStallCnt =
-		ipath_snap_cntr(dd, IPATH_CREG_OFFSET(LBFlowStallCnt));
-	cntrs->TxSDmaDescCnt = 0;
-	cntrs->TxUnsupVLErrCnt =
-		ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxUnsupVLErrCnt));
-	cntrs->TxDataPktCnt =
-		ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxDataPktCnt));
-	cntrs->TxFlowPktCnt =
-		ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxFlowPktCnt));
-	cntrs->TxDwordCnt =
-		ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxDwordCnt));
-	cntrs->TxLenErrCnt =
-		ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxLenErrCnt));
-	cntrs->TxMaxMinLenErrCnt =
-		ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxMaxMinLenErrCnt));
-	cntrs->TxUnderrunCnt =
-		ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxUnderrunCnt));
-	cntrs->TxFlowStallCnt =
-		ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxFlowStallCnt));
-	cntrs->TxDroppedPktCnt =
-		ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxDroppedPktCnt));
-	cntrs->RxDroppedPktCnt =
-		ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxDroppedPktCnt));
-	cntrs->RxDataPktCnt =
-		ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxDataPktCnt));
-	cntrs->RxFlowPktCnt =
-		ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxFlowPktCnt));
-	cntrs->RxDwordCnt =
-		ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxDwordCnt));
-	cntrs->RxLenErrCnt =
-		ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxLenErrCnt));
-	cntrs->RxMaxMinLenErrCnt =
-		ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxMaxMinLenErrCnt));
-	cntrs->RxICRCErrCnt =
-		ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxICRCErrCnt));
-	cntrs->RxVCRCErrCnt =
-		ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxVCRCErrCnt));
-	cntrs->RxFlowCtrlErrCnt =
-		ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxFlowCtrlErrCnt));
-	cntrs->RxBadFormatCnt =
-		ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxBadFormatCnt));
-	cntrs->RxLinkProblemCnt =
-		ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxLinkProblemCnt));
-	cntrs->RxEBPCnt =
-		ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxEBPCnt));
-	cntrs->RxLPCRCErrCnt =
-		ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxLPCRCErrCnt));
-	cntrs->RxBufOvflCnt =
-		ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxBufOvflCnt));
-	cntrs->RxTIDFullErrCnt =
-		ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxTIDFullErrCnt));
-	cntrs->RxTIDValidErrCnt =
-		ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxTIDValidErrCnt));
-	cntrs->RxPKeyMismatchCnt =
-		ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxPKeyMismatchCnt));
-	cntrs->RxP0HdrEgrOvflCnt =
-		ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP0HdrEgrOvflCnt));
-	cntrs->RxP1HdrEgrOvflCnt =
-		ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP1HdrEgrOvflCnt));
-	cntrs->RxP2HdrEgrOvflCnt =
-		ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP2HdrEgrOvflCnt));
-	cntrs->RxP3HdrEgrOvflCnt =
-		ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP3HdrEgrOvflCnt));
-	cntrs->RxP4HdrEgrOvflCnt =
-		ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP4HdrEgrOvflCnt));
-	cntrs->RxP5HdrEgrOvflCnt =
-		ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP5HdrEgrOvflCnt));
-	cntrs->RxP6HdrEgrOvflCnt =
-		ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP6HdrEgrOvflCnt));
-	cntrs->RxP7HdrEgrOvflCnt =
-		ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP7HdrEgrOvflCnt));
-	cntrs->RxP8HdrEgrOvflCnt =
-		ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP8HdrEgrOvflCnt));
-	cntrs->RxP9HdrEgrOvflCnt = 0;
-	cntrs->RxP10HdrEgrOvflCnt = 0;
-	cntrs->RxP11HdrEgrOvflCnt = 0;
-	cntrs->RxP12HdrEgrOvflCnt = 0;
-	cntrs->RxP13HdrEgrOvflCnt = 0;
-	cntrs->RxP14HdrEgrOvflCnt = 0;
-	cntrs->RxP15HdrEgrOvflCnt = 0;
-	cntrs->RxP16HdrEgrOvflCnt = 0;
-	cntrs->IBStatusChangeCnt =
-		ipath_snap_cntr(dd, IPATH_CREG_OFFSET(IBStatusChangeCnt));
-	cntrs->IBLinkErrRecoveryCnt =
-		ipath_snap_cntr(dd, IPATH_CREG_OFFSET(IBLinkErrRecoveryCnt));
-	cntrs->IBLinkDownedCnt =
-		ipath_snap_cntr(dd, IPATH_CREG_OFFSET(IBLinkDownedCnt));
-	cntrs->IBSymbolErrCnt =
-		ipath_snap_cntr(dd, IPATH_CREG_OFFSET(IBSymbolErrCnt));
-	cntrs->RxVL15DroppedPktCnt = 0;
-	cntrs->RxOtherLocalPhyErrCnt = 0;
-	cntrs->PcieRetryBufDiagQwordCnt = 0;
-	cntrs->ExcessBufferOvflCnt = dd->ipath_overrun_thresh_errs;
-	cntrs->LocalLinkIntegrityErrCnt =
-		(dd->ipath_flags & IPATH_GPIO_ERRINTRS) ?
-		dd->ipath_lli_errs : dd->ipath_lli_errors;
-	cntrs->RxVlErrCnt = 0;
-	cntrs->RxDlidFltrCnt = 0;
-}
-
-
-/* no interrupt fallback for these chips */
-static int ipath_ht_nointr_fallback(struct ipath_devdata *dd)
-{
-	return 0;
-}
-
-
-/*
- * reset the XGXS (between serdes and IBC).  Slightly less intrusive
- * than resetting the IBC or external link state, and useful in some
- * cases to cause some retraining.  To do this right, we reset IBC
- * as well.
- */
-static void ipath_ht_xgxs_reset(struct ipath_devdata *dd)
-{
-	u64 val, prev_val;
-
-	prev_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig);
-	val = prev_val | INFINIPATH_XGXS_RESET;
-	prev_val &= ~INFINIPATH_XGXS_RESET; /* be sure */
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
-			 dd->ipath_control & ~INFINIPATH_C_LINKENABLE);
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val);
-	ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch);
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, prev_val);
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
-			 dd->ipath_control);
-}
-
-
-static int ipath_ht_get_ib_cfg(struct ipath_devdata *dd, int which)
-{
-	int ret;
-
-	switch (which) {
-	case IPATH_IB_CFG_LWID:
-		ret = dd->ipath_link_width_active;
-		break;
-	case IPATH_IB_CFG_SPD:
-		ret = dd->ipath_link_speed_active;
-		break;
-	case IPATH_IB_CFG_LWID_ENB:
-		ret = dd->ipath_link_width_enabled;
-		break;
-	case IPATH_IB_CFG_SPD_ENB:
-		ret = dd->ipath_link_speed_enabled;
-		break;
-	default:
-		ret =  -ENOTSUPP;
-		break;
-	}
-	return ret;
-}
-
-
-/* we assume range checking is already done, if needed */
-static int ipath_ht_set_ib_cfg(struct ipath_devdata *dd, int which, u32 val)
-{
-	int ret = 0;
-
-	if (which == IPATH_IB_CFG_LWID_ENB)
-		dd->ipath_link_width_enabled = val;
-	else if (which == IPATH_IB_CFG_SPD_ENB)
-		dd->ipath_link_speed_enabled = val;
-	else
-		ret = -ENOTSUPP;
-	return ret;
-}
-
-
-static void ipath_ht_config_jint(struct ipath_devdata *dd, u16 a, u16 b)
-{
-}
-
-
-static int ipath_ht_ib_updown(struct ipath_devdata *dd, int ibup, u64 ibcs)
-{
-	ipath_setup_ht_setextled(dd, ipath_ib_linkstate(dd, ibcs),
-		ipath_ib_linktrstate(dd, ibcs));
-	return 0;
-}
-
-
-/**
- * ipath_init_iba6110_funcs - set up the chip-specific function pointers
- * @dd: the infinipath device
- *
- * This is global, and is called directly at init to set up the
- * chip-specific function pointers for later use.
- */
-void ipath_init_iba6110_funcs(struct ipath_devdata *dd)
-{
-	dd->ipath_f_intrsetup = ipath_ht_intconfig;
-	dd->ipath_f_bus = ipath_setup_ht_config;
-	dd->ipath_f_reset = ipath_setup_ht_reset;
-	dd->ipath_f_get_boardname = ipath_ht_boardname;
-	dd->ipath_f_init_hwerrors = ipath_ht_init_hwerrors;
-	dd->ipath_f_early_init = ipath_ht_early_init;
-	dd->ipath_f_handle_hwerrors = ipath_ht_handle_hwerrors;
-	dd->ipath_f_quiet_serdes = ipath_ht_quiet_serdes;
-	dd->ipath_f_bringup_serdes = ipath_ht_bringup_serdes;
-	dd->ipath_f_clear_tids = ipath_ht_clear_tids;
-	dd->ipath_f_put_tid = ipath_ht_put_tid;
-	dd->ipath_f_cleanup = ipath_setup_ht_cleanup;
-	dd->ipath_f_setextled = ipath_setup_ht_setextled;
-	dd->ipath_f_get_base_info = ipath_ht_get_base_info;
-	dd->ipath_f_free_irq = ipath_ht_free_irq;
-	dd->ipath_f_tidtemplate = ipath_ht_tidtemplate;
-	dd->ipath_f_intr_fallback = ipath_ht_nointr_fallback;
-	dd->ipath_f_get_msgheader = ipath_ht_get_msgheader;
-	dd->ipath_f_config_ports = ipath_ht_config_ports;
-	dd->ipath_f_read_counters = ipath_ht_read_counters;
-	dd->ipath_f_xgxs_reset = ipath_ht_xgxs_reset;
-	dd->ipath_f_get_ib_cfg = ipath_ht_get_ib_cfg;
-	dd->ipath_f_set_ib_cfg = ipath_ht_set_ib_cfg;
-	dd->ipath_f_config_jint = ipath_ht_config_jint;
-	dd->ipath_f_ib_updown = ipath_ht_ib_updown;
-
-	/*
-	 * initialize chip-specific variables
-	 */
-	ipath_init_ht_variables(dd);
-}
diff --git a/drivers/staging/rdma/ipath/ipath_init_chip.c b/drivers/staging/rdma/ipath/ipath_init_chip.c
deleted file mode 100644
index a5eea19..0000000
--- a/drivers/staging/rdma/ipath/ipath_init_chip.c
+++ /dev/null
@@ -1,1062 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/pci.h>
-#include <linux/netdevice.h>
-#include <linux/moduleparam.h>
-#include <linux/slab.h>
-#include <linux/stat.h>
-#include <linux/vmalloc.h>
-
-#include "ipath_kernel.h"
-#include "ipath_common.h"
-
-/*
- * min buffers we want to have per port, after driver
- */
-#define IPATH_MIN_USER_PORT_BUFCNT 7
-
-/*
- * Number of ports we are configured to use (to allow for more pio
- * buffers per port, etc.)  Zero means use chip value.
- */
-static ushort ipath_cfgports;
-
-module_param_named(cfgports, ipath_cfgports, ushort, S_IRUGO);
-MODULE_PARM_DESC(cfgports, "Set max number of ports to use");
-
-/*
- * Number of buffers reserved for driver (verbs and layered drivers.)
- * Initialized based on number of PIO buffers if not set via module interface.
- * The problem with this is that it's global, but we'll use different
- * numbers for different chip types.
- */
-static ushort ipath_kpiobufs;
-
-static int ipath_set_kpiobufs(const char *val, struct kernel_param *kp);
-
-module_param_call(kpiobufs, ipath_set_kpiobufs, param_get_ushort,
-		  &ipath_kpiobufs, S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(kpiobufs, "Set number of PIO buffers for driver");
-
-/**
- * create_port0_egr - allocate the eager TID buffers
- * @dd: the infinipath device
- *
- * This code is now quite different for user and kernel, because
- * the kernel uses skb's, for the accelerated network performance.
- * This is the kernel (port0) version.
- *
- * Allocate the eager TID buffers and program them into infinipath.
- * We use the network layer alloc_skb() allocator to allocate the
- * memory, and either use the buffers as is for things like verbs
- * packets, or pass the buffers up to the ipath layered driver and
- * thence the network layer, replacing them as we do so (see
- * ipath_rcv_layer()).
- */
-static int create_port0_egr(struct ipath_devdata *dd)
-{
-	unsigned e, egrcnt;
-	struct ipath_skbinfo *skbinfo;
-	int ret;
-
-	egrcnt = dd->ipath_p0_rcvegrcnt;
-
-	skbinfo = vmalloc(sizeof(*dd->ipath_port0_skbinfo) * egrcnt);
-	if (skbinfo == NULL) {
-		ipath_dev_err(dd, "allocation error for eager TID "
-			      "skb array\n");
-		ret = -ENOMEM;
-		goto bail;
-	}
-	for (e = 0; e < egrcnt; e++) {
-		/*
-		 * This is a bit tricky in that we allocate extra
-		 * space for 2 bytes of the 14 byte ethernet header.
-		 * These two bytes are passed in the ipath header so
-		 * the rest of the data is word aligned.  We allocate
-		 * 4 bytes so that the data buffer stays word aligned.
-		 * See ipath_kreceive() for more details.
-		 */
-		skbinfo[e].skb = ipath_alloc_skb(dd, GFP_KERNEL);
-		if (!skbinfo[e].skb) {
-			ipath_dev_err(dd, "SKB allocation error for "
-				      "eager TID %u\n", e);
-			while (e != 0)
-				dev_kfree_skb(skbinfo[--e].skb);
-			vfree(skbinfo);
-			ret = -ENOMEM;
-			goto bail;
-		}
-	}
-	/*
-	 * After loop above, so we can test non-NULL to see if ready
-	 * to use at receive, etc.
-	 */
-	dd->ipath_port0_skbinfo = skbinfo;
-
-	for (e = 0; e < egrcnt; e++) {
-		dd->ipath_port0_skbinfo[e].phys =
-		  ipath_map_single(dd->pcidev,
-				   dd->ipath_port0_skbinfo[e].skb->data,
-				   dd->ipath_ibmaxlen, PCI_DMA_FROMDEVICE);
-		dd->ipath_f_put_tid(dd, e + (u64 __iomem *)
-				    ((char __iomem *) dd->ipath_kregbase +
-				     dd->ipath_rcvegrbase),
-				    RCVHQ_RCV_TYPE_EAGER,
-				    dd->ipath_port0_skbinfo[e].phys);
-	}
-
-	ret = 0;
-
-bail:
-	return ret;
-}
-
-static int bringup_link(struct ipath_devdata *dd)
-{
-	u64 val, ibc;
-	int ret = 0;
-
-	/* hold IBC in reset */
-	dd->ipath_control &= ~INFINIPATH_C_LINKENABLE;
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
-			 dd->ipath_control);
-
-	/*
-	 * set initial max size pkt IBC will send, including ICRC; it's the
-	 * PIO buffer size in dwords, less 1; also see ipath_set_mtu()
-	 */
-	val = (dd->ipath_ibmaxlen >> 2) + 1;
-	ibc = val << dd->ibcc_mpl_shift;
-
-	/* flowcontrolwatermark is in units of KBytes */
-	ibc |= 0x5ULL << INFINIPATH_IBCC_FLOWCTRLWATERMARK_SHIFT;
-	/*
-	 * How often flowctrl sent.  More or less in usecs; balance against
-	 * watermark value, so that in theory senders always get a flow
-	 * control update in time to not let the IB link go idle.
-	 */
-	ibc |= 0x3ULL << INFINIPATH_IBCC_FLOWCTRLPERIOD_SHIFT;
-	/* max error tolerance */
-	ibc |= 0xfULL << INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT;
-	/* use "real" buffer space for */
-	ibc |= 4ULL << INFINIPATH_IBCC_CREDITSCALE_SHIFT;
-	/* IB credit flow control. */
-	ibc |= 0xfULL << INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT;
-	/* initially come up waiting for TS1, without sending anything. */
-	dd->ipath_ibcctrl = ibc;
-	/*
-	 * Want to start out with both LINKCMD and LINKINITCMD in NOP
-	 * (0 and 0).  Don't put linkinitcmd in ipath_ibcctrl, want that
-	 * to stay a NOP. Flag that we are disabled, for the (unlikely)
-	 * case that some recovery path is trying to bring the link up
-	 * before we are ready.
-	 */
-	ibc |= INFINIPATH_IBCC_LINKINITCMD_DISABLE <<
-		INFINIPATH_IBCC_LINKINITCMD_SHIFT;
-	dd->ipath_flags |= IPATH_IB_LINK_DISABLED;
-	ipath_cdbg(VERBOSE, "Writing 0x%llx to ibcctrl\n",
-		   (unsigned long long) ibc);
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, ibc);
-
-	// be sure chip saw it
-	val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-
-	ret = dd->ipath_f_bringup_serdes(dd);
-
-	if (ret)
-		dev_info(&dd->pcidev->dev, "Could not initialize SerDes, "
-			 "not usable\n");
-	else {
-		/* enable IBC */
-		dd->ipath_control |= INFINIPATH_C_LINKENABLE;
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
-				 dd->ipath_control);
-	}
-
-	return ret;
-}
-
-static struct ipath_portdata *create_portdata0(struct ipath_devdata *dd)
-{
-	struct ipath_portdata *pd;
-
-	pd = kzalloc(sizeof(*pd), GFP_KERNEL);
-	if (pd) {
-		pd->port_dd = dd;
-		pd->port_cnt = 1;
-		/* The port 0 pkey table is used by the layer interface. */
-		pd->port_pkeys[0] = IPATH_DEFAULT_P_KEY;
-		pd->port_seq_cnt = 1;
-	}
-	return pd;
-}
-
-static int init_chip_first(struct ipath_devdata *dd)
-{
-	struct ipath_portdata *pd;
-	int ret = 0;
-	u64 val;
-
-	spin_lock_init(&dd->ipath_kernel_tid_lock);
-	spin_lock_init(&dd->ipath_user_tid_lock);
-	spin_lock_init(&dd->ipath_sendctrl_lock);
-	spin_lock_init(&dd->ipath_uctxt_lock);
-	spin_lock_init(&dd->ipath_sdma_lock);
-	spin_lock_init(&dd->ipath_gpio_lock);
-	spin_lock_init(&dd->ipath_eep_st_lock);
-	spin_lock_init(&dd->ipath_sdepb_lock);
-	mutex_init(&dd->ipath_eep_lock);
-
-	/*
-	 * skip cfgports stuff because we are not allocating memory,
-	 * and we don't want problems if the portcnt changed due to
-	 * cfgports.  We do still check and report a difference, if
-	 * not same (should be impossible).
-	 */
-	dd->ipath_f_config_ports(dd, ipath_cfgports);
-	if (!ipath_cfgports)
-		dd->ipath_cfgports = dd->ipath_portcnt;
-	else if (ipath_cfgports <= dd->ipath_portcnt) {
-		dd->ipath_cfgports = ipath_cfgports;
-		ipath_dbg("Configured to use %u ports out of %u in chip\n",
-			  dd->ipath_cfgports, ipath_read_kreg32(dd,
-			  dd->ipath_kregs->kr_portcnt));
-	} else {
-		dd->ipath_cfgports = dd->ipath_portcnt;
-		ipath_dbg("Tried to configured to use %u ports; chip "
-			  "only supports %u\n", ipath_cfgports,
-			  ipath_read_kreg32(dd,
-				  dd->ipath_kregs->kr_portcnt));
-	}
-	/*
-	 * Allocate full portcnt array, rather than just cfgports, because
-	 * cleanup iterates across all possible ports.
-	 */
-	dd->ipath_pd = kcalloc(dd->ipath_portcnt, sizeof(*dd->ipath_pd),
-			       GFP_KERNEL);
-
-	if (!dd->ipath_pd) {
-		ipath_dev_err(dd, "Unable to allocate portdata array, "
-			      "failing\n");
-		ret = -ENOMEM;
-		goto done;
-	}
-
-	pd = create_portdata0(dd);
-	if (!pd) {
-		ipath_dev_err(dd, "Unable to allocate portdata for port "
-			      "0, failing\n");
-		ret = -ENOMEM;
-		goto done;
-	}
-	dd->ipath_pd[0] = pd;
-
-	dd->ipath_rcvtidcnt =
-		ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvtidcnt);
-	dd->ipath_rcvtidbase =
-		ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvtidbase);
-	dd->ipath_rcvegrcnt =
-		ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvegrcnt);
-	dd->ipath_rcvegrbase =
-		ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvegrbase);
-	dd->ipath_palign =
-		ipath_read_kreg32(dd, dd->ipath_kregs->kr_pagealign);
-	dd->ipath_piobufbase =
-		ipath_read_kreg64(dd, dd->ipath_kregs->kr_sendpiobufbase);
-	val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_sendpiosize);
-	dd->ipath_piosize2k = val & ~0U;
-	dd->ipath_piosize4k = val >> 32;
-	if (dd->ipath_piosize4k == 0 && ipath_mtu4096)
-		ipath_mtu4096 = 0; /* 4KB not supported by this chip */
-	dd->ipath_ibmtu = ipath_mtu4096 ? 4096 : 2048;
-	val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_sendpiobufcnt);
-	dd->ipath_piobcnt2k = val & ~0U;
-	dd->ipath_piobcnt4k = val >> 32;
-	dd->ipath_pio2kbase =
-		(u32 __iomem *) (((char __iomem *) dd->ipath_kregbase) +
-				 (dd->ipath_piobufbase & 0xffffffff));
-	if (dd->ipath_piobcnt4k) {
-		dd->ipath_pio4kbase = (u32 __iomem *)
-			(((char __iomem *) dd->ipath_kregbase) +
-			 (dd->ipath_piobufbase >> 32));
-		/*
-		 * 4K buffers take 2 pages; we use roundup just to be
-		 * paranoid; we calculate it once here, rather than on
-		 * ever buf allocate
-		 */
-		dd->ipath_4kalign = ALIGN(dd->ipath_piosize4k,
-					  dd->ipath_palign);
-		ipath_dbg("%u 2k(%x) piobufs @ %p, %u 4k(%x) @ %p "
-			  "(%x aligned)\n",
-			  dd->ipath_piobcnt2k, dd->ipath_piosize2k,
-			  dd->ipath_pio2kbase, dd->ipath_piobcnt4k,
-			  dd->ipath_piosize4k, dd->ipath_pio4kbase,
-			  dd->ipath_4kalign);
-	} else {
-		ipath_dbg("%u 2k piobufs @ %p\n",
-			  dd->ipath_piobcnt2k, dd->ipath_pio2kbase);
-	}
-done:
-	return ret;
-}
-
-/**
- * init_chip_reset - re-initialize after a reset, or enable
- * @dd: the infinipath device
- *
- * sanity check at least some of the values after reset, and
- * ensure no receive or transmit (explicitly, in case reset
- * failed
- */
-static int init_chip_reset(struct ipath_devdata *dd)
-{
-	u32 rtmp;
-	int i;
-	unsigned long flags;
-
-	/*
-	 * ensure chip does no sends or receives, tail updates, or
-	 * pioavail updates while we re-initialize
-	 */
-	dd->ipath_rcvctrl &= ~(1ULL << dd->ipath_r_tailupd_shift);
-	for (i = 0; i < dd->ipath_portcnt; i++) {
-		clear_bit(dd->ipath_r_portenable_shift + i,
-			  &dd->ipath_rcvctrl);
-		clear_bit(dd->ipath_r_intravail_shift + i,
-			  &dd->ipath_rcvctrl);
-	}
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
-		dd->ipath_rcvctrl);
-
-	spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
-	dd->ipath_sendctrl = 0U; /* no sdma, etc */
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);
-	ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-	spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
-
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_control, 0ULL);
-
-	rtmp = ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvtidcnt);
-	if (rtmp != dd->ipath_rcvtidcnt)
-		dev_info(&dd->pcidev->dev, "tidcnt was %u before "
-			 "reset, now %u, using original\n",
-			 dd->ipath_rcvtidcnt, rtmp);
-	rtmp = ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvtidbase);
-	if (rtmp != dd->ipath_rcvtidbase)
-		dev_info(&dd->pcidev->dev, "tidbase was %u before "
-			 "reset, now %u, using original\n",
-			 dd->ipath_rcvtidbase, rtmp);
-	rtmp = ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvegrcnt);
-	if (rtmp != dd->ipath_rcvegrcnt)
-		dev_info(&dd->pcidev->dev, "egrcnt was %u before "
-			 "reset, now %u, using original\n",
-			 dd->ipath_rcvegrcnt, rtmp);
-	rtmp = ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvegrbase);
-	if (rtmp != dd->ipath_rcvegrbase)
-		dev_info(&dd->pcidev->dev, "egrbase was %u before "
-			 "reset, now %u, using original\n",
-			 dd->ipath_rcvegrbase, rtmp);
-
-	return 0;
-}
-
-static int init_pioavailregs(struct ipath_devdata *dd)
-{
-	int ret;
-
-	dd->ipath_pioavailregs_dma = dma_alloc_coherent(
-		&dd->pcidev->dev, PAGE_SIZE, &dd->ipath_pioavailregs_phys,
-		GFP_KERNEL);
-	if (!dd->ipath_pioavailregs_dma) {
-		ipath_dev_err(dd, "failed to allocate PIOavail reg area "
-			      "in memory\n");
-		ret = -ENOMEM;
-		goto done;
-	}
-
-	/*
-	 * we really want L2 cache aligned, but for current CPUs of
-	 * interest, they are the same.
-	 */
-	dd->ipath_statusp = (u64 *)
-		((char *)dd->ipath_pioavailregs_dma +
-		 ((2 * L1_CACHE_BYTES +
-		   dd->ipath_pioavregs * sizeof(u64)) & ~L1_CACHE_BYTES));
-	/* copy the current value now that it's really allocated */
-	*dd->ipath_statusp = dd->_ipath_status;
-	/*
-	 * setup buffer to hold freeze msg, accessible to apps,
-	 * following statusp
-	 */
-	dd->ipath_freezemsg = (char *)&dd->ipath_statusp[1];
-	/* and its length */
-	dd->ipath_freezelen = L1_CACHE_BYTES - sizeof(dd->ipath_statusp[0]);
-
-	ret = 0;
-
-done:
-	return ret;
-}
-
-/**
- * init_shadow_tids - allocate the shadow TID array
- * @dd: the infinipath device
- *
- * allocate the shadow TID array, so we can ipath_munlock previous
- * entries.  It may make more sense to move the pageshadow to the
- * port data structure, so we only allocate memory for ports actually
- * in use, since we at 8k per port, now.
- */
-static void init_shadow_tids(struct ipath_devdata *dd)
-{
-	struct page **pages;
-	dma_addr_t *addrs;
-
-	pages = vzalloc(dd->ipath_cfgports * dd->ipath_rcvtidcnt *
-			sizeof(struct page *));
-	if (!pages) {
-		ipath_dev_err(dd, "failed to allocate shadow page * "
-			      "array, no expected sends!\n");
-		dd->ipath_pageshadow = NULL;
-		return;
-	}
-
-	addrs = vmalloc(dd->ipath_cfgports * dd->ipath_rcvtidcnt *
-			sizeof(dma_addr_t));
-	if (!addrs) {
-		ipath_dev_err(dd, "failed to allocate shadow dma handle "
-			      "array, no expected sends!\n");
-		vfree(pages);
-		dd->ipath_pageshadow = NULL;
-		return;
-	}
-
-	dd->ipath_pageshadow = pages;
-	dd->ipath_physshadow = addrs;
-}
-
-static void enable_chip(struct ipath_devdata *dd, int reinit)
-{
-	u32 val;
-	u64 rcvmask;
-	unsigned long flags;
-	int i;
-
-	if (!reinit)
-		init_waitqueue_head(&ipath_state_wait);
-
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
-			 dd->ipath_rcvctrl);
-
-	spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
-	/* Enable PIO send, and update of PIOavail regs to memory. */
-	dd->ipath_sendctrl = INFINIPATH_S_PIOENABLE |
-		INFINIPATH_S_PIOBUFAVAILUPD;
-
-	/*
-	 * Set the PIO avail update threshold to host memory
-	 * on chips that support it.
-	 */
-	if (dd->ipath_pioupd_thresh)
-		dd->ipath_sendctrl |= dd->ipath_pioupd_thresh
-			<< INFINIPATH_S_UPDTHRESH_SHIFT;
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);
-	ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-	spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
-
-	/*
-	 * Enable kernel ports' receive and receive interrupt.
-	 * Other ports done as user opens and inits them.
-	 */
-	rcvmask = 1ULL;
-	dd->ipath_rcvctrl |= (rcvmask << dd->ipath_r_portenable_shift) |
-		(rcvmask << dd->ipath_r_intravail_shift);
-	if (!(dd->ipath_flags & IPATH_NODMA_RTAIL))
-		dd->ipath_rcvctrl |= (1ULL << dd->ipath_r_tailupd_shift);
-
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
-			 dd->ipath_rcvctrl);
-
-	/*
-	 * now ready for use.  this should be cleared whenever we
-	 * detect a reset, or initiate one.
-	 */
-	dd->ipath_flags |= IPATH_INITTED;
-
-	/*
-	 * Init our shadow copies of head from tail values,
-	 * and write head values to match.
-	 */
-	val = ipath_read_ureg32(dd, ur_rcvegrindextail, 0);
-	ipath_write_ureg(dd, ur_rcvegrindexhead, val, 0);
-
-	/* Initialize so we interrupt on next packet received */
-	ipath_write_ureg(dd, ur_rcvhdrhead,
-			 dd->ipath_rhdrhead_intr_off |
-			 dd->ipath_pd[0]->port_head, 0);
-
-	/*
-	 * by now pioavail updates to memory should have occurred, so
-	 * copy them into our working/shadow registers; this is in
-	 * case something went wrong with abort, but mostly to get the
-	 * initial values of the generation bit correct.
-	 */
-	for (i = 0; i < dd->ipath_pioavregs; i++) {
-		__le64 pioavail;
-
-		/*
-		 * Chip Errata bug 6641; even and odd qwords>3 are swapped.
-		 */
-		if (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS))
-			pioavail = dd->ipath_pioavailregs_dma[i ^ 1];
-		else
-			pioavail = dd->ipath_pioavailregs_dma[i];
-		/*
-		 * don't need to worry about ipath_pioavailkernel here
-		 * because we will call ipath_chg_pioavailkernel() later
-		 * in initialization, to busy out buffers as needed
-		 */
-		dd->ipath_pioavailshadow[i] = le64_to_cpu(pioavail);
-	}
-	/* can get counters, stats, etc. */
-	dd->ipath_flags |= IPATH_PRESENT;
-}
-
-static int init_housekeeping(struct ipath_devdata *dd, int reinit)
-{
-	char boardn[40];
-	int ret = 0;
-
-	/*
-	 * have to clear shadow copies of registers at init that are
-	 * not otherwise set here, or all kinds of bizarre things
-	 * happen with driver on chip reset
-	 */
-	dd->ipath_rcvhdrsize = 0;
-
-	/*
-	 * Don't clear ipath_flags as 8bit mode was set before
-	 * entering this func. However, we do set the linkstate to
-	 * unknown, so we can watch for a transition.
-	 * PRESENT is set because we want register reads to work,
-	 * and the kernel infrastructure saw it in config space;
-	 * We clear it if we have failures.
-	 */
-	dd->ipath_flags |= IPATH_LINKUNK | IPATH_PRESENT;
-	dd->ipath_flags &= ~(IPATH_LINKACTIVE | IPATH_LINKARMED |
-			     IPATH_LINKDOWN | IPATH_LINKINIT);
-
-	ipath_cdbg(VERBOSE, "Try to read spc chip revision\n");
-	dd->ipath_revision =
-		ipath_read_kreg64(dd, dd->ipath_kregs->kr_revision);
-
-	/*
-	 * set up fundamental info we need to use the chip; we assume
-	 * if the revision reg and these regs are OK, we don't need to
-	 * special case the rest
-	 */
-	dd->ipath_sregbase =
-		ipath_read_kreg32(dd, dd->ipath_kregs->kr_sendregbase);
-	dd->ipath_cregbase =
-		ipath_read_kreg32(dd, dd->ipath_kregs->kr_counterregbase);
-	dd->ipath_uregbase =
-		ipath_read_kreg32(dd, dd->ipath_kregs->kr_userregbase);
-	ipath_cdbg(VERBOSE, "ipath_kregbase %p, sendbase %x usrbase %x, "
-		   "cntrbase %x\n", dd->ipath_kregbase, dd->ipath_sregbase,
-		   dd->ipath_uregbase, dd->ipath_cregbase);
-	if ((dd->ipath_revision & 0xffffffff) == 0xffffffff
-	    || (dd->ipath_sregbase & 0xffffffff) == 0xffffffff
-	    || (dd->ipath_cregbase & 0xffffffff) == 0xffffffff
-	    || (dd->ipath_uregbase & 0xffffffff) == 0xffffffff) {
-		ipath_dev_err(dd, "Register read failures from chip, "
-			      "giving up initialization\n");
-		dd->ipath_flags &= ~IPATH_PRESENT;
-		ret = -ENODEV;
-		goto done;
-	}
-
-
-	/* clear diagctrl register, in case diags were running and crashed */
-	ipath_write_kreg (dd, dd->ipath_kregs->kr_hwdiagctrl, 0);
-
-	/* clear the initial reset flag, in case first driver load */
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear,
-			 INFINIPATH_E_RESET);
-
-	ipath_cdbg(VERBOSE, "Revision %llx (PCI %x)\n",
-		   (unsigned long long) dd->ipath_revision,
-		   dd->ipath_pcirev);
-
-	if (((dd->ipath_revision >> INFINIPATH_R_SOFTWARE_SHIFT) &
-	     INFINIPATH_R_SOFTWARE_MASK) != IPATH_CHIP_SWVERSION) {
-		ipath_dev_err(dd, "Driver only handles version %d, "
-			      "chip swversion is %d (%llx), failng\n",
-			      IPATH_CHIP_SWVERSION,
-			      (int)(dd->ipath_revision >>
-				    INFINIPATH_R_SOFTWARE_SHIFT) &
-			      INFINIPATH_R_SOFTWARE_MASK,
-			      (unsigned long long) dd->ipath_revision);
-		ret = -ENOSYS;
-		goto done;
-	}
-	dd->ipath_majrev = (u8) ((dd->ipath_revision >>
-				  INFINIPATH_R_CHIPREVMAJOR_SHIFT) &
-				 INFINIPATH_R_CHIPREVMAJOR_MASK);
-	dd->ipath_minrev = (u8) ((dd->ipath_revision >>
-				  INFINIPATH_R_CHIPREVMINOR_SHIFT) &
-				 INFINIPATH_R_CHIPREVMINOR_MASK);
-	dd->ipath_boardrev = (u8) ((dd->ipath_revision >>
-				    INFINIPATH_R_BOARDID_SHIFT) &
-				   INFINIPATH_R_BOARDID_MASK);
-
-	ret = dd->ipath_f_get_boardname(dd, boardn, sizeof boardn);
-
-	snprintf(dd->ipath_boardversion, sizeof(dd->ipath_boardversion),
-		 "ChipABI %u.%u, %s, InfiniPath%u %u.%u, PCI %u, "
-		 "SW Compat %u\n",
-		 IPATH_CHIP_VERS_MAJ, IPATH_CHIP_VERS_MIN, boardn,
-		 (unsigned)(dd->ipath_revision >> INFINIPATH_R_ARCH_SHIFT) &
-		 INFINIPATH_R_ARCH_MASK,
-		 dd->ipath_majrev, dd->ipath_minrev, dd->ipath_pcirev,
-		 (unsigned)(dd->ipath_revision >>
-			    INFINIPATH_R_SOFTWARE_SHIFT) &
-		 INFINIPATH_R_SOFTWARE_MASK);
-
-	ipath_dbg("%s", dd->ipath_boardversion);
-
-	if (ret)
-		goto done;
-
-	if (reinit)
-		ret = init_chip_reset(dd);
-	else
-		ret = init_chip_first(dd);
-
-done:
-	return ret;
-}
-
-static void verify_interrupt(unsigned long opaque)
-{
-	struct ipath_devdata *dd = (struct ipath_devdata *) opaque;
-
-	if (!dd)
-		return; /* being torn down */
-
-	/*
-	 * If we don't have any interrupts, let the user know and
-	 * don't bother checking again.
-	 */
-	if (dd->ipath_int_counter == 0) {
-		if (!dd->ipath_f_intr_fallback(dd))
-			dev_err(&dd->pcidev->dev, "No interrupts detected, "
-				"not usable.\n");
-		else /* re-arm the timer to see if fallback works */
-			mod_timer(&dd->ipath_intrchk_timer, jiffies + HZ/2);
-	} else
-		ipath_cdbg(VERBOSE, "%u interrupts at timer check\n",
-			dd->ipath_int_counter);
-}
-
-/**
- * ipath_init_chip - do the actual initialization sequence on the chip
- * @dd: the infinipath device
- * @reinit: reinitializing, so don't allocate new memory
- *
- * Do the actual initialization sequence on the chip.  This is done
- * both from the init routine called from the PCI infrastructure, and
- * when we reset the chip, or detect that it was reset internally,
- * or it's administratively re-enabled.
- *
- * Memory allocation here and in called routines is only done in
- * the first case (reinit == 0).  We have to be careful, because even
- * without memory allocation, we need to re-write all the chip registers
- * TIDs, etc. after the reset or enable has completed.
- */
-int ipath_init_chip(struct ipath_devdata *dd, int reinit)
-{
-	int ret = 0;
-	u32 kpiobufs, defkbufs;
-	u32 piobufs, uports;
-	u64 val;
-	struct ipath_portdata *pd;
-	gfp_t gfp_flags = GFP_USER | __GFP_COMP;
-
-	ret = init_housekeeping(dd, reinit);
-	if (ret)
-		goto done;
-
-	/*
-	 * We could bump this to allow for full rcvegrcnt + rcvtidcnt,
-	 * but then it no longer nicely fits power of two, and since
-	 * we now use routines that backend onto __get_free_pages, the
-	 * rest would be wasted.
-	 */
-	dd->ipath_rcvhdrcnt = max(dd->ipath_p0_rcvegrcnt, dd->ipath_rcvegrcnt);
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvhdrcnt,
-			 dd->ipath_rcvhdrcnt);
-
-	/*
-	 * Set up the shadow copies of the piobufavail registers,
-	 * which we compare against the chip registers for now, and
-	 * the in memory DMA'ed copies of the registers.  This has to
-	 * be done early, before we calculate lastport, etc.
-	 */
-	piobufs = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k;
-	/*
-	 * calc number of pioavail registers, and save it; we have 2
-	 * bits per buffer.
-	 */
-	dd->ipath_pioavregs = ALIGN(piobufs, sizeof(u64) * BITS_PER_BYTE / 2)
-		/ (sizeof(u64) * BITS_PER_BYTE / 2);
-	uports = dd->ipath_cfgports ? dd->ipath_cfgports - 1 : 0;
-	if (piobufs > 144)
-		defkbufs = 32 + dd->ipath_pioreserved;
-	else
-		defkbufs = 16 + dd->ipath_pioreserved;
-
-	if (ipath_kpiobufs && (ipath_kpiobufs +
-		(uports * IPATH_MIN_USER_PORT_BUFCNT)) > piobufs) {
-		int i = (int) piobufs -
-			(int) (uports * IPATH_MIN_USER_PORT_BUFCNT);
-		if (i < 1)
-			i = 1;
-		dev_info(&dd->pcidev->dev, "Allocating %d PIO bufs of "
-			 "%d for kernel leaves too few for %d user ports "
-			 "(%d each); using %u\n", ipath_kpiobufs,
-			 piobufs, uports, IPATH_MIN_USER_PORT_BUFCNT, i);
-		/*
-		 * shouldn't change ipath_kpiobufs, because could be
-		 * different for different devices...
-		 */
-		kpiobufs = i;
-	} else if (ipath_kpiobufs)
-		kpiobufs = ipath_kpiobufs;
-	else
-		kpiobufs = defkbufs;
-	dd->ipath_lastport_piobuf = piobufs - kpiobufs;
-	dd->ipath_pbufsport =
-		uports ? dd->ipath_lastport_piobuf / uports : 0;
-	/* if not an even divisor, some user ports get extra buffers */
-	dd->ipath_ports_extrabuf = dd->ipath_lastport_piobuf -
-		(dd->ipath_pbufsport * uports);
-	if (dd->ipath_ports_extrabuf)
-		ipath_dbg("%u pbufs/port leaves some unused, add 1 buffer to "
-			"ports <= %u\n", dd->ipath_pbufsport,
-			dd->ipath_ports_extrabuf);
-	dd->ipath_lastpioindex = 0;
-	dd->ipath_lastpioindexl = dd->ipath_piobcnt2k;
-	/* ipath_pioavailshadow initialized earlier */
-	ipath_cdbg(VERBOSE, "%d PIO bufs for kernel out of %d total %u "
-		   "each for %u user ports\n", kpiobufs,
-		   piobufs, dd->ipath_pbufsport, uports);
-	ret = dd->ipath_f_early_init(dd);
-	if (ret) {
-		ipath_dev_err(dd, "Early initialization failure\n");
-		goto done;
-	}
-
-	/*
-	 * Early_init sets rcvhdrentsize and rcvhdrsize, so this must be
-	 * done after early_init.
-	 */
-	dd->ipath_hdrqlast =
-		dd->ipath_rcvhdrentsize * (dd->ipath_rcvhdrcnt - 1);
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvhdrentsize,
-			 dd->ipath_rcvhdrentsize);
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvhdrsize,
-			 dd->ipath_rcvhdrsize);
-
-	if (!reinit) {
-		ret = init_pioavailregs(dd);
-		init_shadow_tids(dd);
-		if (ret)
-			goto done;
-	}
-
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_sendpioavailaddr,
-			 dd->ipath_pioavailregs_phys);
-
-	/*
-	 * this is to detect s/w errors, which the h/w works around by
-	 * ignoring the low 6 bits of address, if it wasn't aligned.
-	 */
-	val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_sendpioavailaddr);
-	if (val != dd->ipath_pioavailregs_phys) {
-		ipath_dev_err(dd, "Catastrophic software error, "
-			      "SendPIOAvailAddr written as %lx, "
-			      "read back as %llx\n",
-			      (unsigned long) dd->ipath_pioavailregs_phys,
-			      (unsigned long long) val);
-		ret = -EINVAL;
-		goto done;
-	}
-
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvbthqp, IPATH_KD_QP);
-
-	/*
-	 * make sure we are not in freeze, and PIO send enabled, so
-	 * writes to pbc happen
-	 */
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask, 0ULL);
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear,
-			 ~0ULL&~INFINIPATH_HWE_MEMBISTFAILED);
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_control, 0ULL);
-
-	/*
-	 * before error clears, since we expect serdes pll errors during
-	 * this, the first time after reset
-	 */
-	if (bringup_link(dd)) {
-		dev_info(&dd->pcidev->dev, "Failed to bringup IB link\n");
-		ret = -ENETDOWN;
-		goto done;
-	}
-
-	/*
-	 * clear any "expected" hwerrs from reset and/or initialization
-	 * clear any that aren't enabled (at least this once), and then
-	 * set the enable mask
-	 */
-	dd->ipath_f_init_hwerrors(dd);
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear,
-			 ~0ULL&~INFINIPATH_HWE_MEMBISTFAILED);
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
-			 dd->ipath_hwerrmask);
-
-	/* clear all */
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, -1LL);
-	/* enable errors that are masked, at least this first time. */
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
-			 ~dd->ipath_maskederrs);
-	dd->ipath_maskederrs = 0; /* don't re-enable ignored in timer */
-	dd->ipath_errormask =
-		ipath_read_kreg64(dd, dd->ipath_kregs->kr_errormask);
-	/* clear any interrupts up to this point (ints still not enabled) */
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, -1LL);
-
-	dd->ipath_f_tidtemplate(dd);
-
-	/*
-	 * Set up the port 0 (kernel) rcvhdr q and egr TIDs.  If doing
-	 * re-init, the simplest way to handle this is to free
-	 * existing, and re-allocate.
-	 * Need to re-create rest of port 0 portdata as well.
-	 */
-	pd = dd->ipath_pd[0];
-	if (reinit) {
-		struct ipath_portdata *npd;
-
-		/*
-		 * Alloc and init new ipath_portdata for port0,
-		 * Then free old pd. Could lead to fragmentation, but also
-		 * makes later support for hot-swap easier.
-		 */
-		npd = create_portdata0(dd);
-		if (npd) {
-			ipath_free_pddata(dd, pd);
-			dd->ipath_pd[0] = npd;
-			pd = npd;
-		} else {
-			ipath_dev_err(dd, "Unable to allocate portdata"
-				      " for port 0, failing\n");
-			ret = -ENOMEM;
-			goto done;
-		}
-	}
-	ret = ipath_create_rcvhdrq(dd, pd);
-	if (!ret)
-		ret = create_port0_egr(dd);
-	if (ret) {
-		ipath_dev_err(dd, "failed to allocate kernel port's "
-			      "rcvhdrq and/or egr bufs\n");
-		goto done;
-	} else {
-		enable_chip(dd, reinit);
-	}
-
-	/* after enable_chip, so pioavailshadow setup */
-	ipath_chg_pioavailkernel(dd, 0, piobufs, 1);
-
-	/*
-	 * Cancel any possible active sends from early driver load.
-	 * Follows early_init because some chips have to initialize
-	 * PIO buffers in early_init to avoid false parity errors.
-	 * After enable and ipath_chg_pioavailkernel so we can safely
-	 * enable pioavail updates and PIOENABLE; packets are now
-	 * ready to go out.
-	 */
-	ipath_cancel_sends(dd, 1);
-
-	if (!reinit) {
-		/*
-		 * Used when we close a port, for DMA already in flight
-		 * at close.
-		 */
-		dd->ipath_dummy_hdrq = dma_alloc_coherent(
-			&dd->pcidev->dev, dd->ipath_pd[0]->port_rcvhdrq_size,
-			&dd->ipath_dummy_hdrq_phys,
-			gfp_flags);
-		if (!dd->ipath_dummy_hdrq) {
-			dev_info(&dd->pcidev->dev,
-				"Couldn't allocate 0x%lx bytes for dummy hdrq\n",
-				dd->ipath_pd[0]->port_rcvhdrq_size);
-			/* fallback to just 0'ing */
-			dd->ipath_dummy_hdrq_phys = 0UL;
-		}
-	}
-
-	/*
-	 * cause retrigger of pending interrupts ignored during init,
-	 * even if we had errors
-	 */
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, 0ULL);
-
-	if (!dd->ipath_stats_timer_active) {
-		/*
-		 * first init, or after an admin disable/enable
-		 * set up stats retrieval timer, even if we had errors
-		 * in last portion of setup
-		 */
-		setup_timer(&dd->ipath_stats_timer, ipath_get_faststats,
-				(unsigned long)dd);
-		/* every 5 seconds; */
-		dd->ipath_stats_timer.expires = jiffies + 5 * HZ;
-		/* takes ~16 seconds to overflow at full IB 4x bandwdith */
-		add_timer(&dd->ipath_stats_timer);
-		dd->ipath_stats_timer_active = 1;
-	}
-
-	/* Set up SendDMA if chip supports it */
-	if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
-		ret = setup_sdma(dd);
-
-	/* Set up HoL state */
-	setup_timer(&dd->ipath_hol_timer, ipath_hol_event, (unsigned long)dd);
-
-	dd->ipath_hol_state = IPATH_HOL_UP;
-
-done:
-	if (!ret) {
-		*dd->ipath_statusp |= IPATH_STATUS_CHIP_PRESENT;
-		if (!dd->ipath_f_intrsetup(dd)) {
-			/* now we can enable all interrupts from the chip */
-			ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask,
-					 -1LL);
-			/* force re-interrupt of any pending interrupts. */
-			ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear,
-					 0ULL);
-			/* chip is usable; mark it as initialized */
-			*dd->ipath_statusp |= IPATH_STATUS_INITTED;
-
-			/*
-			 * setup to verify we get an interrupt, and fallback
-			 * to an alternate if necessary and possible
-			 */
-			if (!reinit) {
-				setup_timer(&dd->ipath_intrchk_timer,
-						verify_interrupt,
-						(unsigned long)dd);
-			}
-			dd->ipath_intrchk_timer.expires = jiffies + HZ/2;
-			add_timer(&dd->ipath_intrchk_timer);
-		} else
-			ipath_dev_err(dd, "No interrupts enabled, couldn't "
-				      "setup interrupt address\n");
-
-		if (dd->ipath_cfgports > ipath_stats.sps_nports)
-			/*
-			 * sps_nports is a global, so, we set it to
-			 * the highest number of ports of any of the
-			 * chips we find; we never decrement it, at
-			 * least for now.  Since this might have changed
-			 * over disable/enable or prior to reset, always
-			 * do the check and potentially adjust.
-			 */
-			ipath_stats.sps_nports = dd->ipath_cfgports;
-	} else
-		ipath_dbg("Failed (%d) to initialize chip\n", ret);
-
-	/* if ret is non-zero, we probably should do some cleanup
-	   here... */
-	return ret;
-}
-
-static int ipath_set_kpiobufs(const char *str, struct kernel_param *kp)
-{
-	struct ipath_devdata *dd;
-	unsigned long flags;
-	unsigned short val;
-	int ret;
-
-	ret = ipath_parse_ushort(str, &val);
-
-	spin_lock_irqsave(&ipath_devs_lock, flags);
-
-	if (ret < 0)
-		goto bail;
-
-	if (val == 0) {
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	list_for_each_entry(dd, &ipath_dev_list, ipath_list) {
-		if (dd->ipath_kregbase)
-			continue;
-		if (val > (dd->ipath_piobcnt2k + dd->ipath_piobcnt4k -
-			   (dd->ipath_cfgports *
-			    IPATH_MIN_USER_PORT_BUFCNT)))
-		{
-			ipath_dev_err(
-				dd,
-				"Allocating %d PIO bufs for kernel leaves "
-				"too few for %d user ports (%d each)\n",
-				val, dd->ipath_cfgports - 1,
-				IPATH_MIN_USER_PORT_BUFCNT);
-			ret = -EINVAL;
-			goto bail;
-		}
-		dd->ipath_lastport_piobuf =
-			dd->ipath_piobcnt2k + dd->ipath_piobcnt4k - val;
-	}
-
-	ipath_kpiobufs = val;
-	ret = 0;
-bail:
-	spin_unlock_irqrestore(&ipath_devs_lock, flags);
-
-	return ret;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_intr.c b/drivers/staging/rdma/ipath/ipath_intr.c
deleted file mode 100644
index 0403fa2..0000000
--- a/drivers/staging/rdma/ipath/ipath_intr.c
+++ /dev/null
@@ -1,1271 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/pci.h>
-#include <linux/delay.h>
-
-#include "ipath_kernel.h"
-#include "ipath_verbs.h"
-#include "ipath_common.h"
-
-
-/*
- * Called when we might have an error that is specific to a particular
- * PIO buffer, and may need to cancel that buffer, so it can be re-used.
- */
-void ipath_disarm_senderrbufs(struct ipath_devdata *dd)
-{
-	u32 piobcnt;
-	unsigned long sbuf[4];
-	/*
-	 * it's possible that sendbuffererror could have bits set; might
-	 * have already done this as a result of hardware error handling
-	 */
-	piobcnt = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k;
-	/* read these before writing errorclear */
-	sbuf[0] = ipath_read_kreg64(
-		dd, dd->ipath_kregs->kr_sendbuffererror);
-	sbuf[1] = ipath_read_kreg64(
-		dd, dd->ipath_kregs->kr_sendbuffererror + 1);
-	if (piobcnt > 128)
-		sbuf[2] = ipath_read_kreg64(
-			dd, dd->ipath_kregs->kr_sendbuffererror + 2);
-	if (piobcnt > 192)
-		sbuf[3] = ipath_read_kreg64(
-			dd, dd->ipath_kregs->kr_sendbuffererror + 3);
-	else
-		sbuf[3] = 0;
-
-	if (sbuf[0] || sbuf[1] || (piobcnt > 128 && (sbuf[2] || sbuf[3]))) {
-		int i;
-		if (ipath_debug & (__IPATH_PKTDBG|__IPATH_DBG) &&
-			time_after(dd->ipath_lastcancel, jiffies)) {
-			__IPATH_DBG_WHICH(__IPATH_PKTDBG|__IPATH_DBG,
-					  "SendbufErrs %lx %lx", sbuf[0],
-					  sbuf[1]);
-			if (ipath_debug & __IPATH_PKTDBG && piobcnt > 128)
-				printk(" %lx %lx ", sbuf[2], sbuf[3]);
-			printk("\n");
-		}
-
-		for (i = 0; i < piobcnt; i++)
-			if (test_bit(i, sbuf))
-				ipath_disarm_piobufs(dd, i, 1);
-		/* ignore armlaunch errs for a bit */
-		dd->ipath_lastcancel = jiffies+3;
-	}
-}
-
-
-/* These are all rcv-related errors which we want to count for stats */
-#define E_SUM_PKTERRS \
-	(INFINIPATH_E_RHDRLEN | INFINIPATH_E_RBADTID | \
-	 INFINIPATH_E_RBADVERSION | INFINIPATH_E_RHDR | \
-	 INFINIPATH_E_RLONGPKTLEN | INFINIPATH_E_RSHORTPKTLEN | \
-	 INFINIPATH_E_RMAXPKTLEN | INFINIPATH_E_RMINPKTLEN | \
-	 INFINIPATH_E_RFORMATERR | INFINIPATH_E_RUNSUPVL | \
-	 INFINIPATH_E_RUNEXPCHAR | INFINIPATH_E_REBP)
-
-/* These are all send-related errors which we want to count for stats */
-#define E_SUM_ERRS \
-	(INFINIPATH_E_SPIOARMLAUNCH | INFINIPATH_E_SUNEXPERRPKTNUM | \
-	 INFINIPATH_E_SDROPPEDDATAPKT | INFINIPATH_E_SDROPPEDSMPPKT | \
-	 INFINIPATH_E_SMAXPKTLEN | INFINIPATH_E_SUNSUPVL | \
-	 INFINIPATH_E_SMINPKTLEN | INFINIPATH_E_SPKTLEN | \
-	 INFINIPATH_E_INVALIDADDR)
-
-/*
- * this is similar to E_SUM_ERRS, but can't ignore armlaunch, don't ignore
- * errors not related to freeze and cancelling buffers.  Can't ignore
- * armlaunch because could get more while still cleaning up, and need
- * to cancel those as they happen.
- */
-#define E_SPKT_ERRS_IGNORE \
-	 (INFINIPATH_E_SDROPPEDDATAPKT | INFINIPATH_E_SDROPPEDSMPPKT | \
-	 INFINIPATH_E_SMAXPKTLEN | INFINIPATH_E_SMINPKTLEN | \
-	 INFINIPATH_E_SPKTLEN)
-
-/*
- * these are errors that can occur when the link changes state while
- * a packet is being sent or received.  This doesn't cover things
- * like EBP or VCRC that can be the result of a sending having the
- * link change state, so we receive a "known bad" packet.
- */
-#define E_SUM_LINK_PKTERRS \
-	(INFINIPATH_E_SDROPPEDDATAPKT | INFINIPATH_E_SDROPPEDSMPPKT | \
-	 INFINIPATH_E_SMINPKTLEN | INFINIPATH_E_SPKTLEN | \
-	 INFINIPATH_E_RSHORTPKTLEN | INFINIPATH_E_RMINPKTLEN | \
-	 INFINIPATH_E_RUNEXPCHAR)
-
-static u64 handle_e_sum_errs(struct ipath_devdata *dd, ipath_err_t errs)
-{
-	u64 ignore_this_time = 0;
-
-	ipath_disarm_senderrbufs(dd);
-	if ((errs & E_SUM_LINK_PKTERRS) &&
-	    !(dd->ipath_flags & IPATH_LINKACTIVE)) {
-		/*
-		 * This can happen when SMA is trying to bring the link
-		 * up, but the IB link changes state at the "wrong" time.
-		 * The IB logic then complains that the packet isn't
-		 * valid.  We don't want to confuse people, so we just
-		 * don't print them, except at debug
-		 */
-		ipath_dbg("Ignoring packet errors %llx, because link not "
-			  "ACTIVE\n", (unsigned long long) errs);
-		ignore_this_time = errs & E_SUM_LINK_PKTERRS;
-	}
-
-	return ignore_this_time;
-}
-
-/* generic hw error messages... */
-#define INFINIPATH_HWE_TXEMEMPARITYERR_MSG(a) \
-	{ \
-		.mask = ( INFINIPATH_HWE_TXEMEMPARITYERR_##a <<    \
-			  INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT ),   \
-		.msg = "TXE " #a " Memory Parity"	     \
-	}
-#define INFINIPATH_HWE_RXEMEMPARITYERR_MSG(a) \
-	{ \
-		.mask = ( INFINIPATH_HWE_RXEMEMPARITYERR_##a <<    \
-			  INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT ),   \
-		.msg = "RXE " #a " Memory Parity"	     \
-	}
-
-static const struct ipath_hwerror_msgs ipath_generic_hwerror_msgs[] = {
-	INFINIPATH_HWE_MSG(IBCBUSFRSPCPARITYERR, "IPATH2IB Parity"),
-	INFINIPATH_HWE_MSG(IBCBUSTOSPCPARITYERR, "IB2IPATH Parity"),
-
-	INFINIPATH_HWE_TXEMEMPARITYERR_MSG(PIOBUF),
-	INFINIPATH_HWE_TXEMEMPARITYERR_MSG(PIOPBC),
-	INFINIPATH_HWE_TXEMEMPARITYERR_MSG(PIOLAUNCHFIFO),
-
-	INFINIPATH_HWE_RXEMEMPARITYERR_MSG(RCVBUF),
-	INFINIPATH_HWE_RXEMEMPARITYERR_MSG(LOOKUPQ),
-	INFINIPATH_HWE_RXEMEMPARITYERR_MSG(EAGERTID),
-	INFINIPATH_HWE_RXEMEMPARITYERR_MSG(EXPTID),
-	INFINIPATH_HWE_RXEMEMPARITYERR_MSG(FLAGBUF),
-	INFINIPATH_HWE_RXEMEMPARITYERR_MSG(DATAINFO),
-	INFINIPATH_HWE_RXEMEMPARITYERR_MSG(HDRINFO),
-};
-
-/**
- * ipath_format_hwmsg - format a single hwerror message
- * @msg message buffer
- * @msgl length of message buffer
- * @hwmsg message to add to message buffer
- */
-static void ipath_format_hwmsg(char *msg, size_t msgl, const char *hwmsg)
-{
-	strlcat(msg, "[", msgl);
-	strlcat(msg, hwmsg, msgl);
-	strlcat(msg, "]", msgl);
-}
-
-/**
- * ipath_format_hwerrors - format hardware error messages for display
- * @hwerrs hardware errors bit vector
- * @hwerrmsgs hardware error descriptions
- * @nhwerrmsgs number of hwerrmsgs
- * @msg message buffer
- * @msgl message buffer length
- */
-void ipath_format_hwerrors(u64 hwerrs,
-			   const struct ipath_hwerror_msgs *hwerrmsgs,
-			   size_t nhwerrmsgs,
-			   char *msg, size_t msgl)
-{
-	int i;
-	const int glen =
-	    ARRAY_SIZE(ipath_generic_hwerror_msgs);
-
-	for (i=0; i<glen; i++) {
-		if (hwerrs & ipath_generic_hwerror_msgs[i].mask) {
-			ipath_format_hwmsg(msg, msgl,
-					   ipath_generic_hwerror_msgs[i].msg);
-		}
-	}
-
-	for (i=0; i<nhwerrmsgs; i++) {
-		if (hwerrs & hwerrmsgs[i].mask) {
-			ipath_format_hwmsg(msg, msgl, hwerrmsgs[i].msg);
-		}
-	}
-}
-
-/* return the strings for the most common link states */
-static char *ib_linkstate(struct ipath_devdata *dd, u64 ibcs)
-{
-	char *ret;
-	u32 state;
-
-	state = ipath_ib_state(dd, ibcs);
-	if (state == dd->ib_init)
-		ret = "Init";
-	else if (state == dd->ib_arm)
-		ret = "Arm";
-	else if (state == dd->ib_active)
-		ret = "Active";
-	else
-		ret = "Down";
-	return ret;
-}
-
-void signal_ib_event(struct ipath_devdata *dd, enum ib_event_type ev)
-{
-	struct ib_event event;
-
-	event.device = &dd->verbs_dev->ibdev;
-	event.element.port_num = 1;
-	event.event = ev;
-	ib_dispatch_event(&event);
-}
-
-static void handle_e_ibstatuschanged(struct ipath_devdata *dd,
-				     ipath_err_t errs)
-{
-	u32 ltstate, lstate, ibstate, lastlstate;
-	u32 init = dd->ib_init;
-	u32 arm = dd->ib_arm;
-	u32 active = dd->ib_active;
-	const u64 ibcs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus);
-
-	lstate = ipath_ib_linkstate(dd, ibcs); /* linkstate */
-	ibstate = ipath_ib_state(dd, ibcs);
-	/* linkstate at last interrupt */
-	lastlstate = ipath_ib_linkstate(dd, dd->ipath_lastibcstat);
-	ltstate = ipath_ib_linktrstate(dd, ibcs); /* linktrainingtate */
-
-	/*
-	 * Since going into a recovery state causes the link state to go
-	 * down and since recovery is transitory, it is better if we "miss"
-	 * ever seeing the link training state go into recovery (i.e.,
-	 * ignore this transition for link state special handling purposes)
-	 * without even updating ipath_lastibcstat.
-	 */
-	if ((ltstate == INFINIPATH_IBCS_LT_STATE_RECOVERRETRAIN) ||
-	    (ltstate == INFINIPATH_IBCS_LT_STATE_RECOVERWAITRMT) ||
-	    (ltstate == INFINIPATH_IBCS_LT_STATE_RECOVERIDLE))
-		goto done;
-
-	/*
-	 * if linkstate transitions into INIT from any of the various down
-	 * states, or if it transitions from any of the up (INIT or better)
-	 * states into any of the down states (except link recovery), then
-	 * call the chip-specific code to take appropriate actions.
-	 */
-	if (lstate >= INFINIPATH_IBCS_L_STATE_INIT &&
-		lastlstate == INFINIPATH_IBCS_L_STATE_DOWN) {
-		/* transitioned to UP */
-		if (dd->ipath_f_ib_updown(dd, 1, ibcs)) {
-			/* link came up, so we must no longer be disabled */
-			dd->ipath_flags &= ~IPATH_IB_LINK_DISABLED;
-			ipath_cdbg(LINKVERB, "LinkUp handled, skipped\n");
-			goto skip_ibchange; /* chip-code handled */
-		}
-	} else if ((lastlstate >= INFINIPATH_IBCS_L_STATE_INIT ||
-		(dd->ipath_flags & IPATH_IB_FORCE_NOTIFY)) &&
-		ltstate <= INFINIPATH_IBCS_LT_STATE_CFGWAITRMT &&
-		ltstate != INFINIPATH_IBCS_LT_STATE_LINKUP) {
-		int handled;
-		handled = dd->ipath_f_ib_updown(dd, 0, ibcs);
-		dd->ipath_flags &= ~IPATH_IB_FORCE_NOTIFY;
-		if (handled) {
-			ipath_cdbg(LINKVERB, "LinkDown handled, skipped\n");
-			goto skip_ibchange; /* chip-code handled */
-		}
-	}
-
-	/*
-	 * Significant enough to always print and get into logs, if it was
-	 * unexpected.  If it was a requested state change, we'll have
-	 * already cleared the flags, so we won't print this warning
-	 */
-	if ((ibstate != arm && ibstate != active) &&
-	    (dd->ipath_flags & (IPATH_LINKARMED | IPATH_LINKACTIVE))) {
-		dev_info(&dd->pcidev->dev, "Link state changed from %s "
-			 "to %s\n", (dd->ipath_flags & IPATH_LINKARMED) ?
-			 "ARM" : "ACTIVE", ib_linkstate(dd, ibcs));
-	}
-
-	if (ltstate == INFINIPATH_IBCS_LT_STATE_POLLACTIVE ||
-	    ltstate == INFINIPATH_IBCS_LT_STATE_POLLQUIET) {
-		u32 lastlts;
-		lastlts = ipath_ib_linktrstate(dd, dd->ipath_lastibcstat);
-		/*
-		 * Ignore cycling back and forth from Polling.Active to
-		 * Polling.Quiet while waiting for the other end of the link
-		 * to come up, except to try and decide if we are connected
-		 * to a live IB device or not.  We will cycle back and
-		 * forth between them if no cable is plugged in, the other
-		 * device is powered off or disabled, etc.
-		 */
-		if (lastlts == INFINIPATH_IBCS_LT_STATE_POLLACTIVE ||
-		    lastlts == INFINIPATH_IBCS_LT_STATE_POLLQUIET) {
-			if (!(dd->ipath_flags & IPATH_IB_AUTONEG_INPROG) &&
-			     (++dd->ipath_ibpollcnt == 40)) {
-				dd->ipath_flags |= IPATH_NOCABLE;
-				*dd->ipath_statusp |=
-					IPATH_STATUS_IB_NOCABLE;
-				ipath_cdbg(LINKVERB, "Set NOCABLE\n");
-			}
-			ipath_cdbg(LINKVERB, "POLL change to %s (%x)\n",
-				ipath_ibcstatus_str[ltstate], ibstate);
-			goto skip_ibchange;
-		}
-	}
-
-	dd->ipath_ibpollcnt = 0; /* not poll*, now */
-	ipath_stats.sps_iblink++;
-
-	if (ibstate != init && dd->ipath_lastlinkrecov && ipath_linkrecovery) {
-		u64 linkrecov;
-		linkrecov = ipath_snap_cntr(dd,
-			dd->ipath_cregs->cr_iblinkerrrecovcnt);
-		if (linkrecov != dd->ipath_lastlinkrecov) {
-			ipath_dbg("IB linkrecov up %Lx (%s %s) recov %Lu\n",
-				(unsigned long long) ibcs,
-				ib_linkstate(dd, ibcs),
-				ipath_ibcstatus_str[ltstate],
-				(unsigned long long) linkrecov);
-			/* and no more until active again */
-			dd->ipath_lastlinkrecov = 0;
-			ipath_set_linkstate(dd, IPATH_IB_LINKDOWN);
-			goto skip_ibchange;
-		}
-	}
-
-	if (ibstate == init || ibstate == arm || ibstate == active) {
-		*dd->ipath_statusp &= ~IPATH_STATUS_IB_NOCABLE;
-		if (ibstate == init || ibstate == arm) {
-			*dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
-			if (dd->ipath_flags & IPATH_LINKACTIVE)
-				signal_ib_event(dd, IB_EVENT_PORT_ERR);
-		}
-		if (ibstate == arm) {
-			dd->ipath_flags |= IPATH_LINKARMED;
-			dd->ipath_flags &= ~(IPATH_LINKUNK |
-				IPATH_LINKINIT | IPATH_LINKDOWN |
-				IPATH_LINKACTIVE | IPATH_NOCABLE);
-			ipath_hol_down(dd);
-		} else  if (ibstate == init) {
-			/*
-			 * set INIT and DOWN.  Down is checked by
-			 * most of the other code, but INIT is
-			 * useful to know in a few places.
-			 */
-			dd->ipath_flags |= IPATH_LINKINIT |
-				IPATH_LINKDOWN;
-			dd->ipath_flags &= ~(IPATH_LINKUNK |
-				IPATH_LINKARMED | IPATH_LINKACTIVE |
-				IPATH_NOCABLE);
-			ipath_hol_down(dd);
-		} else {  /* active */
-			dd->ipath_lastlinkrecov = ipath_snap_cntr(dd,
-				dd->ipath_cregs->cr_iblinkerrrecovcnt);
-			*dd->ipath_statusp |=
-				IPATH_STATUS_IB_READY | IPATH_STATUS_IB_CONF;
-			dd->ipath_flags |= IPATH_LINKACTIVE;
-			dd->ipath_flags &= ~(IPATH_LINKUNK | IPATH_LINKINIT
-				| IPATH_LINKDOWN | IPATH_LINKARMED |
-				IPATH_NOCABLE);
-			if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
-				ipath_restart_sdma(dd);
-			signal_ib_event(dd, IB_EVENT_PORT_ACTIVE);
-			/* LED active not handled in chip _f_updown */
-			dd->ipath_f_setextled(dd, lstate, ltstate);
-			ipath_hol_up(dd);
-		}
-
-		/*
-		 * print after we've already done the work, so as not to
-		 * delay the state changes and notifications, for debugging
-		 */
-		if (lstate == lastlstate)
-			ipath_cdbg(LINKVERB, "Unchanged from last: %s "
-				"(%x)\n", ib_linkstate(dd, ibcs), ibstate);
-		else
-			ipath_cdbg(VERBOSE, "Unit %u: link up to %s %s (%x)\n",
-				  dd->ipath_unit, ib_linkstate(dd, ibcs),
-				  ipath_ibcstatus_str[ltstate],  ibstate);
-	} else { /* down */
-		if (dd->ipath_flags & IPATH_LINKACTIVE)
-			signal_ib_event(dd, IB_EVENT_PORT_ERR);
-		dd->ipath_flags |= IPATH_LINKDOWN;
-		dd->ipath_flags &= ~(IPATH_LINKUNK | IPATH_LINKINIT
-				     | IPATH_LINKACTIVE |
-				     IPATH_LINKARMED);
-		*dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
-		dd->ipath_lli_counter = 0;
-
-		if (lastlstate != INFINIPATH_IBCS_L_STATE_DOWN)
-			ipath_cdbg(VERBOSE, "Unit %u link state down "
-				   "(state 0x%x), from %s\n",
-				   dd->ipath_unit, lstate,
-				   ib_linkstate(dd, dd->ipath_lastibcstat));
-		else
-			ipath_cdbg(LINKVERB, "Unit %u link state changed "
-				   "to %s (0x%x) from down (%x)\n",
-				   dd->ipath_unit,
-				   ipath_ibcstatus_str[ltstate],
-				   ibstate, lastlstate);
-	}
-
-skip_ibchange:
-	dd->ipath_lastibcstat = ibcs;
-done:
-	return;
-}
-
-static void handle_supp_msgs(struct ipath_devdata *dd,
-			     unsigned supp_msgs, char *msg, u32 msgsz)
-{
-	/*
-	 * Print the message unless it's ibc status change only, which
-	 * happens so often we never want to count it.
-	 */
-	if (dd->ipath_lasterror & ~INFINIPATH_E_IBSTATUSCHANGED) {
-		int iserr;
-		ipath_err_t mask;
-		iserr = ipath_decode_err(dd, msg, msgsz,
-					 dd->ipath_lasterror &
-					 ~INFINIPATH_E_IBSTATUSCHANGED);
-
-		mask = INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL |
-			INFINIPATH_E_PKTERRS | INFINIPATH_E_SDMADISABLED;
-
-		/* if we're in debug, then don't mask SDMADISABLED msgs */
-		if (ipath_debug & __IPATH_DBG)
-			mask &= ~INFINIPATH_E_SDMADISABLED;
-
-		if (dd->ipath_lasterror & ~mask)
-			ipath_dev_err(dd, "Suppressed %u messages for "
-				      "fast-repeating errors (%s) (%llx)\n",
-				      supp_msgs, msg,
-				      (unsigned long long)
-				      dd->ipath_lasterror);
-		else {
-			/*
-			 * rcvegrfull and rcvhdrqfull are "normal", for some
-			 * types of processes (mostly benchmarks) that send
-			 * huge numbers of messages, while not processing
-			 * them. So only complain about these at debug
-			 * level.
-			 */
-			if (iserr)
-				ipath_dbg("Suppressed %u messages for %s\n",
-					  supp_msgs, msg);
-			else
-				ipath_cdbg(ERRPKT,
-					"Suppressed %u messages for %s\n",
-					  supp_msgs, msg);
-		}
-	}
-}
-
-static unsigned handle_frequent_errors(struct ipath_devdata *dd,
-				       ipath_err_t errs, char *msg,
-				       u32 msgsz, int *noprint)
-{
-	unsigned long nc;
-	static unsigned long nextmsg_time;
-	static unsigned nmsgs, supp_msgs;
-
-	/*
-	 * Throttle back "fast" messages to no more than 10 per 5 seconds.
-	 * This isn't perfect, but it's a reasonable heuristic. If we get
-	 * more than 10, give a 6x longer delay.
-	 */
-	nc = jiffies;
-	if (nmsgs > 10) {
-		if (time_before(nc, nextmsg_time)) {
-			*noprint = 1;
-			if (!supp_msgs++)
-				nextmsg_time = nc + HZ * 3;
-		} else if (supp_msgs) {
-			handle_supp_msgs(dd, supp_msgs, msg, msgsz);
-			supp_msgs = 0;
-			nmsgs = 0;
-		}
-	} else if (!nmsgs++ || time_after(nc, nextmsg_time)) {
-		nextmsg_time = nc + HZ / 2;
-	}
-
-	return supp_msgs;
-}
-
-static void handle_sdma_errors(struct ipath_devdata *dd, ipath_err_t errs)
-{
-	unsigned long flags;
-	int expected;
-
-	if (ipath_debug & __IPATH_DBG) {
-		char msg[128];
-		ipath_decode_err(dd, msg, sizeof msg, errs &
-			INFINIPATH_E_SDMAERRS);
-		ipath_dbg("errors %lx (%s)\n", (unsigned long)errs, msg);
-	}
-	if (ipath_debug & __IPATH_VERBDBG) {
-		unsigned long tl, hd, status, lengen;
-		tl = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmatail);
-		hd = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmahead);
-		status = ipath_read_kreg64(dd
-			, dd->ipath_kregs->kr_senddmastatus);
-		lengen = ipath_read_kreg64(dd,
-			dd->ipath_kregs->kr_senddmalengen);
-		ipath_cdbg(VERBOSE, "sdma tl 0x%lx hd 0x%lx status 0x%lx "
-			"lengen 0x%lx\n", tl, hd, status, lengen);
-	}
-
-	spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
-	__set_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status);
-	expected = test_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status);
-	spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
-	if (!expected)
-		ipath_cancel_sends(dd, 1);
-}
-
-static void handle_sdma_intr(struct ipath_devdata *dd, u64 istat)
-{
-	unsigned long flags;
-	int expected;
-
-	if ((istat & INFINIPATH_I_SDMAINT) &&
-	    !test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
-		ipath_sdma_intr(dd);
-
-	if (istat & INFINIPATH_I_SDMADISABLED) {
-		expected = test_bit(IPATH_SDMA_ABORTING,
-			&dd->ipath_sdma_status);
-		ipath_dbg("%s SDmaDisabled intr\n",
-			expected ? "expected" : "unexpected");
-		spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
-		__set_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status);
-		spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
-		if (!expected)
-			ipath_cancel_sends(dd, 1);
-		if (!test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
-			tasklet_hi_schedule(&dd->ipath_sdma_abort_task);
-	}
-}
-
-static int handle_hdrq_full(struct ipath_devdata *dd)
-{
-	int chkerrpkts = 0;
-	u32 hd, tl;
-	u32 i;
-
-	ipath_stats.sps_hdrqfull++;
-	for (i = 0; i < dd->ipath_cfgports; i++) {
-		struct ipath_portdata *pd = dd->ipath_pd[i];
-
-		if (i == 0) {
-			/*
-			 * For kernel receive queues, we just want to know
-			 * if there are packets in the queue that we can
-			 * process.
-			 */
-			if (pd->port_head != ipath_get_hdrqtail(pd))
-				chkerrpkts |= 1 << i;
-			continue;
-		}
-
-		/* Skip if user context is not open */
-		if (!pd || !pd->port_cnt)
-			continue;
-
-		/* Don't report the same point multiple times. */
-		if (dd->ipath_flags & IPATH_NODMA_RTAIL)
-			tl = ipath_read_ureg32(dd, ur_rcvhdrtail, i);
-		else
-			tl = ipath_get_rcvhdrtail(pd);
-		if (tl == pd->port_lastrcvhdrqtail)
-			continue;
-
-		hd = ipath_read_ureg32(dd, ur_rcvhdrhead, i);
-		if (hd == (tl + 1) || (!hd && tl == dd->ipath_hdrqlast)) {
-			pd->port_lastrcvhdrqtail = tl;
-			pd->port_hdrqfull++;
-			/* flush hdrqfull so that poll() sees it */
-			wmb();
-			wake_up_interruptible(&pd->port_wait);
-		}
-	}
-
-	return chkerrpkts;
-}
-
-static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
-{
-	char msg[128];
-	u64 ignore_this_time = 0;
-	u64 iserr = 0;
-	int chkerrpkts = 0, noprint = 0;
-	unsigned supp_msgs;
-	int log_idx;
-
-	/*
-	 * don't report errors that are masked, either at init
-	 * (not set in ipath_errormask), or temporarily (set in
-	 * ipath_maskederrs)
-	 */
-	errs &= dd->ipath_errormask & ~dd->ipath_maskederrs;
-
-	supp_msgs = handle_frequent_errors(dd, errs, msg, (u32)sizeof msg,
-		&noprint);
-
-	/* do these first, they are most important */
-	if (errs & INFINIPATH_E_HARDWARE) {
-		/* reuse same msg buf */
-		dd->ipath_f_handle_hwerrors(dd, msg, sizeof msg);
-	} else {
-		u64 mask;
-		for (log_idx = 0; log_idx < IPATH_EEP_LOG_CNT; ++log_idx) {
-			mask = dd->ipath_eep_st_masks[log_idx].errs_to_log;
-			if (errs & mask)
-				ipath_inc_eeprom_err(dd, log_idx, 1);
-		}
-	}
-
-	if (errs & INFINIPATH_E_SDMAERRS)
-		handle_sdma_errors(dd, errs);
-
-	if (!noprint && (errs & ~dd->ipath_e_bitsextant))
-		ipath_dev_err(dd, "error interrupt with unknown errors "
-			      "%llx set\n", (unsigned long long)
-			      (errs & ~dd->ipath_e_bitsextant));
-
-	if (errs & E_SUM_ERRS)
-		ignore_this_time = handle_e_sum_errs(dd, errs);
-	else if ((errs & E_SUM_LINK_PKTERRS) &&
-	    !(dd->ipath_flags & IPATH_LINKACTIVE)) {
-		/*
-		 * This can happen when SMA is trying to bring the link
-		 * up, but the IB link changes state at the "wrong" time.
-		 * The IB logic then complains that the packet isn't
-		 * valid.  We don't want to confuse people, so we just
-		 * don't print them, except at debug
-		 */
-		ipath_dbg("Ignoring packet errors %llx, because link not "
-			  "ACTIVE\n", (unsigned long long) errs);
-		ignore_this_time = errs & E_SUM_LINK_PKTERRS;
-	}
-
-	if (supp_msgs == 250000) {
-		int s_iserr;
-		/*
-		 * It's not entirely reasonable assuming that the errors set
-		 * in the last clear period are all responsible for the
-		 * problem, but the alternative is to assume it's the only
-		 * ones on this particular interrupt, which also isn't great
-		 */
-		dd->ipath_maskederrs |= dd->ipath_lasterror | errs;
-
-		dd->ipath_errormask &= ~dd->ipath_maskederrs;
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
-				 dd->ipath_errormask);
-		s_iserr = ipath_decode_err(dd, msg, sizeof msg,
-					   dd->ipath_maskederrs);
-
-		if (dd->ipath_maskederrs &
-		    ~(INFINIPATH_E_RRCVEGRFULL |
-		      INFINIPATH_E_RRCVHDRFULL | INFINIPATH_E_PKTERRS))
-			ipath_dev_err(dd, "Temporarily disabling "
-			    "error(s) %llx reporting; too frequent (%s)\n",
-				(unsigned long long) dd->ipath_maskederrs,
-				msg);
-		else {
-			/*
-			 * rcvegrfull and rcvhdrqfull are "normal",
-			 * for some types of processes (mostly benchmarks)
-			 * that send huge numbers of messages, while not
-			 * processing them.  So only complain about
-			 * these at debug level.
-			 */
-			if (s_iserr)
-				ipath_dbg("Temporarily disabling reporting "
-				    "too frequent queue full errors (%s)\n",
-				    msg);
-			else
-				ipath_cdbg(ERRPKT,
-				    "Temporarily disabling reporting too"
-				    " frequent packet errors (%s)\n",
-				    msg);
-		}
-
-		/*
-		 * Re-enable the masked errors after around 3 minutes.  in
-		 * ipath_get_faststats().  If we have a series of fast
-		 * repeating but different errors, the interval will keep
-		 * stretching out, but that's OK, as that's pretty
-		 * catastrophic.
-		 */
-		dd->ipath_unmasktime = jiffies + HZ * 180;
-	}
-
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, errs);
-	if (ignore_this_time)
-		errs &= ~ignore_this_time;
-	if (errs & ~dd->ipath_lasterror) {
-		errs &= ~dd->ipath_lasterror;
-		/* never suppress duplicate hwerrors or ibstatuschange */
-		dd->ipath_lasterror |= errs &
-			~(INFINIPATH_E_HARDWARE |
-			  INFINIPATH_E_IBSTATUSCHANGED);
-	}
-
-	if (errs & INFINIPATH_E_SENDSPECIALTRIGGER) {
-		dd->ipath_spectriggerhit++;
-		ipath_dbg("%lu special trigger hits\n",
-			dd->ipath_spectriggerhit);
-	}
-
-	/* likely due to cancel; so suppress message unless verbose */
-	if ((errs & (INFINIPATH_E_SPKTLEN | INFINIPATH_E_SPIOARMLAUNCH)) &&
-		time_after(dd->ipath_lastcancel, jiffies)) {
-		/* armlaunch takes precedence; it often causes both. */
-		ipath_cdbg(VERBOSE,
-			"Suppressed %s error (%llx) after sendbuf cancel\n",
-			(errs &  INFINIPATH_E_SPIOARMLAUNCH) ?
-			"armlaunch" : "sendpktlen", (unsigned long long)errs);
-		errs &= ~(INFINIPATH_E_SPIOARMLAUNCH | INFINIPATH_E_SPKTLEN);
-	}
-
-	if (!errs)
-		return 0;
-
-	if (!noprint) {
-		ipath_err_t mask;
-		/*
-		 * The ones we mask off are handled specially below
-		 * or above.  Also mask SDMADISABLED by default as it
-		 * is too chatty.
-		 */
-		mask = INFINIPATH_E_IBSTATUSCHANGED |
-			INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL |
-			INFINIPATH_E_HARDWARE | INFINIPATH_E_SDMADISABLED;
-
-		/* if we're in debug, then don't mask SDMADISABLED msgs */
-		if (ipath_debug & __IPATH_DBG)
-			mask &= ~INFINIPATH_E_SDMADISABLED;
-
-		ipath_decode_err(dd, msg, sizeof msg, errs & ~mask);
-	} else
-		/* so we don't need if (!noprint) at strlcat's below */
-		*msg = 0;
-
-	if (errs & E_SUM_PKTERRS) {
-		ipath_stats.sps_pkterrs++;
-		chkerrpkts = 1;
-	}
-	if (errs & E_SUM_ERRS)
-		ipath_stats.sps_errs++;
-
-	if (errs & (INFINIPATH_E_RICRC | INFINIPATH_E_RVCRC)) {
-		ipath_stats.sps_crcerrs++;
-		chkerrpkts = 1;
-	}
-	iserr = errs & ~(E_SUM_PKTERRS | INFINIPATH_E_PKTERRS);
-
-
-	/*
-	 * We don't want to print these two as they happen, or we can make
-	 * the situation even worse, because it takes so long to print
-	 * messages to serial consoles.  Kernel ports get printed from
-	 * fast_stats, no more than every 5 seconds, user ports get printed
-	 * on close
-	 */
-	if (errs & INFINIPATH_E_RRCVHDRFULL)
-		chkerrpkts |= handle_hdrq_full(dd);
-	if (errs & INFINIPATH_E_RRCVEGRFULL) {
-		struct ipath_portdata *pd = dd->ipath_pd[0];
-
-		/*
-		 * since this is of less importance and not likely to
-		 * happen without also getting hdrfull, only count
-		 * occurrences; don't check each port (or even the kernel
-		 * vs user)
-		 */
-		ipath_stats.sps_etidfull++;
-		if (pd->port_head != ipath_get_hdrqtail(pd))
-			chkerrpkts |= 1;
-	}
-
-	/*
-	 * do this before IBSTATUSCHANGED, in case both bits set in a single
-	 * interrupt; we want the STATUSCHANGE to "win", so we do our
-	 * internal copy of state machine correctly
-	 */
-	if (errs & INFINIPATH_E_RIBLOSTLINK) {
-		/*
-		 * force through block below
-		 */
-		errs |= INFINIPATH_E_IBSTATUSCHANGED;
-		ipath_stats.sps_iblink++;
-		dd->ipath_flags |= IPATH_LINKDOWN;
-		dd->ipath_flags &= ~(IPATH_LINKUNK | IPATH_LINKINIT
-				     | IPATH_LINKARMED | IPATH_LINKACTIVE);
-		*dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
-
-		ipath_dbg("Lost link, link now down (%s)\n",
-			ipath_ibcstatus_str[ipath_read_kreg64(dd,
-			dd->ipath_kregs->kr_ibcstatus) & 0xf]);
-	}
-	if (errs & INFINIPATH_E_IBSTATUSCHANGED)
-		handle_e_ibstatuschanged(dd, errs);
-
-	if (errs & INFINIPATH_E_RESET) {
-		if (!noprint)
-			ipath_dev_err(dd, "Got reset, requires re-init "
-				      "(unload and reload driver)\n");
-		dd->ipath_flags &= ~IPATH_INITTED;	/* needs re-init */
-		/* mark as having had error */
-		*dd->ipath_statusp |= IPATH_STATUS_HWERROR;
-		*dd->ipath_statusp &= ~IPATH_STATUS_IB_CONF;
-	}
-
-	if (!noprint && *msg) {
-		if (iserr)
-			ipath_dev_err(dd, "%s error\n", msg);
-	}
-	if (dd->ipath_state_wanted & dd->ipath_flags) {
-		ipath_cdbg(VERBOSE, "driver wanted state %x, iflags now %x, "
-			   "waking\n", dd->ipath_state_wanted,
-			   dd->ipath_flags);
-		wake_up_interruptible(&ipath_state_wait);
-	}
-
-	return chkerrpkts;
-}
-
-/*
- * try to cleanup as much as possible for anything that might have gone
- * wrong while in freeze mode, such as pio buffers being written by user
- * processes (causing armlaunch), send errors due to going into freeze mode,
- * etc., and try to avoid causing extra interrupts while doing so.
- * Forcibly update the in-memory pioavail register copies after cleanup
- * because the chip won't do it while in freeze mode (the register values
- * themselves are kept correct).
- * Make sure that we don't lose any important interrupts by using the chip
- * feature that says that writing 0 to a bit in *clear that is set in
- * *status will cause an interrupt to be generated again (if allowed by
- * the *mask value).
- */
-void ipath_clear_freeze(struct ipath_devdata *dd)
-{
-	/* disable error interrupts, to avoid confusion */
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, 0ULL);
-
-	/* also disable interrupts; errormask is sometimes overwriten */
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL);
-
-	ipath_cancel_sends(dd, 1);
-
-	/* clear the freeze, and be sure chip saw it */
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
-			 dd->ipath_control);
-	ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-
-	/* force in-memory update now we are out of freeze */
-	ipath_force_pio_avail_update(dd);
-
-	/*
-	 * force new interrupt if any hwerr, error or interrupt bits are
-	 * still set, and clear "safe" send packet errors related to freeze
-	 * and cancelling sends.  Re-enable error interrupts before possible
-	 * force of re-interrupt on pending interrupts.
-	 */
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear, 0ULL);
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear,
-		E_SPKT_ERRS_IGNORE);
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
-		dd->ipath_errormask);
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, -1LL);
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, 0ULL);
-}
-
-
-/* this is separate to allow for better optimization of ipath_intr() */
-
-static noinline void ipath_bad_intr(struct ipath_devdata *dd, u32 *unexpectp)
-{
-	/*
-	 * sometimes happen during driver init and unload, don't want
-	 * to process any interrupts at that point
-	 */
-
-	/* this is just a bandaid, not a fix, if something goes badly
-	 * wrong */
-	if (++*unexpectp > 100) {
-		if (++*unexpectp > 105) {
-			/*
-			 * ok, we must be taking somebody else's interrupts,
-			 * due to a messed up mptable and/or PIRQ table, so
-			 * unregister the interrupt.  We've seen this during
-			 * linuxbios development work, and it may happen in
-			 * the future again.
-			 */
-			if (dd->pcidev && dd->ipath_irq) {
-				ipath_dev_err(dd, "Now %u unexpected "
-					      "interrupts, unregistering "
-					      "interrupt handler\n",
-					      *unexpectp);
-				ipath_dbg("free_irq of irq %d\n",
-					  dd->ipath_irq);
-				dd->ipath_f_free_irq(dd);
-			}
-		}
-		if (ipath_read_ireg(dd, dd->ipath_kregs->kr_intmask)) {
-			ipath_dev_err(dd, "%u unexpected interrupts, "
-				      "disabling interrupts completely\n",
-				      *unexpectp);
-			/*
-			 * disable all interrupts, something is very wrong
-			 */
-			ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask,
-					 0ULL);
-		}
-	} else if (*unexpectp > 1)
-		ipath_dbg("Interrupt when not ready, should not happen, "
-			  "ignoring\n");
-}
-
-static noinline void ipath_bad_regread(struct ipath_devdata *dd)
-{
-	static int allbits;
-
-	/* separate routine, for better optimization of ipath_intr() */
-
-	/*
-	 * We print the message and disable interrupts, in hope of
-	 * having a better chance of debugging the problem.
-	 */
-	ipath_dev_err(dd,
-		      "Read of interrupt status failed (all bits set)\n");
-	if (allbits++) {
-		/* disable all interrupts, something is very wrong */
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL);
-		if (allbits == 2) {
-			ipath_dev_err(dd, "Still bad interrupt status, "
-				      "unregistering interrupt\n");
-			dd->ipath_f_free_irq(dd);
-		} else if (allbits > 2) {
-			if ((allbits % 10000) == 0)
-				printk(".");
-		} else
-			ipath_dev_err(dd, "Disabling interrupts, "
-				      "multiple errors\n");
-	}
-}
-
-static void handle_layer_pioavail(struct ipath_devdata *dd)
-{
-	unsigned long flags;
-	int ret;
-
-	ret = ipath_ib_piobufavail(dd->verbs_dev);
-	if (ret > 0)
-		goto set;
-
-	return;
-set:
-	spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
-	dd->ipath_sendctrl |= INFINIPATH_S_PIOINTBUFAVAIL;
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-			 dd->ipath_sendctrl);
-	ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-	spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
-}
-
-/*
- * Handle receive interrupts for user ports; this means a user
- * process was waiting for a packet to arrive, and didn't want
- * to poll
- */
-static void handle_urcv(struct ipath_devdata *dd, u64 istat)
-{
-	u64 portr;
-	int i;
-	int rcvdint = 0;
-
-	/*
-	 * test_and_clear_bit(IPATH_PORT_WAITING_RCV) and
-	 * test_and_clear_bit(IPATH_PORT_WAITING_URG) below
-	 * would both like timely updates of the bits so that
-	 * we don't pass them by unnecessarily.  the rmb()
-	 * here ensures that we see them promptly -- the
-	 * corresponding wmb()'s are in ipath_poll_urgent()
-	 * and ipath_poll_next()...
-	 */
-	rmb();
-	portr = ((istat >> dd->ipath_i_rcvavail_shift) &
-		 dd->ipath_i_rcvavail_mask) |
-		((istat >> dd->ipath_i_rcvurg_shift) &
-		 dd->ipath_i_rcvurg_mask);
-	for (i = 1; i < dd->ipath_cfgports; i++) {
-		struct ipath_portdata *pd = dd->ipath_pd[i];
-
-		if (portr & (1 << i) && pd && pd->port_cnt) {
-			if (test_and_clear_bit(IPATH_PORT_WAITING_RCV,
-					       &pd->port_flag)) {
-				clear_bit(i + dd->ipath_r_intravail_shift,
-					  &dd->ipath_rcvctrl);
-				wake_up_interruptible(&pd->port_wait);
-				rcvdint = 1;
-			} else if (test_and_clear_bit(IPATH_PORT_WAITING_URG,
-						      &pd->port_flag)) {
-				pd->port_urgent++;
-				wake_up_interruptible(&pd->port_wait);
-			}
-		}
-	}
-	if (rcvdint) {
-		/* only want to take one interrupt, so turn off the rcv
-		 * interrupt for all the ports that we set the rcv_waiting
-		 * (but never for kernel port)
-		 */
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
-				 dd->ipath_rcvctrl);
-	}
-}
-
-irqreturn_t ipath_intr(int irq, void *data)
-{
-	struct ipath_devdata *dd = data;
-	u64 istat, chk0rcv = 0;
-	ipath_err_t estat = 0;
-	irqreturn_t ret;
-	static unsigned unexpected = 0;
-	u64 kportrbits;
-
-	ipath_stats.sps_ints++;
-
-	if (dd->ipath_int_counter != (u32) -1)
-		dd->ipath_int_counter++;
-
-	if (!(dd->ipath_flags & IPATH_PRESENT)) {
-		/*
-		 * This return value is not great, but we do not want the
-		 * interrupt core code to remove our interrupt handler
-		 * because we don't appear to be handling an interrupt
-		 * during a chip reset.
-		 */
-		return IRQ_HANDLED;
-	}
-
-	/*
-	 * this needs to be flags&initted, not statusp, so we keep
-	 * taking interrupts even after link goes down, etc.
-	 * Also, we *must* clear the interrupt at some point, or we won't
-	 * take it again, which can be real bad for errors, etc...
-	 */
-
-	if (!(dd->ipath_flags & IPATH_INITTED)) {
-		ipath_bad_intr(dd, &unexpected);
-		ret = IRQ_NONE;
-		goto bail;
-	}
-
-	istat = ipath_read_ireg(dd, dd->ipath_kregs->kr_intstatus);
-
-	if (unlikely(!istat)) {
-		ipath_stats.sps_nullintr++;
-		ret = IRQ_NONE; /* not our interrupt, or already handled */
-		goto bail;
-	}
-	if (unlikely(istat == -1)) {
-		ipath_bad_regread(dd);
-		/* don't know if it was our interrupt or not */
-		ret = IRQ_NONE;
-		goto bail;
-	}
-
-	if (unexpected)
-		unexpected = 0;
-
-	if (unlikely(istat & ~dd->ipath_i_bitsextant))
-		ipath_dev_err(dd,
-			      "interrupt with unknown interrupts %Lx set\n",
-			      (unsigned long long)
-			      istat & ~dd->ipath_i_bitsextant);
-	else if (istat & ~INFINIPATH_I_ERROR) /* errors do own printing */
-		ipath_cdbg(VERBOSE, "intr stat=0x%Lx\n",
-			(unsigned long long) istat);
-
-	if (istat & INFINIPATH_I_ERROR) {
-		ipath_stats.sps_errints++;
-		estat = ipath_read_kreg64(dd,
-					  dd->ipath_kregs->kr_errorstatus);
-		if (!estat)
-			dev_info(&dd->pcidev->dev, "error interrupt (%Lx), "
-				 "but no error bits set!\n",
-				 (unsigned long long) istat);
-		else if (estat == -1LL)
-			/*
-			 * should we try clearing all, or hope next read
-			 * works?
-			 */
-			ipath_dev_err(dd, "Read of error status failed "
-				      "(all bits set); ignoring\n");
-		else
-			chk0rcv |= handle_errors(dd, estat);
-	}
-
-	if (istat & INFINIPATH_I_GPIO) {
-		/*
-		 * GPIO interrupts fall in two broad classes:
-		 * GPIO_2 indicates (on some HT4xx boards) that a packet
-		 *        has arrived for Port 0. Checking for this
-		 *        is controlled by flag IPATH_GPIO_INTR.
-		 * GPIO_3..5 on IBA6120 Rev2 and IBA6110 Rev4 chips indicate
-		 *        errors that we need to count. Checking for this
-		 *        is controlled by flag IPATH_GPIO_ERRINTRS.
-		 */
-		u32 gpiostatus;
-		u32 to_clear = 0;
-
-		gpiostatus = ipath_read_kreg32(
-			dd, dd->ipath_kregs->kr_gpio_status);
-		/* First the error-counter case. */
-		if ((gpiostatus & IPATH_GPIO_ERRINTR_MASK) &&
-		    (dd->ipath_flags & IPATH_GPIO_ERRINTRS)) {
-			/* want to clear the bits we see asserted. */
-			to_clear |= (gpiostatus & IPATH_GPIO_ERRINTR_MASK);
-
-			/*
-			 * Count appropriately, clear bits out of our copy,
-			 * as they have been "handled".
-			 */
-			if (gpiostatus & (1 << IPATH_GPIO_RXUVL_BIT)) {
-				ipath_dbg("FlowCtl on UnsupVL\n");
-				dd->ipath_rxfc_unsupvl_errs++;
-			}
-			if (gpiostatus & (1 << IPATH_GPIO_OVRUN_BIT)) {
-				ipath_dbg("Overrun Threshold exceeded\n");
-				dd->ipath_overrun_thresh_errs++;
-			}
-			if (gpiostatus & (1 << IPATH_GPIO_LLI_BIT)) {
-				ipath_dbg("Local Link Integrity error\n");
-				dd->ipath_lli_errs++;
-			}
-			gpiostatus &= ~IPATH_GPIO_ERRINTR_MASK;
-		}
-		/* Now the Port0 Receive case */
-		if ((gpiostatus & (1 << IPATH_GPIO_PORT0_BIT)) &&
-		    (dd->ipath_flags & IPATH_GPIO_INTR)) {
-			/*
-			 * GPIO status bit 2 is set, and we expected it.
-			 * clear it and indicate in p0bits.
-			 * This probably only happens if a Port0 pkt
-			 * arrives at _just_ the wrong time, and we
-			 * handle that by seting chk0rcv;
-			 */
-			to_clear |= (1 << IPATH_GPIO_PORT0_BIT);
-			gpiostatus &= ~(1 << IPATH_GPIO_PORT0_BIT);
-			chk0rcv = 1;
-		}
-		if (gpiostatus) {
-			/*
-			 * Some unexpected bits remain. If they could have
-			 * caused the interrupt, complain and clear.
-			 * To avoid repetition of this condition, also clear
-			 * the mask. It is almost certainly due to error.
-			 */
-			const u32 mask = (u32) dd->ipath_gpio_mask;
-
-			if (mask & gpiostatus) {
-				ipath_dbg("Unexpected GPIO IRQ bits %x\n",
-				  gpiostatus & mask);
-				to_clear |= (gpiostatus & mask);
-				dd->ipath_gpio_mask &= ~(gpiostatus & mask);
-				ipath_write_kreg(dd,
-					dd->ipath_kregs->kr_gpio_mask,
-					dd->ipath_gpio_mask);
-			}
-		}
-		if (to_clear) {
-			ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_clear,
-					(u64) to_clear);
-		}
-	}
-
-	/*
-	 * Clear the interrupt bits we found set, unless they are receive
-	 * related, in which case we already cleared them above, and don't
-	 * want to clear them again, because we might lose an interrupt.
-	 * Clear it early, so we "know" know the chip will have seen this by
-	 * the time we process the queue, and will re-interrupt if necessary.
-	 * The processor itself won't take the interrupt again until we return.
-	 */
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, istat);
-
-	/*
-	 * Handle kernel receive queues before checking for pio buffers
-	 * available since receives can overflow; piobuf waiters can afford
-	 * a few extra cycles, since they were waiting anyway, and user's
-	 * waiting for receive are at the bottom.
-	 */
-	kportrbits = (1ULL << dd->ipath_i_rcvavail_shift) |
-		(1ULL << dd->ipath_i_rcvurg_shift);
-	if (chk0rcv || (istat & kportrbits)) {
-		istat &= ~kportrbits;
-		ipath_kreceive(dd->ipath_pd[0]);
-	}
-
-	if (istat & ((dd->ipath_i_rcvavail_mask << dd->ipath_i_rcvavail_shift) |
-		     (dd->ipath_i_rcvurg_mask << dd->ipath_i_rcvurg_shift)))
-		handle_urcv(dd, istat);
-
-	if (istat & (INFINIPATH_I_SDMAINT | INFINIPATH_I_SDMADISABLED))
-		handle_sdma_intr(dd, istat);
-
-	if (istat & INFINIPATH_I_SPIOBUFAVAIL) {
-		unsigned long flags;
-
-		spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
-		dd->ipath_sendctrl &= ~INFINIPATH_S_PIOINTBUFAVAIL;
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-				 dd->ipath_sendctrl);
-		ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-		spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
-
-		/* always process; sdma verbs uses PIO for acks and VL15  */
-		handle_layer_pioavail(dd);
-	}
-
-	ret = IRQ_HANDLED;
-
-bail:
-	return ret;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_kernel.h b/drivers/staging/rdma/ipath/ipath_kernel.h
deleted file mode 100644
index 66c934a..0000000
--- a/drivers/staging/rdma/ipath/ipath_kernel.h
+++ /dev/null
@@ -1,1374 +0,0 @@
-#ifndef _IPATH_KERNEL_H
-#define _IPATH_KERNEL_H
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-/*
- * This header file is the base header file for infinipath kernel code
- * ipath_user.h serves a similar purpose for user code.
- */
-
-#include <linux/interrupt.h>
-#include <linux/pci.h>
-#include <linux/dma-mapping.h>
-#include <linux/mutex.h>
-#include <linux/list.h>
-#include <linux/scatterlist.h>
-#include <linux/sched.h>
-#include <asm/io.h>
-#include <rdma/ib_verbs.h>
-
-#include "ipath_common.h"
-#include "ipath_debug.h"
-#include "ipath_registers.h"
-
-/* only s/w major version of InfiniPath we can handle */
-#define IPATH_CHIP_VERS_MAJ 2U
-
-/* don't care about this except printing */
-#define IPATH_CHIP_VERS_MIN 0U
-
-/* temporary, maybe always */
-extern struct infinipath_stats ipath_stats;
-
-#define IPATH_CHIP_SWVERSION IPATH_CHIP_VERS_MAJ
-/*
- * First-cut critierion for "device is active" is
- * two thousand dwords combined Tx, Rx traffic per
- * 5-second interval. SMA packets are 64 dwords,
- * and occur "a few per second", presumably each way.
- */
-#define IPATH_TRAFFIC_ACTIVE_THRESHOLD (2000)
-/*
- * Struct used to indicate which errors are logged in each of the
- * error-counters that are logged to EEPROM. A counter is incremented
- * _once_ (saturating at 255) for each event with any bits set in
- * the error or hwerror register masks below.
- */
-#define IPATH_EEP_LOG_CNT (4)
-struct ipath_eep_log_mask {
-	u64 errs_to_log;
-	u64 hwerrs_to_log;
-};
-
-struct ipath_portdata {
-	void **port_rcvegrbuf;
-	dma_addr_t *port_rcvegrbuf_phys;
-	/* rcvhdrq base, needs mmap before useful */
-	void *port_rcvhdrq;
-	/* kernel virtual address where hdrqtail is updated */
-	void *port_rcvhdrtail_kvaddr;
-	/*
-	 * temp buffer for expected send setup, allocated at open, instead
-	 * of each setup call
-	 */
-	void *port_tid_pg_list;
-	/* when waiting for rcv or pioavail */
-	wait_queue_head_t port_wait;
-	/*
-	 * rcvegr bufs base, physical, must fit
-	 * in 44 bits so 32 bit programs mmap64 44 bit works)
-	 */
-	dma_addr_t port_rcvegr_phys;
-	/* mmap of hdrq, must fit in 44 bits */
-	dma_addr_t port_rcvhdrq_phys;
-	dma_addr_t port_rcvhdrqtailaddr_phys;
-	/*
-	 * number of opens (including slave subports) on this instance
-	 * (ignoring forks, dup, etc. for now)
-	 */
-	int port_cnt;
-	/*
-	 * how much space to leave at start of eager TID entries for
-	 * protocol use, on each TID
-	 */
-	/* instead of calculating it */
-	unsigned port_port;
-	/* non-zero if port is being shared. */
-	u16 port_subport_cnt;
-	/* non-zero if port is being shared. */
-	u16 port_subport_id;
-	/* number of pio bufs for this port (all procs, if shared) */
-	u32 port_piocnt;
-	/* first pio buffer for this port */
-	u32 port_pio_base;
-	/* chip offset of PIO buffers for this port */
-	u32 port_piobufs;
-	/* how many alloc_pages() chunks in port_rcvegrbuf_pages */
-	u32 port_rcvegrbuf_chunks;
-	/* how many egrbufs per chunk */
-	u32 port_rcvegrbufs_perchunk;
-	/* order for port_rcvegrbuf_pages */
-	size_t port_rcvegrbuf_size;
-	/* rcvhdrq size (for freeing) */
-	size_t port_rcvhdrq_size;
-	/* next expected TID to check when looking for free */
-	u32 port_tidcursor;
-	/* next expected TID to check */
-	unsigned long port_flag;
-	/* what happened */
-	unsigned long int_flag;
-	/* WAIT_RCV that timed out, no interrupt */
-	u32 port_rcvwait_to;
-	/* WAIT_PIO that timed out, no interrupt */
-	u32 port_piowait_to;
-	/* WAIT_RCV already happened, no wait */
-	u32 port_rcvnowait;
-	/* WAIT_PIO already happened, no wait */
-	u32 port_pionowait;
-	/* total number of rcvhdrqfull errors */
-	u32 port_hdrqfull;
-	/*
-	 * Used to suppress multiple instances of same
-	 * port staying stuck at same point.
-	 */
-	u32 port_lastrcvhdrqtail;
-	/* saved total number of rcvhdrqfull errors for poll edge trigger */
-	u32 port_hdrqfull_poll;
-	/* total number of polled urgent packets */
-	u32 port_urgent;
-	/* saved total number of polled urgent packets for poll edge trigger */
-	u32 port_urgent_poll;
-	/* pid of process using this port */
-	struct pid *port_pid;
-	struct pid *port_subpid[INFINIPATH_MAX_SUBPORT];
-	/* same size as task_struct .comm[] */
-	char port_comm[TASK_COMM_LEN];
-	/* pkeys set by this use of this port */
-	u16 port_pkeys[4];
-	/* so file ops can get at unit */
-	struct ipath_devdata *port_dd;
-	/* A page of memory for rcvhdrhead, rcvegrhead, rcvegrtail * N */
-	void *subport_uregbase;
-	/* An array of pages for the eager receive buffers * N */
-	void *subport_rcvegrbuf;
-	/* An array of pages for the eager header queue entries * N */
-	void *subport_rcvhdr_base;
-	/* The version of the library which opened this port */
-	u32 userversion;
-	/* Bitmask of active slaves */
-	u32 active_slaves;
-	/* Type of packets or conditions we want to poll for */
-	u16 poll_type;
-	/* port rcvhdrq head offset */
-	u32 port_head;
-	/* receive packet sequence counter */
-	u32 port_seq_cnt;
-};
-
-struct sk_buff;
-struct ipath_sge_state;
-struct ipath_verbs_txreq;
-
-/*
- * control information for layered drivers
- */
-struct _ipath_layer {
-	void *l_arg;
-};
-
-struct ipath_skbinfo {
-	struct sk_buff *skb;
-	dma_addr_t phys;
-};
-
-struct ipath_sdma_txreq {
-	int                 flags;
-	int                 sg_count;
-	union {
-		struct scatterlist *sg;
-		void *map_addr;
-	};
-	void              (*callback)(void *, int);
-	void               *callback_cookie;
-	int                 callback_status;
-	u16                 start_idx;  /* sdma private */
-	u16                 next_descq_idx;  /* sdma private */
-	struct list_head    list;       /* sdma private */
-};
-
-struct ipath_sdma_desc {
-	__le64 qw[2];
-};
-
-#define IPATH_SDMA_TXREQ_F_USELARGEBUF  0x1
-#define IPATH_SDMA_TXREQ_F_HEADTOHOST   0x2
-#define IPATH_SDMA_TXREQ_F_INTREQ       0x4
-#define IPATH_SDMA_TXREQ_F_FREEBUF      0x8
-#define IPATH_SDMA_TXREQ_F_FREEDESC     0x10
-#define IPATH_SDMA_TXREQ_F_VL15         0x20
-
-#define IPATH_SDMA_TXREQ_S_OK        0
-#define IPATH_SDMA_TXREQ_S_SENDERROR 1
-#define IPATH_SDMA_TXREQ_S_ABORTED   2
-#define IPATH_SDMA_TXREQ_S_SHUTDOWN  3
-
-#define IPATH_SDMA_STATUS_SCORE_BOARD_DRAIN_IN_PROG	(1ull << 63)
-#define IPATH_SDMA_STATUS_ABORT_IN_PROG			(1ull << 62)
-#define IPATH_SDMA_STATUS_INTERNAL_SDMA_ENABLE		(1ull << 61)
-#define IPATH_SDMA_STATUS_SCB_EMPTY			(1ull << 30)
-
-/* max dwords in small buffer packet */
-#define IPATH_SMALLBUF_DWORDS (dd->ipath_piosize2k >> 2)
-
-/*
- * Possible IB config parameters for ipath_f_get/set_ib_cfg()
- */
-#define IPATH_IB_CFG_LIDLMC 0 /* Get/set LID (LS16b) and Mask (MS16b) */
-#define IPATH_IB_CFG_HRTBT 1 /* Get/set Heartbeat off/enable/auto */
-#define IPATH_IB_HRTBT_ON 3 /* Heartbeat enabled, sent every 100msec */
-#define IPATH_IB_HRTBT_OFF 0 /* Heartbeat off */
-#define IPATH_IB_CFG_LWID_ENB 2 /* Get/set allowed Link-width */
-#define IPATH_IB_CFG_LWID 3 /* Get currently active Link-width */
-#define IPATH_IB_CFG_SPD_ENB 4 /* Get/set allowed Link speeds */
-#define IPATH_IB_CFG_SPD 5 /* Get current Link spd */
-#define IPATH_IB_CFG_RXPOL_ENB 6 /* Get/set Auto-RX-polarity enable */
-#define IPATH_IB_CFG_LREV_ENB 7 /* Get/set Auto-Lane-reversal enable */
-#define IPATH_IB_CFG_LINKLATENCY 8 /* Get Auto-Lane-reversal enable */
-
-
-struct ipath_devdata {
-	struct list_head ipath_list;
-
-	struct ipath_kregs const *ipath_kregs;
-	struct ipath_cregs const *ipath_cregs;
-
-	/* mem-mapped pointer to base of chip regs */
-	u64 __iomem *ipath_kregbase;
-	/* end of mem-mapped chip space; range checking */
-	u64 __iomem *ipath_kregend;
-	/* physical address of chip for io_remap, etc. */
-	unsigned long ipath_physaddr;
-	/* base of memory alloced for ipath_kregbase, for free */
-	u64 *ipath_kregalloc;
-	/* ipath_cfgports pointers */
-	struct ipath_portdata **ipath_pd;
-	/* sk_buffs used by port 0 eager receive queue */
-	struct ipath_skbinfo *ipath_port0_skbinfo;
-	/* kvirt address of 1st 2k pio buffer */
-	void __iomem *ipath_pio2kbase;
-	/* kvirt address of 1st 4k pio buffer */
-	void __iomem *ipath_pio4kbase;
-	/*
-	 * points to area where PIOavail registers will be DMA'ed.
-	 * Has to be on a page of it's own, because the page will be
-	 * mapped into user program space.  This copy is *ONLY* ever
-	 * written by DMA, not by the driver!  Need a copy per device
-	 * when we get to multiple devices
-	 */
-	volatile __le64 *ipath_pioavailregs_dma;
-	/* physical address where updates occur */
-	dma_addr_t ipath_pioavailregs_phys;
-	struct _ipath_layer ipath_layer;
-	/* setup intr */
-	int (*ipath_f_intrsetup)(struct ipath_devdata *);
-	/* fallback to alternate interrupt type if possible */
-	int (*ipath_f_intr_fallback)(struct ipath_devdata *);
-	/* setup on-chip bus config */
-	int (*ipath_f_bus)(struct ipath_devdata *, struct pci_dev *);
-	/* hard reset chip */
-	int (*ipath_f_reset)(struct ipath_devdata *);
-	int (*ipath_f_get_boardname)(struct ipath_devdata *, char *,
-				     size_t);
-	void (*ipath_f_init_hwerrors)(struct ipath_devdata *);
-	void (*ipath_f_handle_hwerrors)(struct ipath_devdata *, char *,
-					size_t);
-	void (*ipath_f_quiet_serdes)(struct ipath_devdata *);
-	int (*ipath_f_bringup_serdes)(struct ipath_devdata *);
-	int (*ipath_f_early_init)(struct ipath_devdata *);
-	void (*ipath_f_clear_tids)(struct ipath_devdata *, unsigned);
-	void (*ipath_f_put_tid)(struct ipath_devdata *, u64 __iomem*,
-				u32, unsigned long);
-	void (*ipath_f_tidtemplate)(struct ipath_devdata *);
-	void (*ipath_f_cleanup)(struct ipath_devdata *);
-	void (*ipath_f_setextled)(struct ipath_devdata *, u64, u64);
-	/* fill out chip-specific fields */
-	int (*ipath_f_get_base_info)(struct ipath_portdata *, void *);
-	/* free irq */
-	void (*ipath_f_free_irq)(struct ipath_devdata *);
-	struct ipath_message_header *(*ipath_f_get_msgheader)
-					(struct ipath_devdata *, __le32 *);
-	void (*ipath_f_config_ports)(struct ipath_devdata *, ushort);
-	int (*ipath_f_get_ib_cfg)(struct ipath_devdata *, int);
-	int (*ipath_f_set_ib_cfg)(struct ipath_devdata *, int, u32);
-	void (*ipath_f_config_jint)(struct ipath_devdata *, u16 , u16);
-	void (*ipath_f_read_counters)(struct ipath_devdata *,
-					struct infinipath_counters *);
-	void (*ipath_f_xgxs_reset)(struct ipath_devdata *);
-	/* per chip actions needed for IB Link up/down changes */
-	int (*ipath_f_ib_updown)(struct ipath_devdata *, int, u64);
-
-	unsigned ipath_lastegr_idx;
-	struct ipath_ibdev *verbs_dev;
-	struct timer_list verbs_timer;
-	/* total dwords sent (summed from counter) */
-	u64 ipath_sword;
-	/* total dwords rcvd (summed from counter) */
-	u64 ipath_rword;
-	/* total packets sent (summed from counter) */
-	u64 ipath_spkts;
-	/* total packets rcvd (summed from counter) */
-	u64 ipath_rpkts;
-	/* ipath_statusp initially points to this. */
-	u64 _ipath_status;
-	/* GUID for this interface, in network order */
-	__be64 ipath_guid;
-	/*
-	 * aggregrate of error bits reported since last cleared, for
-	 * limiting of error reporting
-	 */
-	ipath_err_t ipath_lasterror;
-	/*
-	 * aggregrate of error bits reported since last cleared, for
-	 * limiting of hwerror reporting
-	 */
-	ipath_err_t ipath_lasthwerror;
-	/* errors masked because they occur too fast */
-	ipath_err_t ipath_maskederrs;
-	u64 ipath_lastlinkrecov; /* link recoveries at last ACTIVE */
-	/* these 5 fields are used to establish deltas for IB Symbol
-	 * errors and linkrecovery errors. They can be reported on
-	 * some chips during link negotiation prior to INIT, and with
-	 * DDR when faking DDR negotiations with non-IBTA switches.
-	 * The chip counters are adjusted at driver unload if there is
-	 * a non-zero delta.
-	 */
-	u64 ibdeltainprog;
-	u64 ibsymdelta;
-	u64 ibsymsnap;
-	u64 iblnkerrdelta;
-	u64 iblnkerrsnap;
-
-	/* time in jiffies at which to re-enable maskederrs */
-	unsigned long ipath_unmasktime;
-	/* count of egrfull errors, combined for all ports */
-	u64 ipath_last_tidfull;
-	/* for ipath_qcheck() */
-	u64 ipath_lastport0rcv_cnt;
-	/* template for writing TIDs  */
-	u64 ipath_tidtemplate;
-	/* value to write to free TIDs */
-	u64 ipath_tidinvalid;
-	/* IBA6120 rcv interrupt setup */
-	u64 ipath_rhdrhead_intr_off;
-
-	/* size of memory at ipath_kregbase */
-	u32 ipath_kregsize;
-	/* number of registers used for pioavail */
-	u32 ipath_pioavregs;
-	/* IPATH_POLL, etc. */
-	u32 ipath_flags;
-	/* ipath_flags driver is waiting for */
-	u32 ipath_state_wanted;
-	/* last buffer for user use, first buf for kernel use is this
-	 * index. */
-	u32 ipath_lastport_piobuf;
-	/* is a stats timer active */
-	u32 ipath_stats_timer_active;
-	/* number of interrupts for this device -- saturates... */
-	u32 ipath_int_counter;
-	/* dwords sent read from counter */
-	u32 ipath_lastsword;
-	/* dwords received read from counter */
-	u32 ipath_lastrword;
-	/* sent packets read from counter */
-	u32 ipath_lastspkts;
-	/* received packets read from counter */
-	u32 ipath_lastrpkts;
-	/* pio bufs allocated per port */
-	u32 ipath_pbufsport;
-	/* if remainder on bufs/port, ports < extrabuf get 1 extra */
-	u32 ipath_ports_extrabuf;
-	u32 ipath_pioupd_thresh; /* update threshold, some chips */
-	/*
-	 * number of ports configured as max; zero is set to number chip
-	 * supports, less gives more pio bufs/port, etc.
-	 */
-	u32 ipath_cfgports;
-	/* count of port 0 hdrqfull errors */
-	u32 ipath_p0_hdrqfull;
-	/* port 0 number of receive eager buffers */
-	u32 ipath_p0_rcvegrcnt;
-
-	/*
-	 * index of last piobuffer we used.  Speeds up searching, by
-	 * starting at this point.  Doesn't matter if multiple cpu's use and
-	 * update, last updater is only write that matters.  Whenever it
-	 * wraps, we update shadow copies.  Need a copy per device when we
-	 * get to multiple devices
-	 */
-	u32 ipath_lastpioindex;
-	u32 ipath_lastpioindexl;
-	/* max length of freezemsg */
-	u32 ipath_freezelen;
-	/*
-	 * consecutive times we wanted a PIO buffer but were unable to
-	 * get one
-	 */
-	u32 ipath_consec_nopiobuf;
-	/*
-	 * hint that we should update ipath_pioavailshadow before
-	 * looking for a PIO buffer
-	 */
-	u32 ipath_upd_pio_shadow;
-	/* so we can rewrite it after a chip reset */
-	u32 ipath_pcibar0;
-	/* so we can rewrite it after a chip reset */
-	u32 ipath_pcibar1;
-	u32 ipath_x1_fix_tries;
-	u32 ipath_autoneg_tries;
-	u32 serdes_first_init_done;
-
-	struct ipath_relock {
-		atomic_t ipath_relock_timer_active;
-		struct timer_list ipath_relock_timer;
-		unsigned int ipath_relock_interval; /* in jiffies */
-	} ipath_relock_singleton;
-
-	/* interrupt number */
-	int ipath_irq;
-	/* HT/PCI Vendor ID (here for NodeInfo) */
-	u16 ipath_vendorid;
-	/* HT/PCI Device ID (here for NodeInfo) */
-	u16 ipath_deviceid;
-	/* offset in HT config space of slave/primary interface block */
-	u8 ipath_ht_slave_off;
-	/* for write combining settings */
-	int wc_cookie;
-	/* ref count for each pkey */
-	atomic_t ipath_pkeyrefs[4];
-	/* shadow copy of struct page *'s for exp tid pages */
-	struct page **ipath_pageshadow;
-	/* shadow copy of dma handles for exp tid pages */
-	dma_addr_t *ipath_physshadow;
-	u64 __iomem *ipath_egrtidbase;
-	/* lock to workaround chip bug 9437 and others */
-	spinlock_t ipath_kernel_tid_lock;
-	spinlock_t ipath_user_tid_lock;
-	spinlock_t ipath_sendctrl_lock;
-	/* around ipath_pd and (user ports) port_cnt use (intr vs free) */
-	spinlock_t ipath_uctxt_lock;
-
-	/*
-	 * IPATH_STATUS_*,
-	 * this address is mapped readonly into user processes so they can
-	 * get status cheaply, whenever they want.
-	 */
-	u64 *ipath_statusp;
-	/* freeze msg if hw error put chip in freeze */
-	char *ipath_freezemsg;
-	/* pci access data structure */
-	struct pci_dev *pcidev;
-	struct cdev *user_cdev;
-	struct cdev *diag_cdev;
-	struct device *user_dev;
-	struct device *diag_dev;
-	/* timer used to prevent stats overflow, error throttling, etc. */
-	struct timer_list ipath_stats_timer;
-	/* timer to verify interrupts work, and fallback if possible */
-	struct timer_list ipath_intrchk_timer;
-	void *ipath_dummy_hdrq;	/* used after port close */
-	dma_addr_t ipath_dummy_hdrq_phys;
-
-	/* SendDMA related entries */
-	spinlock_t            ipath_sdma_lock;
-	unsigned long         ipath_sdma_status;
-	unsigned long         ipath_sdma_abort_jiffies;
-	unsigned long         ipath_sdma_abort_intr_timeout;
-	unsigned long         ipath_sdma_buf_jiffies;
-	struct ipath_sdma_desc *ipath_sdma_descq;
-	u64		      ipath_sdma_descq_added;
-	u64		      ipath_sdma_descq_removed;
-	int		      ipath_sdma_desc_nreserved;
-	u16                   ipath_sdma_descq_cnt;
-	u16                   ipath_sdma_descq_tail;
-	u16                   ipath_sdma_descq_head;
-	u16                   ipath_sdma_next_intr;
-	u16                   ipath_sdma_reset_wait;
-	u8                    ipath_sdma_generation;
-	struct tasklet_struct ipath_sdma_abort_task;
-	struct tasklet_struct ipath_sdma_notify_task;
-	struct list_head      ipath_sdma_activelist;
-	struct list_head      ipath_sdma_notifylist;
-	atomic_t              ipath_sdma_vl15_count;
-	struct timer_list     ipath_sdma_vl15_timer;
-
-	dma_addr_t       ipath_sdma_descq_phys;
-	volatile __le64 *ipath_sdma_head_dma;
-	dma_addr_t       ipath_sdma_head_phys;
-
-	unsigned long ipath_ureg_align; /* user register alignment */
-
-	struct delayed_work ipath_autoneg_work;
-	wait_queue_head_t ipath_autoneg_wait;
-
-	/* HoL blocking / user app forward-progress state */
-	unsigned          ipath_hol_state;
-	unsigned          ipath_hol_next;
-	struct timer_list ipath_hol_timer;
-
-	/*
-	 * Shadow copies of registers; size indicates read access size.
-	 * Most of them are readonly, but some are write-only register,
-	 * where we manipulate the bits in the shadow copy, and then write
-	 * the shadow copy to infinipath.
-	 *
-	 * We deliberately make most of these 32 bits, since they have
-	 * restricted range.  For any that we read, we won't to generate 32
-	 * bit accesses, since Opteron will generate 2 separate 32 bit HT
-	 * transactions for a 64 bit read, and we want to avoid unnecessary
-	 * HT transactions.
-	 */
-
-	/* This is the 64 bit group */
-
-	/*
-	 * shadow of pioavail, check to be sure it's large enough at
-	 * init time.
-	 */
-	unsigned long ipath_pioavailshadow[8];
-	/* bitmap of send buffers available for the kernel to use with PIO. */
-	unsigned long ipath_pioavailkernel[8];
-	/* shadow of kr_gpio_out, for rmw ops */
-	u64 ipath_gpio_out;
-	/* shadow the gpio mask register */
-	u64 ipath_gpio_mask;
-	/* shadow the gpio output enable, etc... */
-	u64 ipath_extctrl;
-	/* kr_revision shadow */
-	u64 ipath_revision;
-	/*
-	 * shadow of ibcctrl, for interrupt handling of link changes,
-	 * etc.
-	 */
-	u64 ipath_ibcctrl;
-	/*
-	 * last ibcstatus, to suppress "duplicate" status change messages,
-	 * mostly from 2 to 3
-	 */
-	u64 ipath_lastibcstat;
-	/* hwerrmask shadow */
-	ipath_err_t ipath_hwerrmask;
-	ipath_err_t ipath_errormask; /* errormask shadow */
-	/* interrupt config reg shadow */
-	u64 ipath_intconfig;
-	/* kr_sendpiobufbase value */
-	u64 ipath_piobufbase;
-	/* kr_ibcddrctrl shadow */
-	u64 ipath_ibcddrctrl;
-
-	/* these are the "32 bit" regs */
-
-	/*
-	 * number of GUIDs in the flash for this interface; may need some
-	 * rethinking for setting on other ifaces
-	 */
-	u32 ipath_nguid;
-	/*
-	 * the following two are 32-bit bitmasks, but {test,clear,set}_bit
-	 * all expect bit fields to be "unsigned long"
-	 */
-	/* shadow kr_rcvctrl */
-	unsigned long ipath_rcvctrl;
-	/* shadow kr_sendctrl */
-	unsigned long ipath_sendctrl;
-	/* to not count armlaunch after cancel */
-	unsigned long ipath_lastcancel;
-	/* count cases where special trigger was needed (double write) */
-	unsigned long ipath_spectriggerhit;
-
-	/* value we put in kr_rcvhdrcnt */
-	u32 ipath_rcvhdrcnt;
-	/* value we put in kr_rcvhdrsize */
-	u32 ipath_rcvhdrsize;
-	/* value we put in kr_rcvhdrentsize */
-	u32 ipath_rcvhdrentsize;
-	/* offset of last entry in rcvhdrq */
-	u32 ipath_hdrqlast;
-	/* kr_portcnt value */
-	u32 ipath_portcnt;
-	/* kr_pagealign value */
-	u32 ipath_palign;
-	/* number of "2KB" PIO buffers */
-	u32 ipath_piobcnt2k;
-	/* size in bytes of "2KB" PIO buffers */
-	u32 ipath_piosize2k;
-	/* number of "4KB" PIO buffers */
-	u32 ipath_piobcnt4k;
-	/* size in bytes of "4KB" PIO buffers */
-	u32 ipath_piosize4k;
-	u32 ipath_pioreserved; /* reserved special-inkernel; */
-	/* kr_rcvegrbase value */
-	u32 ipath_rcvegrbase;
-	/* kr_rcvegrcnt value */
-	u32 ipath_rcvegrcnt;
-	/* kr_rcvtidbase value */
-	u32 ipath_rcvtidbase;
-	/* kr_rcvtidcnt value */
-	u32 ipath_rcvtidcnt;
-	/* kr_sendregbase */
-	u32 ipath_sregbase;
-	/* kr_userregbase */
-	u32 ipath_uregbase;
-	/* kr_counterregbase */
-	u32 ipath_cregbase;
-	/* shadow the control register contents */
-	u32 ipath_control;
-	/* PCI revision register (HTC rev on FPGA) */
-	u32 ipath_pcirev;
-
-	/* chip address space used by 4k pio buffers */
-	u32 ipath_4kalign;
-	/* The MTU programmed for this unit */
-	u32 ipath_ibmtu;
-	/*
-	 * The max size IB packet, included IB headers that we can send.
-	 * Starts same as ipath_piosize, but is affected when ibmtu is
-	 * changed, or by size of eager buffers
-	 */
-	u32 ipath_ibmaxlen;
-	/*
-	 * ibmaxlen at init time, limited by chip and by receive buffer
-	 * size.  Not changed after init.
-	 */
-	u32 ipath_init_ibmaxlen;
-	/* size of each rcvegrbuffer */
-	u32 ipath_rcvegrbufsize;
-	/* localbus width (1, 2,4,8,16,32) from config space  */
-	u32 ipath_lbus_width;
-	/* localbus speed (HT: 200,400,800,1000; PCIe 2500) */
-	u32 ipath_lbus_speed;
-	/*
-	 * number of sequential ibcstatus change for polling active/quiet
-	 * (i.e., link not coming up).
-	 */
-	u32 ipath_ibpollcnt;
-	/* low and high portions of MSI capability/vector */
-	u32 ipath_msi_lo;
-	/* saved after PCIe init for restore after reset */
-	u32 ipath_msi_hi;
-	/* MSI data (vector) saved for restore */
-	u16 ipath_msi_data;
-	/* MLID programmed for this instance */
-	u16 ipath_mlid;
-	/* LID programmed for this instance */
-	u16 ipath_lid;
-	/* list of pkeys programmed; 0 if not set */
-	u16 ipath_pkeys[4];
-	/*
-	 * ASCII serial number, from flash, large enough for original
-	 * all digit strings, and longer QLogic serial number format
-	 */
-	u8 ipath_serial[16];
-	/* human readable board version */
-	u8 ipath_boardversion[96];
-	u8 ipath_lbus_info[32]; /* human readable localbus info */
-	/* chip major rev, from ipath_revision */
-	u8 ipath_majrev;
-	/* chip minor rev, from ipath_revision */
-	u8 ipath_minrev;
-	/* board rev, from ipath_revision */
-	u8 ipath_boardrev;
-	/* saved for restore after reset */
-	u8 ipath_pci_cacheline;
-	/* LID mask control */
-	u8 ipath_lmc;
-	/* link width supported */
-	u8 ipath_link_width_supported;
-	/* link speed supported */
-	u8 ipath_link_speed_supported;
-	u8 ipath_link_width_enabled;
-	u8 ipath_link_speed_enabled;
-	u8 ipath_link_width_active;
-	u8 ipath_link_speed_active;
-	/* Rx Polarity inversion (compensate for ~tx on partner) */
-	u8 ipath_rx_pol_inv;
-
-	u8 ipath_r_portenable_shift;
-	u8 ipath_r_intravail_shift;
-	u8 ipath_r_tailupd_shift;
-	u8 ipath_r_portcfg_shift;
-
-	/* unit # of this chip, if present */
-	int ipath_unit;
-
-	/* local link integrity counter */
-	u32 ipath_lli_counter;
-	/* local link integrity errors */
-	u32 ipath_lli_errors;
-	/*
-	 * Above counts only cases where _successive_ LocalLinkIntegrity
-	 * errors were seen in the receive headers of kern-packets.
-	 * Below are the three (monotonically increasing) counters
-	 * maintained via GPIO interrupts on iba6120-rev2.
-	 */
-	u32 ipath_rxfc_unsupvl_errs;
-	u32 ipath_overrun_thresh_errs;
-	u32 ipath_lli_errs;
-
-	/*
-	 * Not all devices managed by a driver instance are the same
-	 * type, so these fields must be per-device.
-	 */
-	u64 ipath_i_bitsextant;
-	ipath_err_t ipath_e_bitsextant;
-	ipath_err_t ipath_hwe_bitsextant;
-
-	/*
-	 * Below should be computable from number of ports,
-	 * since they are never modified.
-	 */
-	u64 ipath_i_rcvavail_mask;
-	u64 ipath_i_rcvurg_mask;
-	u16 ipath_i_rcvurg_shift;
-	u16 ipath_i_rcvavail_shift;
-
-	/*
-	 * Register bits for selecting i2c direction and values, used for
-	 * I2C serial flash.
-	 */
-	u8 ipath_gpio_sda_num;
-	u8 ipath_gpio_scl_num;
-	u8 ipath_i2c_chain_type;
-	u64 ipath_gpio_sda;
-	u64 ipath_gpio_scl;
-
-	/* lock for doing RMW of shadows/regs for ExtCtrl and GPIO */
-	spinlock_t ipath_gpio_lock;
-
-	/*
-	 * IB link and linktraining states and masks that vary per chip in
-	 * some way.  Set at init, to avoid each IB status change interrupt
-	 */
-	u8 ibcs_ls_shift;
-	u8 ibcs_lts_mask;
-	u32 ibcs_mask;
-	u32 ib_init;
-	u32 ib_arm;
-	u32 ib_active;
-
-	u16 ipath_rhf_offset; /* offset of RHF within receive header entry */
-
-	/*
-	 * shift/mask for linkcmd, linkinitcmd, maxpktlen in ibccontol
-	 * reg. Changes for IBA7220
-	 */
-	u8 ibcc_lic_mask; /* LinkInitCmd */
-	u8 ibcc_lc_shift; /* LinkCmd */
-	u8 ibcc_mpl_shift; /* Maxpktlen */
-
-	u8 delay_mult;
-
-	/* used to override LED behavior */
-	u8 ipath_led_override;  /* Substituted for normal value, if non-zero */
-	u16 ipath_led_override_timeoff; /* delta to next timer event */
-	u8 ipath_led_override_vals[2]; /* Alternates per blink-frame */
-	u8 ipath_led_override_phase; /* Just counts, LSB picks from vals[] */
-	atomic_t ipath_led_override_timer_active;
-	/* Used to flash LEDs in override mode */
-	struct timer_list ipath_led_override_timer;
-
-	/* Support (including locks) for EEPROM logging of errors and time */
-	/* control access to actual counters, timer */
-	spinlock_t ipath_eep_st_lock;
-	/* control high-level access to EEPROM */
-	struct mutex ipath_eep_lock;
-	/* Below inc'd by ipath_snap_cntrs(), locked by ipath_eep_st_lock */
-	uint64_t ipath_traffic_wds;
-	/* active time is kept in seconds, but logged in hours */
-	atomic_t ipath_active_time;
-	/* Below are nominal shadow of EEPROM, new since last EEPROM update */
-	uint8_t ipath_eep_st_errs[IPATH_EEP_LOG_CNT];
-	uint8_t ipath_eep_st_new_errs[IPATH_EEP_LOG_CNT];
-	uint16_t ipath_eep_hrs;
-	/*
-	 * masks for which bits of errs, hwerrs that cause
-	 * each of the counters to increment.
-	 */
-	struct ipath_eep_log_mask ipath_eep_st_masks[IPATH_EEP_LOG_CNT];
-
-	/* interrupt mitigation reload register info */
-	u16 ipath_jint_idle_ticks;	/* idle clock ticks */
-	u16 ipath_jint_max_packets;	/* max packets across all ports */
-
-	/*
-	 * lock for access to SerDes, and flags to sequence preset
-	 * versus steady-state. 7220-only at the moment.
-	 */
-	spinlock_t ipath_sdepb_lock;
-	u8 ipath_presets_needed; /* Set if presets to be restored next DOWN */
-};
-
-/* ipath_hol_state values (stopping/starting user proc, send flushing) */
-#define IPATH_HOL_UP       0
-#define IPATH_HOL_DOWN     1
-/* ipath_hol_next toggle values, used when hol_state IPATH_HOL_DOWN */
-#define IPATH_HOL_DOWNSTOP 0
-#define IPATH_HOL_DOWNCONT 1
-
-/* bit positions for sdma_status */
-#define IPATH_SDMA_ABORTING  0
-#define IPATH_SDMA_DISARMED  1
-#define IPATH_SDMA_DISABLED  2
-#define IPATH_SDMA_LAYERBUF  3
-#define IPATH_SDMA_RUNNING  30
-#define IPATH_SDMA_SHUTDOWN 31
-
-/* bit combinations that correspond to abort states */
-#define IPATH_SDMA_ABORT_NONE 0
-#define IPATH_SDMA_ABORT_ABORTING (1UL << IPATH_SDMA_ABORTING)
-#define IPATH_SDMA_ABORT_DISARMED ((1UL << IPATH_SDMA_ABORTING) | \
-	(1UL << IPATH_SDMA_DISARMED))
-#define IPATH_SDMA_ABORT_DISABLED ((1UL << IPATH_SDMA_ABORTING) | \
-	(1UL << IPATH_SDMA_DISABLED))
-#define IPATH_SDMA_ABORT_ABORTED ((1UL << IPATH_SDMA_ABORTING) | \
-	(1UL << IPATH_SDMA_DISARMED) | (1UL << IPATH_SDMA_DISABLED))
-#define IPATH_SDMA_ABORT_MASK ((1UL<<IPATH_SDMA_ABORTING) | \
-	(1UL << IPATH_SDMA_DISARMED) | (1UL << IPATH_SDMA_DISABLED))
-
-#define IPATH_SDMA_BUF_NONE 0
-#define IPATH_SDMA_BUF_MASK (1UL<<IPATH_SDMA_LAYERBUF)
-
-/* Private data for file operations */
-struct ipath_filedata {
-	struct ipath_portdata *pd;
-	unsigned subport;
-	unsigned tidcursor;
-	struct ipath_user_sdma_queue *pq;
-};
-extern struct list_head ipath_dev_list;
-extern spinlock_t ipath_devs_lock;
-extern struct ipath_devdata *ipath_lookup(int unit);
-
-int ipath_init_chip(struct ipath_devdata *, int);
-int ipath_enable_wc(struct ipath_devdata *dd);
-void ipath_disable_wc(struct ipath_devdata *dd);
-int ipath_count_units(int *npresentp, int *nupp, int *maxportsp);
-void ipath_shutdown_device(struct ipath_devdata *);
-void ipath_clear_freeze(struct ipath_devdata *);
-
-struct file_operations;
-int ipath_cdev_init(int minor, char *name, const struct file_operations *fops,
-		    struct cdev **cdevp, struct device **devp);
-void ipath_cdev_cleanup(struct cdev **cdevp,
-			struct device **devp);
-
-int ipath_diag_add(struct ipath_devdata *);
-void ipath_diag_remove(struct ipath_devdata *);
-
-extern wait_queue_head_t ipath_state_wait;
-
-int ipath_user_add(struct ipath_devdata *dd);
-void ipath_user_remove(struct ipath_devdata *dd);
-
-struct sk_buff *ipath_alloc_skb(struct ipath_devdata *dd, gfp_t);
-
-extern int ipath_diag_inuse;
-
-irqreturn_t ipath_intr(int irq, void *devid);
-int ipath_decode_err(struct ipath_devdata *dd, char *buf, size_t blen,
-		     ipath_err_t err);
-#if __IPATH_INFO || __IPATH_DBG
-extern const char *ipath_ibcstatus_str[];
-#endif
-
-/* clean up any per-chip chip-specific stuff */
-void ipath_chip_cleanup(struct ipath_devdata *);
-/* clean up any chip type-specific stuff */
-void ipath_chip_done(void);
-
-void ipath_disarm_piobufs(struct ipath_devdata *, unsigned first,
-			  unsigned cnt);
-void ipath_cancel_sends(struct ipath_devdata *, int);
-
-int ipath_create_rcvhdrq(struct ipath_devdata *, struct ipath_portdata *);
-void ipath_free_pddata(struct ipath_devdata *, struct ipath_portdata *);
-
-int ipath_parse_ushort(const char *str, unsigned short *valp);
-
-void ipath_kreceive(struct ipath_portdata *);
-int ipath_setrcvhdrsize(struct ipath_devdata *, unsigned);
-int ipath_reset_device(int);
-void ipath_get_faststats(unsigned long);
-int ipath_wait_linkstate(struct ipath_devdata *, u32, int);
-int ipath_set_linkstate(struct ipath_devdata *, u8);
-int ipath_set_mtu(struct ipath_devdata *, u16);
-int ipath_set_lid(struct ipath_devdata *, u32, u8);
-int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv);
-void ipath_enable_armlaunch(struct ipath_devdata *);
-void ipath_disable_armlaunch(struct ipath_devdata *);
-void ipath_hol_down(struct ipath_devdata *);
-void ipath_hol_up(struct ipath_devdata *);
-void ipath_hol_event(unsigned long);
-void ipath_toggle_rclkrls(struct ipath_devdata *);
-void ipath_sd7220_clr_ibpar(struct ipath_devdata *);
-void ipath_set_relock_poll(struct ipath_devdata *, int);
-void ipath_shutdown_relock_poll(struct ipath_devdata *);
-
-/* for use in system calls, where we want to know device type, etc. */
-#define port_fp(fp) ((struct ipath_filedata *)(fp)->private_data)->pd
-#define subport_fp(fp) \
-	((struct ipath_filedata *)(fp)->private_data)->subport
-#define tidcursor_fp(fp) \
-	((struct ipath_filedata *)(fp)->private_data)->tidcursor
-#define user_sdma_queue_fp(fp) \
-	((struct ipath_filedata *)(fp)->private_data)->pq
-
-/*
- * values for ipath_flags
- */
-		/* chip can report link latency (IB 1.2) */
-#define IPATH_HAS_LINK_LATENCY 0x1
-		/* The chip is up and initted */
-#define IPATH_INITTED       0x2
-		/* set if any user code has set kr_rcvhdrsize */
-#define IPATH_RCVHDRSZ_SET  0x4
-		/* The chip is present and valid for accesses */
-#define IPATH_PRESENT       0x8
-		/* HT link0 is only 8 bits wide, ignore upper byte crc
-		 * errors, etc. */
-#define IPATH_8BIT_IN_HT0   0x10
-		/* HT link1 is only 8 bits wide, ignore upper byte crc
-		 * errors, etc. */
-#define IPATH_8BIT_IN_HT1   0x20
-		/* The link is down */
-#define IPATH_LINKDOWN      0x40
-		/* The link level is up (0x11) */
-#define IPATH_LINKINIT      0x80
-		/* The link is in the armed (0x21) state */
-#define IPATH_LINKARMED     0x100
-		/* The link is in the active (0x31) state */
-#define IPATH_LINKACTIVE    0x200
-		/* link current state is unknown */
-#define IPATH_LINKUNK       0x400
-		/* Write combining flush needed for PIO */
-#define IPATH_PIO_FLUSH_WC  0x1000
-		/* DMA Receive tail pointer */
-#define IPATH_NODMA_RTAIL   0x2000
-		/* no IB cable, or no device on IB cable */
-#define IPATH_NOCABLE       0x4000
-		/* Supports port zero per packet receive interrupts via
-		 * GPIO */
-#define IPATH_GPIO_INTR     0x8000
-		/* uses the coded 4byte TID, not 8 byte */
-#define IPATH_4BYTE_TID     0x10000
-		/* packet/word counters are 32 bit, else those 4 counters
-		 * are 64bit */
-#define IPATH_32BITCOUNTERS 0x20000
-		/* Interrupt register is 64 bits */
-#define IPATH_INTREG_64     0x40000
-		/* can miss port0 rx interrupts */
-#define IPATH_DISABLED      0x80000 /* administratively disabled */
-		/* Use GPIO interrupts for new counters */
-#define IPATH_GPIO_ERRINTRS 0x100000
-#define IPATH_SWAP_PIOBUFS  0x200000
-		/* Supports Send DMA */
-#define IPATH_HAS_SEND_DMA  0x400000
-		/* Supports Send Count (not just word count) in PBC */
-#define IPATH_HAS_PBC_CNT   0x800000
-		/* Suppress heartbeat, even if turning off loopback */
-#define IPATH_NO_HRTBT      0x1000000
-#define IPATH_HAS_THRESH_UPDATE 0x4000000
-#define IPATH_HAS_MULT_IB_SPEED 0x8000000
-#define IPATH_IB_AUTONEG_INPROG 0x10000000
-#define IPATH_IB_AUTONEG_FAILED 0x20000000
-		/* Linkdown-disable intentionally, Do not attempt to bring up */
-#define IPATH_IB_LINK_DISABLED 0x40000000
-#define IPATH_IB_FORCE_NOTIFY 0x80000000 /* force notify on next ib change */
-
-/* Bits in GPIO for the added interrupts */
-#define IPATH_GPIO_PORT0_BIT 2
-#define IPATH_GPIO_RXUVL_BIT 3
-#define IPATH_GPIO_OVRUN_BIT 4
-#define IPATH_GPIO_LLI_BIT 5
-#define IPATH_GPIO_ERRINTR_MASK 0x38
-
-/* portdata flag bit offsets */
-		/* waiting for a packet to arrive */
-#define IPATH_PORT_WAITING_RCV   2
-		/* master has not finished initializing */
-#define IPATH_PORT_MASTER_UNINIT 4
-		/* waiting for an urgent packet to arrive */
-#define IPATH_PORT_WAITING_URG 5
-
-/* free up any allocated data at closes */
-void ipath_free_data(struct ipath_portdata *dd);
-u32 __iomem *ipath_getpiobuf(struct ipath_devdata *, u32, u32 *);
-void ipath_chg_pioavailkernel(struct ipath_devdata *dd, unsigned start,
-				unsigned len, int avail);
-void ipath_init_iba6110_funcs(struct ipath_devdata *);
-void ipath_get_eeprom_info(struct ipath_devdata *);
-int ipath_update_eeprom_log(struct ipath_devdata *dd);
-void ipath_inc_eeprom_err(struct ipath_devdata *dd, u32 eidx, u32 incr);
-u64 ipath_snap_cntr(struct ipath_devdata *, ipath_creg);
-void ipath_disarm_senderrbufs(struct ipath_devdata *);
-void ipath_force_pio_avail_update(struct ipath_devdata *);
-void signal_ib_event(struct ipath_devdata *dd, enum ib_event_type ev);
-
-/*
- * Set LED override, only the two LSBs have "public" meaning, but
- * any non-zero value substitutes them for the Link and LinkTrain
- * LED states.
- */
-#define IPATH_LED_PHYS 1 /* Physical (linktraining) GREEN LED */
-#define IPATH_LED_LOG 2  /* Logical (link) YELLOW LED */
-void ipath_set_led_override(struct ipath_devdata *dd, unsigned int val);
-
-/* send dma routines */
-int setup_sdma(struct ipath_devdata *);
-void teardown_sdma(struct ipath_devdata *);
-void ipath_restart_sdma(struct ipath_devdata *);
-void ipath_sdma_intr(struct ipath_devdata *);
-int ipath_sdma_verbs_send(struct ipath_devdata *, struct ipath_sge_state *,
-			  u32, struct ipath_verbs_txreq *);
-/* ipath_sdma_lock should be locked before calling this. */
-int ipath_sdma_make_progress(struct ipath_devdata *dd);
-
-/* must be called under ipath_sdma_lock */
-static inline u16 ipath_sdma_descq_freecnt(const struct ipath_devdata *dd)
-{
-	return dd->ipath_sdma_descq_cnt -
-		(dd->ipath_sdma_descq_added - dd->ipath_sdma_descq_removed) -
-		1 - dd->ipath_sdma_desc_nreserved;
-}
-
-static inline void ipath_sdma_desc_reserve(struct ipath_devdata *dd, u16 cnt)
-{
-	dd->ipath_sdma_desc_nreserved += cnt;
-}
-
-static inline void ipath_sdma_desc_unreserve(struct ipath_devdata *dd, u16 cnt)
-{
-	dd->ipath_sdma_desc_nreserved -= cnt;
-}
-
-/*
- * number of words used for protocol header if not set by ipath_userinit();
- */
-#define IPATH_DFLT_RCVHDRSIZE 9
-
-int ipath_get_user_pages(unsigned long, size_t, struct page **);
-void ipath_release_user_pages(struct page **, size_t);
-void ipath_release_user_pages_on_close(struct page **, size_t);
-int ipath_eeprom_read(struct ipath_devdata *, u8, void *, int);
-int ipath_eeprom_write(struct ipath_devdata *, u8, const void *, int);
-int ipath_tempsense_read(struct ipath_devdata *, u8 regnum);
-int ipath_tempsense_write(struct ipath_devdata *, u8 regnum, u8 data);
-
-/* these are used for the registers that vary with port */
-void ipath_write_kreg_port(const struct ipath_devdata *, ipath_kreg,
-			   unsigned, u64);
-
-/*
- * We could have a single register get/put routine, that takes a group type,
- * but this is somewhat clearer and cleaner.  It also gives us some error
- * checking.  64 bit register reads should always work, but are inefficient
- * on opteron (the northbridge always generates 2 separate HT 32 bit reads),
- * so we use kreg32 wherever possible.  User register and counter register
- * reads are always 32 bit reads, so only one form of those routines.
- */
-
-/*
- * At the moment, none of the s-registers are writable, so no
- * ipath_write_sreg().
- */
-
-/**
- * ipath_read_ureg32 - read 32-bit virtualized per-port register
- * @dd: device
- * @regno: register number
- * @port: port number
- *
- * Return the contents of a register that is virtualized to be per port.
- * Returns -1 on errors (not distinguishable from valid contents at
- * runtime; we may add a separate error variable at some point).
- */
-static inline u32 ipath_read_ureg32(const struct ipath_devdata *dd,
-				    ipath_ureg regno, int port)
-{
-	if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT))
-		return 0;
-
-	return readl(regno + (u64 __iomem *)
-		     (dd->ipath_uregbase +
-		      (char __iomem *)dd->ipath_kregbase +
-		      dd->ipath_ureg_align * port));
-}
-
-/**
- * ipath_write_ureg - write 32-bit virtualized per-port register
- * @dd: device
- * @regno: register number
- * @value: value
- * @port: port
- *
- * Write the contents of a register that is virtualized to be per port.
- */
-static inline void ipath_write_ureg(const struct ipath_devdata *dd,
-				    ipath_ureg regno, u64 value, int port)
-{
-	u64 __iomem *ubase = (u64 __iomem *)
-		(dd->ipath_uregbase + (char __iomem *) dd->ipath_kregbase +
-		 dd->ipath_ureg_align * port);
-	if (dd->ipath_kregbase)
-		writeq(value, &ubase[regno]);
-}
-
-static inline u32 ipath_read_kreg32(const struct ipath_devdata *dd,
-				    ipath_kreg regno)
-{
-	if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT))
-		return -1;
-	return readl((u32 __iomem *) & dd->ipath_kregbase[regno]);
-}
-
-static inline u64 ipath_read_kreg64(const struct ipath_devdata *dd,
-				    ipath_kreg regno)
-{
-	if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT))
-		return -1;
-
-	return readq(&dd->ipath_kregbase[regno]);
-}
-
-static inline void ipath_write_kreg(const struct ipath_devdata *dd,
-				    ipath_kreg regno, u64 value)
-{
-	if (dd->ipath_kregbase)
-		writeq(value, &dd->ipath_kregbase[regno]);
-}
-
-static inline u64 ipath_read_creg(const struct ipath_devdata *dd,
-				  ipath_sreg regno)
-{
-	if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT))
-		return 0;
-
-	return readq(regno + (u64 __iomem *)
-		     (dd->ipath_cregbase +
-		      (char __iomem *)dd->ipath_kregbase));
-}
-
-static inline u32 ipath_read_creg32(const struct ipath_devdata *dd,
-					 ipath_sreg regno)
-{
-	if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT))
-		return 0;
-	return readl(regno + (u64 __iomem *)
-		     (dd->ipath_cregbase +
-		      (char __iomem *)dd->ipath_kregbase));
-}
-
-static inline void ipath_write_creg(const struct ipath_devdata *dd,
-				    ipath_creg regno, u64 value)
-{
-	if (dd->ipath_kregbase)
-		writeq(value, regno + (u64 __iomem *)
-		       (dd->ipath_cregbase +
-			(char __iomem *)dd->ipath_kregbase));
-}
-
-static inline void ipath_clear_rcvhdrtail(const struct ipath_portdata *pd)
-{
-	*((u64 *) pd->port_rcvhdrtail_kvaddr) = 0ULL;
-}
-
-static inline u32 ipath_get_rcvhdrtail(const struct ipath_portdata *pd)
-{
-	return (u32) le64_to_cpu(*((volatile __le64 *)
-				pd->port_rcvhdrtail_kvaddr));
-}
-
-static inline u32 ipath_get_hdrqtail(const struct ipath_portdata *pd)
-{
-	const struct ipath_devdata *dd = pd->port_dd;
-	u32 hdrqtail;
-
-	if (dd->ipath_flags & IPATH_NODMA_RTAIL) {
-		__le32 *rhf_addr;
-		u32 seq;
-
-		rhf_addr = (__le32 *) pd->port_rcvhdrq +
-			pd->port_head + dd->ipath_rhf_offset;
-		seq = ipath_hdrget_seq(rhf_addr);
-		hdrqtail = pd->port_head;
-		if (seq == pd->port_seq_cnt)
-			hdrqtail++;
-	} else
-		hdrqtail = ipath_get_rcvhdrtail(pd);
-
-	return hdrqtail;
-}
-
-static inline u64 ipath_read_ireg(const struct ipath_devdata *dd, ipath_kreg r)
-{
-	return (dd->ipath_flags & IPATH_INTREG_64) ?
-		ipath_read_kreg64(dd, r) : ipath_read_kreg32(dd, r);
-}
-
-/*
- * from contents of IBCStatus (or a saved copy), return linkstate
- * Report ACTIVE_DEFER as ACTIVE, because we treat them the same
- * everywhere, anyway (and should be, for almost all purposes).
- */
-static inline u32 ipath_ib_linkstate(struct ipath_devdata *dd, u64 ibcs)
-{
-	u32 state = (u32)(ibcs >> dd->ibcs_ls_shift) &
-		INFINIPATH_IBCS_LINKSTATE_MASK;
-	if (state == INFINIPATH_IBCS_L_STATE_ACT_DEFER)
-		state = INFINIPATH_IBCS_L_STATE_ACTIVE;
-	return state;
-}
-
-/* from contents of IBCStatus (or a saved copy), return linktrainingstate */
-static inline u32 ipath_ib_linktrstate(struct ipath_devdata *dd, u64 ibcs)
-{
-	return (u32)(ibcs >> INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) &
-		dd->ibcs_lts_mask;
-}
-
-/*
- * from contents of IBCStatus (or a saved copy), return logical link state
- * combination of link state and linktraining state (down, active, init,
- * arm, etc.
- */
-static inline u32 ipath_ib_state(struct ipath_devdata *dd, u64 ibcs)
-{
-	u32 ibs;
-	ibs = (u32)(ibcs >> INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) &
-		dd->ibcs_lts_mask;
-	ibs |= (u32)(ibcs &
-		(INFINIPATH_IBCS_LINKSTATE_MASK << dd->ibcs_ls_shift));
-	return ibs;
-}
-
-/*
- * sysfs interface.
- */
-
-struct device_driver;
-
-extern const char ib_ipath_version[];
-
-extern const struct attribute_group *ipath_driver_attr_groups[];
-
-int ipath_device_create_group(struct device *, struct ipath_devdata *);
-void ipath_device_remove_group(struct device *, struct ipath_devdata *);
-int ipath_expose_reset(struct device *);
-
-int ipath_init_ipathfs(void);
-void ipath_exit_ipathfs(void);
-int ipathfs_add_device(struct ipath_devdata *);
-int ipathfs_remove_device(struct ipath_devdata *);
-
-/*
- * dma_addr wrappers - all 0's invalid for hw
- */
-dma_addr_t ipath_map_page(struct pci_dev *, struct page *, unsigned long,
-			  size_t, int);
-dma_addr_t ipath_map_single(struct pci_dev *, void *, size_t, int);
-const char *ipath_get_unit_name(int unit);
-
-/*
- * Flush write combining store buffers (if present) and perform a write
- * barrier.
- */
-#if defined(CONFIG_X86_64)
-#define ipath_flush_wc() asm volatile("sfence" ::: "memory")
-#else
-#define ipath_flush_wc() wmb()
-#endif
-
-extern unsigned ipath_debug; /* debugging bit mask */
-extern unsigned ipath_linkrecovery;
-extern unsigned ipath_mtu4096;
-extern struct mutex ipath_mutex;
-
-#define IPATH_DRV_NAME		"ib_ipath"
-#define IPATH_MAJOR		233
-#define IPATH_USER_MINOR_BASE	0
-#define IPATH_DIAGPKT_MINOR	127
-#define IPATH_DIAG_MINOR_BASE	129
-#define IPATH_NMINORS		255
-
-#define ipath_dev_err(dd,fmt,...) \
-	do { \
-		const struct ipath_devdata *__dd = (dd); \
-		if (__dd->pcidev) \
-			dev_err(&__dd->pcidev->dev, "%s: " fmt, \
-				ipath_get_unit_name(__dd->ipath_unit), \
-				##__VA_ARGS__); \
-		else \
-			printk(KERN_ERR IPATH_DRV_NAME ": %s: " fmt, \
-			       ipath_get_unit_name(__dd->ipath_unit), \
-			       ##__VA_ARGS__); \
-	} while (0)
-
-#if _IPATH_DEBUGGING
-
-# define __IPATH_DBG_WHICH(which,fmt,...) \
-	do { \
-		if (unlikely(ipath_debug & (which))) \
-			printk(KERN_DEBUG IPATH_DRV_NAME ": %s: " fmt, \
-			       __func__,##__VA_ARGS__); \
-	} while(0)
-
-# define ipath_dbg(fmt,...) \
-	__IPATH_DBG_WHICH(__IPATH_DBG,fmt,##__VA_ARGS__)
-# define ipath_cdbg(which,fmt,...) \
-	__IPATH_DBG_WHICH(__IPATH_##which##DBG,fmt,##__VA_ARGS__)
-
-#else /* ! _IPATH_DEBUGGING */
-
-# define ipath_dbg(fmt,...)
-# define ipath_cdbg(which,fmt,...)
-
-#endif /* _IPATH_DEBUGGING */
-
-/*
- * this is used for formatting hw error messages...
- */
-struct ipath_hwerror_msgs {
-	u64 mask;
-	const char *msg;
-};
-
-#define INFINIPATH_HWE_MSG(a, b) { .mask = INFINIPATH_HWE_##a, .msg = b }
-
-/* in ipath_intr.c... */
-void ipath_format_hwerrors(u64 hwerrs,
-			   const struct ipath_hwerror_msgs *hwerrmsgs,
-			   size_t nhwerrmsgs,
-			   char *msg, size_t lmsg);
-
-#endif				/* _IPATH_KERNEL_H */
diff --git a/drivers/staging/rdma/ipath/ipath_keys.c b/drivers/staging/rdma/ipath/ipath_keys.c
deleted file mode 100644
index c0e933f..0000000
--- a/drivers/staging/rdma/ipath/ipath_keys.c
+++ /dev/null
@@ -1,270 +0,0 @@
-/*
- * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
- * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <asm/io.h>
-
-#include "ipath_verbs.h"
-#include "ipath_kernel.h"
-
-/**
- * ipath_alloc_lkey - allocate an lkey
- * @rkt: lkey table in which to allocate the lkey
- * @mr: memory region that this lkey protects
- *
- * Returns 1 if successful, otherwise returns 0.
- */
-
-int ipath_alloc_lkey(struct ipath_lkey_table *rkt, struct ipath_mregion *mr)
-{
-	unsigned long flags;
-	u32 r;
-	u32 n;
-	int ret;
-
-	spin_lock_irqsave(&rkt->lock, flags);
-
-	/* Find the next available LKEY */
-	r = n = rkt->next;
-	for (;;) {
-		if (rkt->table[r] == NULL)
-			break;
-		r = (r + 1) & (rkt->max - 1);
-		if (r == n) {
-			spin_unlock_irqrestore(&rkt->lock, flags);
-			ipath_dbg("LKEY table full\n");
-			ret = 0;
-			goto bail;
-		}
-	}
-	rkt->next = (r + 1) & (rkt->max - 1);
-	/*
-	 * Make sure lkey is never zero which is reserved to indicate an
-	 * unrestricted LKEY.
-	 */
-	rkt->gen++;
-	mr->lkey = (r << (32 - ib_ipath_lkey_table_size)) |
-		((((1 << (24 - ib_ipath_lkey_table_size)) - 1) & rkt->gen)
-		 << 8);
-	if (mr->lkey == 0) {
-		mr->lkey |= 1 << 8;
-		rkt->gen++;
-	}
-	rkt->table[r] = mr;
-	spin_unlock_irqrestore(&rkt->lock, flags);
-
-	ret = 1;
-
-bail:
-	return ret;
-}
-
-/**
- * ipath_free_lkey - free an lkey
- * @rkt: table from which to free the lkey
- * @lkey: lkey id to free
- */
-void ipath_free_lkey(struct ipath_lkey_table *rkt, u32 lkey)
-{
-	unsigned long flags;
-	u32 r;
-
-	if (lkey == 0)
-		return;
-	r = lkey >> (32 - ib_ipath_lkey_table_size);
-	spin_lock_irqsave(&rkt->lock, flags);
-	rkt->table[r] = NULL;
-	spin_unlock_irqrestore(&rkt->lock, flags);
-}
-
-/**
- * ipath_lkey_ok - check IB SGE for validity and initialize
- * @rkt: table containing lkey to check SGE against
- * @isge: outgoing internal SGE
- * @sge: SGE to check
- * @acc: access flags
- *
- * Return 1 if valid and successful, otherwise returns 0.
- *
- * Check the IB SGE for validity and initialize our internal version
- * of it.
- */
-int ipath_lkey_ok(struct ipath_qp *qp, struct ipath_sge *isge,
-		  struct ib_sge *sge, int acc)
-{
-	struct ipath_lkey_table *rkt = &to_idev(qp->ibqp.device)->lk_table;
-	struct ipath_mregion *mr;
-	unsigned n, m;
-	size_t off;
-	int ret;
-
-	/*
-	 * We use LKEY == zero for kernel virtual addresses
-	 * (see ipath_get_dma_mr and ipath_dma.c).
-	 */
-	if (sge->lkey == 0) {
-		/* always a kernel port, no locking needed */
-		struct ipath_pd *pd = to_ipd(qp->ibqp.pd);
-
-		if (pd->user) {
-			ret = 0;
-			goto bail;
-		}
-		isge->mr = NULL;
-		isge->vaddr = (void *) sge->addr;
-		isge->length = sge->length;
-		isge->sge_length = sge->length;
-		ret = 1;
-		goto bail;
-	}
-	mr = rkt->table[(sge->lkey >> (32 - ib_ipath_lkey_table_size))];
-	if (unlikely(mr == NULL || mr->lkey != sge->lkey ||
-		     qp->ibqp.pd != mr->pd)) {
-		ret = 0;
-		goto bail;
-	}
-
-	off = sge->addr - mr->user_base;
-	if (unlikely(sge->addr < mr->user_base ||
-		     off + sge->length > mr->length ||
-		     (mr->access_flags & acc) != acc)) {
-		ret = 0;
-		goto bail;
-	}
-
-	off += mr->offset;
-	m = 0;
-	n = 0;
-	while (off >= mr->map[m]->segs[n].length) {
-		off -= mr->map[m]->segs[n].length;
-		n++;
-		if (n >= IPATH_SEGSZ) {
-			m++;
-			n = 0;
-		}
-	}
-	isge->mr = mr;
-	isge->vaddr = mr->map[m]->segs[n].vaddr + off;
-	isge->length = mr->map[m]->segs[n].length - off;
-	isge->sge_length = sge->length;
-	isge->m = m;
-	isge->n = n;
-
-	ret = 1;
-
-bail:
-	return ret;
-}
-
-/**
- * ipath_rkey_ok - check the IB virtual address, length, and RKEY
- * @dev: infiniband device
- * @ss: SGE state
- * @len: length of data
- * @vaddr: virtual address to place data
- * @rkey: rkey to check
- * @acc: access flags
- *
- * Return 1 if successful, otherwise 0.
- */
-int ipath_rkey_ok(struct ipath_qp *qp, struct ipath_sge_state *ss,
-		  u32 len, u64 vaddr, u32 rkey, int acc)
-{
-	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
-	struct ipath_lkey_table *rkt = &dev->lk_table;
-	struct ipath_sge *sge = &ss->sge;
-	struct ipath_mregion *mr;
-	unsigned n, m;
-	size_t off;
-	int ret;
-
-	/*
-	 * We use RKEY == zero for kernel virtual addresses
-	 * (see ipath_get_dma_mr and ipath_dma.c).
-	 */
-	if (rkey == 0) {
-		/* always a kernel port, no locking needed */
-		struct ipath_pd *pd = to_ipd(qp->ibqp.pd);
-
-		if (pd->user) {
-			ret = 0;
-			goto bail;
-		}
-		sge->mr = NULL;
-		sge->vaddr = (void *) vaddr;
-		sge->length = len;
-		sge->sge_length = len;
-		ss->sg_list = NULL;
-		ss->num_sge = 1;
-		ret = 1;
-		goto bail;
-	}
-
-	mr = rkt->table[(rkey >> (32 - ib_ipath_lkey_table_size))];
-	if (unlikely(mr == NULL || mr->lkey != rkey ||
-		     qp->ibqp.pd != mr->pd)) {
-		ret = 0;
-		goto bail;
-	}
-
-	off = vaddr - mr->iova;
-	if (unlikely(vaddr < mr->iova || off + len > mr->length ||
-		     (mr->access_flags & acc) == 0)) {
-		ret = 0;
-		goto bail;
-	}
-
-	off += mr->offset;
-	m = 0;
-	n = 0;
-	while (off >= mr->map[m]->segs[n].length) {
-		off -= mr->map[m]->segs[n].length;
-		n++;
-		if (n >= IPATH_SEGSZ) {
-			m++;
-			n = 0;
-		}
-	}
-	sge->mr = mr;
-	sge->vaddr = mr->map[m]->segs[n].vaddr + off;
-	sge->length = mr->map[m]->segs[n].length - off;
-	sge->sge_length = len;
-	sge->m = m;
-	sge->n = n;
-	ss->sg_list = NULL;
-	ss->num_sge = 1;
-
-	ret = 1;
-
-bail:
-	return ret;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_mad.c b/drivers/staging/rdma/ipath/ipath_mad.c
deleted file mode 100644
index ad3a926..0000000
--- a/drivers/staging/rdma/ipath/ipath_mad.c
+++ /dev/null
@@ -1,1521 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <rdma/ib_smi.h>
-#include <rdma/ib_pma.h>
-
-#include "ipath_kernel.h"
-#include "ipath_verbs.h"
-#include "ipath_common.h"
-
-#define IB_SMP_UNSUP_VERSION	cpu_to_be16(0x0004)
-#define IB_SMP_UNSUP_METHOD	cpu_to_be16(0x0008)
-#define IB_SMP_UNSUP_METH_ATTR	cpu_to_be16(0x000C)
-#define IB_SMP_INVALID_FIELD	cpu_to_be16(0x001C)
-
-static int reply(struct ib_smp *smp)
-{
-	/*
-	 * The verbs framework will handle the directed/LID route
-	 * packet changes.
-	 */
-	smp->method = IB_MGMT_METHOD_GET_RESP;
-	if (smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
-		smp->status |= IB_SMP_DIRECTION;
-	return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
-}
-
-static int recv_subn_get_nodedescription(struct ib_smp *smp,
-					 struct ib_device *ibdev)
-{
-	if (smp->attr_mod)
-		smp->status |= IB_SMP_INVALID_FIELD;
-
-	memcpy(smp->data, ibdev->node_desc, sizeof(smp->data));
-
-	return reply(smp);
-}
-
-struct nodeinfo {
-	u8 base_version;
-	u8 class_version;
-	u8 node_type;
-	u8 num_ports;
-	__be64 sys_guid;
-	__be64 node_guid;
-	__be64 port_guid;
-	__be16 partition_cap;
-	__be16 device_id;
-	__be32 revision;
-	u8 local_port_num;
-	u8 vendor_id[3];
-} __attribute__ ((packed));
-
-static int recv_subn_get_nodeinfo(struct ib_smp *smp,
-				  struct ib_device *ibdev, u8 port)
-{
-	struct nodeinfo *nip = (struct nodeinfo *)&smp->data;
-	struct ipath_devdata *dd = to_idev(ibdev)->dd;
-	u32 vendor, majrev, minrev;
-
-	/* GUID 0 is illegal */
-	if (smp->attr_mod || (dd->ipath_guid == 0))
-		smp->status |= IB_SMP_INVALID_FIELD;
-
-	nip->base_version = 1;
-	nip->class_version = 1;
-	nip->node_type = 1;	/* channel adapter */
-	/*
-	 * XXX The num_ports value will need a layer function to get
-	 * the value if we ever have more than one IB port on a chip.
-	 * We will also need to get the GUID for the port.
-	 */
-	nip->num_ports = ibdev->phys_port_cnt;
-	/* This is already in network order */
-	nip->sys_guid = to_idev(ibdev)->sys_image_guid;
-	nip->node_guid = dd->ipath_guid;
-	nip->port_guid = dd->ipath_guid;
-	nip->partition_cap = cpu_to_be16(ipath_get_npkeys(dd));
-	nip->device_id = cpu_to_be16(dd->ipath_deviceid);
-	majrev = dd->ipath_majrev;
-	minrev = dd->ipath_minrev;
-	nip->revision = cpu_to_be32((majrev << 16) | minrev);
-	nip->local_port_num = port;
-	vendor = dd->ipath_vendorid;
-	nip->vendor_id[0] = IPATH_SRC_OUI_1;
-	nip->vendor_id[1] = IPATH_SRC_OUI_2;
-	nip->vendor_id[2] = IPATH_SRC_OUI_3;
-
-	return reply(smp);
-}
-
-static int recv_subn_get_guidinfo(struct ib_smp *smp,
-				  struct ib_device *ibdev)
-{
-	u32 startgx = 8 * be32_to_cpu(smp->attr_mod);
-	__be64 *p = (__be64 *) smp->data;
-
-	/* 32 blocks of 8 64-bit GUIDs per block */
-
-	memset(smp->data, 0, sizeof(smp->data));
-
-	/*
-	 * We only support one GUID for now.  If this changes, the
-	 * portinfo.guid_cap field needs to be updated too.
-	 */
-	if (startgx == 0) {
-		__be64 g = to_idev(ibdev)->dd->ipath_guid;
-		if (g == 0)
-			/* GUID 0 is illegal */
-			smp->status |= IB_SMP_INVALID_FIELD;
-		else
-			/* The first is a copy of the read-only HW GUID. */
-			*p = g;
-	} else
-		smp->status |= IB_SMP_INVALID_FIELD;
-
-	return reply(smp);
-}
-
-static void set_link_width_enabled(struct ipath_devdata *dd, u32 w)
-{
-	(void) dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_LWID_ENB, w);
-}
-
-static void set_link_speed_enabled(struct ipath_devdata *dd, u32 s)
-{
-	(void) dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_SPD_ENB, s);
-}
-
-static int get_overrunthreshold(struct ipath_devdata *dd)
-{
-	return (dd->ipath_ibcctrl >>
-		INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT) &
-		INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK;
-}
-
-/**
- * set_overrunthreshold - set the overrun threshold
- * @dd: the infinipath device
- * @n: the new threshold
- *
- * Note that this will only take effect when the link state changes.
- */
-static int set_overrunthreshold(struct ipath_devdata *dd, unsigned n)
-{
-	unsigned v;
-
-	v = (dd->ipath_ibcctrl >> INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT) &
-		INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK;
-	if (v != n) {
-		dd->ipath_ibcctrl &=
-			~(INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK <<
-			  INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT);
-		dd->ipath_ibcctrl |=
-			(u64) n << INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT;
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
-				 dd->ipath_ibcctrl);
-	}
-	return 0;
-}
-
-static int get_phyerrthreshold(struct ipath_devdata *dd)
-{
-	return (dd->ipath_ibcctrl >>
-		INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) &
-		INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK;
-}
-
-/**
- * set_phyerrthreshold - set the physical error threshold
- * @dd: the infinipath device
- * @n: the new threshold
- *
- * Note that this will only take effect when the link state changes.
- */
-static int set_phyerrthreshold(struct ipath_devdata *dd, unsigned n)
-{
-	unsigned v;
-
-	v = (dd->ipath_ibcctrl >> INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) &
-		INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK;
-	if (v != n) {
-		dd->ipath_ibcctrl &=
-			~(INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK <<
-			  INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT);
-		dd->ipath_ibcctrl |=
-			(u64) n << INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT;
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
-				 dd->ipath_ibcctrl);
-	}
-	return 0;
-}
-
-/**
- * get_linkdowndefaultstate - get the default linkdown state
- * @dd: the infinipath device
- *
- * Returns zero if the default is POLL, 1 if the default is SLEEP.
- */
-static int get_linkdowndefaultstate(struct ipath_devdata *dd)
-{
-	return !!(dd->ipath_ibcctrl & INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE);
-}
-
-static int recv_subn_get_portinfo(struct ib_smp *smp,
-				  struct ib_device *ibdev, u8 port)
-{
-	struct ipath_ibdev *dev;
-	struct ipath_devdata *dd;
-	struct ib_port_info *pip = (struct ib_port_info *)smp->data;
-	u16 lid;
-	u8 ibcstat;
-	u8 mtu;
-	int ret;
-
-	if (be32_to_cpu(smp->attr_mod) > ibdev->phys_port_cnt) {
-		smp->status |= IB_SMP_INVALID_FIELD;
-		ret = reply(smp);
-		goto bail;
-	}
-
-	dev = to_idev(ibdev);
-	dd = dev->dd;
-
-	/* Clear all fields.  Only set the non-zero fields. */
-	memset(smp->data, 0, sizeof(smp->data));
-
-	/* Only return the mkey if the protection field allows it. */
-	if (smp->method == IB_MGMT_METHOD_SET || dev->mkey == smp->mkey ||
-	    dev->mkeyprot == 0)
-		pip->mkey = dev->mkey;
-	pip->gid_prefix = dev->gid_prefix;
-	lid = dd->ipath_lid;
-	pip->lid = lid ? cpu_to_be16(lid) : IB_LID_PERMISSIVE;
-	pip->sm_lid = cpu_to_be16(dev->sm_lid);
-	pip->cap_mask = cpu_to_be32(dev->port_cap_flags);
-	/* pip->diag_code; */
-	pip->mkey_lease_period = cpu_to_be16(dev->mkey_lease_period);
-	pip->local_port_num = port;
-	pip->link_width_enabled = dd->ipath_link_width_enabled;
-	pip->link_width_supported = dd->ipath_link_width_supported;
-	pip->link_width_active = dd->ipath_link_width_active;
-	pip->linkspeed_portstate = dd->ipath_link_speed_supported << 4;
-	ibcstat = dd->ipath_lastibcstat;
-	/* map LinkState to IB portinfo values.  */
-	pip->linkspeed_portstate |= ipath_ib_linkstate(dd, ibcstat) + 1;
-
-	pip->portphysstate_linkdown =
-		(ipath_cvt_physportstate[ibcstat & dd->ibcs_lts_mask] << 4) |
-		(get_linkdowndefaultstate(dd) ? 1 : 2);
-	pip->mkeyprot_resv_lmc = (dev->mkeyprot << 6) | dd->ipath_lmc;
-	pip->linkspeedactive_enabled = (dd->ipath_link_speed_active << 4) |
-		dd->ipath_link_speed_enabled;
-	switch (dd->ipath_ibmtu) {
-	case 4096:
-		mtu = IB_MTU_4096;
-		break;
-	case 2048:
-		mtu = IB_MTU_2048;
-		break;
-	case 1024:
-		mtu = IB_MTU_1024;
-		break;
-	case 512:
-		mtu = IB_MTU_512;
-		break;
-	case 256:
-		mtu = IB_MTU_256;
-		break;
-	default:		/* oops, something is wrong */
-		mtu = IB_MTU_2048;
-		break;
-	}
-	pip->neighbormtu_mastersmsl = (mtu << 4) | dev->sm_sl;
-	pip->vlcap_inittype = 0x10;	/* VLCap = VL0, InitType = 0 */
-	pip->vl_high_limit = dev->vl_high_limit;
-	/* pip->vl_arb_high_cap; // only one VL */
-	/* pip->vl_arb_low_cap; // only one VL */
-	/* InitTypeReply = 0 */
-	/* our mtu cap depends on whether 4K MTU enabled or not */
-	pip->inittypereply_mtucap = ipath_mtu4096 ? IB_MTU_4096 : IB_MTU_2048;
-	/* HCAs ignore VLStallCount and HOQLife */
-	/* pip->vlstallcnt_hoqlife; */
-	pip->operationalvl_pei_peo_fpi_fpo = 0x10;	/* OVLs = 1 */
-	pip->mkey_violations = cpu_to_be16(dev->mkey_violations);
-	/* P_KeyViolations are counted by hardware. */
-	pip->pkey_violations =
-		cpu_to_be16((ipath_get_cr_errpkey(dd) -
-			     dev->z_pkey_violations) & 0xFFFF);
-	pip->qkey_violations = cpu_to_be16(dev->qkey_violations);
-	/* Only the hardware GUID is supported for now */
-	pip->guid_cap = 1;
-	pip->clientrereg_resv_subnetto = dev->subnet_timeout;
-	/* 32.768 usec. response time (guessing) */
-	pip->resv_resptimevalue = 3;
-	pip->localphyerrors_overrunerrors =
-		(get_phyerrthreshold(dd) << 4) |
-		get_overrunthreshold(dd);
-	/* pip->max_credit_hint; */
-	if (dev->port_cap_flags & IB_PORT_LINK_LATENCY_SUP) {
-		u32 v;
-
-		v = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_LINKLATENCY);
-		pip->link_roundtrip_latency[0] = v >> 16;
-		pip->link_roundtrip_latency[1] = v >> 8;
-		pip->link_roundtrip_latency[2] = v;
-	}
-
-	ret = reply(smp);
-
-bail:
-	return ret;
-}
-
-/**
- * get_pkeys - return the PKEY table for port 0
- * @dd: the infinipath device
- * @pkeys: the pkey table is placed here
- */
-static int get_pkeys(struct ipath_devdata *dd, u16 * pkeys)
-{
-	/* always a kernel port, no locking needed */
-	struct ipath_portdata *pd = dd->ipath_pd[0];
-
-	memcpy(pkeys, pd->port_pkeys, sizeof(pd->port_pkeys));
-
-	return 0;
-}
-
-static int recv_subn_get_pkeytable(struct ib_smp *smp,
-				   struct ib_device *ibdev)
-{
-	u32 startpx = 32 * (be32_to_cpu(smp->attr_mod) & 0xffff);
-	u16 *p = (u16 *) smp->data;
-	__be16 *q = (__be16 *) smp->data;
-
-	/* 64 blocks of 32 16-bit P_Key entries */
-
-	memset(smp->data, 0, sizeof(smp->data));
-	if (startpx == 0) {
-		struct ipath_ibdev *dev = to_idev(ibdev);
-		unsigned i, n = ipath_get_npkeys(dev->dd);
-
-		get_pkeys(dev->dd, p);
-
-		for (i = 0; i < n; i++)
-			q[i] = cpu_to_be16(p[i]);
-	} else
-		smp->status |= IB_SMP_INVALID_FIELD;
-
-	return reply(smp);
-}
-
-static int recv_subn_set_guidinfo(struct ib_smp *smp,
-				  struct ib_device *ibdev)
-{
-	/* The only GUID we support is the first read-only entry. */
-	return recv_subn_get_guidinfo(smp, ibdev);
-}
-
-/**
- * set_linkdowndefaultstate - set the default linkdown state
- * @dd: the infinipath device
- * @sleep: the new state
- *
- * Note that this will only take effect when the link state changes.
- */
-static int set_linkdowndefaultstate(struct ipath_devdata *dd, int sleep)
-{
-	if (sleep)
-		dd->ipath_ibcctrl |= INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE;
-	else
-		dd->ipath_ibcctrl &= ~INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE;
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
-			 dd->ipath_ibcctrl);
-	return 0;
-}
-
-/**
- * recv_subn_set_portinfo - set port information
- * @smp: the incoming SM packet
- * @ibdev: the infiniband device
- * @port: the port on the device
- *
- * Set Portinfo (see ch. 14.2.5.6).
- */
-static int recv_subn_set_portinfo(struct ib_smp *smp,
-				  struct ib_device *ibdev, u8 port)
-{
-	struct ib_port_info *pip = (struct ib_port_info *)smp->data;
-	struct ib_event event;
-	struct ipath_ibdev *dev;
-	struct ipath_devdata *dd;
-	char clientrereg = 0;
-	u16 lid, smlid;
-	u8 lwe;
-	u8 lse;
-	u8 state;
-	u16 lstate;
-	u32 mtu;
-	int ret, ore;
-
-	if (be32_to_cpu(smp->attr_mod) > ibdev->phys_port_cnt)
-		goto err;
-
-	dev = to_idev(ibdev);
-	dd = dev->dd;
-	event.device = ibdev;
-	event.element.port_num = port;
-
-	dev->mkey = pip->mkey;
-	dev->gid_prefix = pip->gid_prefix;
-	dev->mkey_lease_period = be16_to_cpu(pip->mkey_lease_period);
-
-	lid = be16_to_cpu(pip->lid);
-	if (dd->ipath_lid != lid ||
-	    dd->ipath_lmc != (pip->mkeyprot_resv_lmc & 7)) {
-		/* Must be a valid unicast LID address. */
-		if (lid == 0 || lid >= IPATH_MULTICAST_LID_BASE)
-			goto err;
-		ipath_set_lid(dd, lid, pip->mkeyprot_resv_lmc & 7);
-		event.event = IB_EVENT_LID_CHANGE;
-		ib_dispatch_event(&event);
-	}
-
-	smlid = be16_to_cpu(pip->sm_lid);
-	if (smlid != dev->sm_lid) {
-		/* Must be a valid unicast LID address. */
-		if (smlid == 0 || smlid >= IPATH_MULTICAST_LID_BASE)
-			goto err;
-		dev->sm_lid = smlid;
-		event.event = IB_EVENT_SM_CHANGE;
-		ib_dispatch_event(&event);
-	}
-
-	/* Allow 1x or 4x to be set (see 14.2.6.6). */
-	lwe = pip->link_width_enabled;
-	if (lwe) {
-		if (lwe == 0xFF)
-			lwe = dd->ipath_link_width_supported;
-		else if (lwe >= 16 || (lwe & ~dd->ipath_link_width_supported))
-			goto err;
-		set_link_width_enabled(dd, lwe);
-	}
-
-	/* Allow 2.5 or 5.0 Gbs. */
-	lse = pip->linkspeedactive_enabled & 0xF;
-	if (lse) {
-		if (lse == 15)
-			lse = dd->ipath_link_speed_supported;
-		else if (lse >= 8 || (lse & ~dd->ipath_link_speed_supported))
-			goto err;
-		set_link_speed_enabled(dd, lse);
-	}
-
-	/* Set link down default state. */
-	switch (pip->portphysstate_linkdown & 0xF) {
-	case 0: /* NOP */
-		break;
-	case 1: /* SLEEP */
-		if (set_linkdowndefaultstate(dd, 1))
-			goto err;
-		break;
-	case 2: /* POLL */
-		if (set_linkdowndefaultstate(dd, 0))
-			goto err;
-		break;
-	default:
-		goto err;
-	}
-
-	dev->mkeyprot = pip->mkeyprot_resv_lmc >> 6;
-	dev->vl_high_limit = pip->vl_high_limit;
-
-	switch ((pip->neighbormtu_mastersmsl >> 4) & 0xF) {
-	case IB_MTU_256:
-		mtu = 256;
-		break;
-	case IB_MTU_512:
-		mtu = 512;
-		break;
-	case IB_MTU_1024:
-		mtu = 1024;
-		break;
-	case IB_MTU_2048:
-		mtu = 2048;
-		break;
-	case IB_MTU_4096:
-		if (!ipath_mtu4096)
-			goto err;
-		mtu = 4096;
-		break;
-	default:
-		/* XXX We have already partially updated our state! */
-		goto err;
-	}
-	ipath_set_mtu(dd, mtu);
-
-	dev->sm_sl = pip->neighbormtu_mastersmsl & 0xF;
-
-	/* We only support VL0 */
-	if (((pip->operationalvl_pei_peo_fpi_fpo >> 4) & 0xF) > 1)
-		goto err;
-
-	if (pip->mkey_violations == 0)
-		dev->mkey_violations = 0;
-
-	/*
-	 * Hardware counter can't be reset so snapshot and subtract
-	 * later.
-	 */
-	if (pip->pkey_violations == 0)
-		dev->z_pkey_violations = ipath_get_cr_errpkey(dd);
-
-	if (pip->qkey_violations == 0)
-		dev->qkey_violations = 0;
-
-	ore = pip->localphyerrors_overrunerrors;
-	if (set_phyerrthreshold(dd, (ore >> 4) & 0xF))
-		goto err;
-
-	if (set_overrunthreshold(dd, (ore & 0xF)))
-		goto err;
-
-	dev->subnet_timeout = pip->clientrereg_resv_subnetto & 0x1F;
-
-	if (pip->clientrereg_resv_subnetto & 0x80) {
-		clientrereg = 1;
-		event.event = IB_EVENT_CLIENT_REREGISTER;
-		ib_dispatch_event(&event);
-	}
-
-	/*
-	 * Do the port state change now that the other link parameters
-	 * have been set.
-	 * Changing the port physical state only makes sense if the link
-	 * is down or is being set to down.
-	 */
-	state = pip->linkspeed_portstate & 0xF;
-	lstate = (pip->portphysstate_linkdown >> 4) & 0xF;
-	if (lstate && !(state == IB_PORT_DOWN || state == IB_PORT_NOP))
-		goto err;
-
-	/*
-	 * Only state changes of DOWN, ARM, and ACTIVE are valid
-	 * and must be in the correct state to take effect (see 7.2.6).
-	 */
-	switch (state) {
-	case IB_PORT_NOP:
-		if (lstate == 0)
-			break;
-		/* FALLTHROUGH */
-	case IB_PORT_DOWN:
-		if (lstate == 0)
-			lstate = IPATH_IB_LINKDOWN_ONLY;
-		else if (lstate == 1)
-			lstate = IPATH_IB_LINKDOWN_SLEEP;
-		else if (lstate == 2)
-			lstate = IPATH_IB_LINKDOWN;
-		else if (lstate == 3)
-			lstate = IPATH_IB_LINKDOWN_DISABLE;
-		else
-			goto err;
-		ipath_set_linkstate(dd, lstate);
-		if (lstate == IPATH_IB_LINKDOWN_DISABLE) {
-			ret = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
-			goto done;
-		}
-		ipath_wait_linkstate(dd, IPATH_LINKINIT | IPATH_LINKARMED |
-				IPATH_LINKACTIVE, 1000);
-		break;
-	case IB_PORT_ARMED:
-		ipath_set_linkstate(dd, IPATH_IB_LINKARM);
-		break;
-	case IB_PORT_ACTIVE:
-		ipath_set_linkstate(dd, IPATH_IB_LINKACTIVE);
-		break;
-	default:
-		/* XXX We have already partially updated our state! */
-		goto err;
-	}
-
-	ret = recv_subn_get_portinfo(smp, ibdev, port);
-
-	if (clientrereg)
-		pip->clientrereg_resv_subnetto |= 0x80;
-
-	goto done;
-
-err:
-	smp->status |= IB_SMP_INVALID_FIELD;
-	ret = recv_subn_get_portinfo(smp, ibdev, port);
-
-done:
-	return ret;
-}
-
-/**
- * rm_pkey - decrecment the reference count for the given PKEY
- * @dd: the infinipath device
- * @key: the PKEY index
- *
- * Return true if this was the last reference and the hardware table entry
- * needs to be changed.
- */
-static int rm_pkey(struct ipath_devdata *dd, u16 key)
-{
-	int i;
-	int ret;
-
-	for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
-		if (dd->ipath_pkeys[i] != key)
-			continue;
-		if (atomic_dec_and_test(&dd->ipath_pkeyrefs[i])) {
-			dd->ipath_pkeys[i] = 0;
-			ret = 1;
-			goto bail;
-		}
-		break;
-	}
-
-	ret = 0;
-
-bail:
-	return ret;
-}
-
-/**
- * add_pkey - add the given PKEY to the hardware table
- * @dd: the infinipath device
- * @key: the PKEY
- *
- * Return an error code if unable to add the entry, zero if no change,
- * or 1 if the hardware PKEY register needs to be updated.
- */
-static int add_pkey(struct ipath_devdata *dd, u16 key)
-{
-	int i;
-	u16 lkey = key & 0x7FFF;
-	int any = 0;
-	int ret;
-
-	if (lkey == 0x7FFF) {
-		ret = 0;
-		goto bail;
-	}
-
-	/* Look for an empty slot or a matching PKEY. */
-	for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
-		if (!dd->ipath_pkeys[i]) {
-			any++;
-			continue;
-		}
-		/* If it matches exactly, try to increment the ref count */
-		if (dd->ipath_pkeys[i] == key) {
-			if (atomic_inc_return(&dd->ipath_pkeyrefs[i]) > 1) {
-				ret = 0;
-				goto bail;
-			}
-			/* Lost the race. Look for an empty slot below. */
-			atomic_dec(&dd->ipath_pkeyrefs[i]);
-			any++;
-		}
-		/*
-		 * It makes no sense to have both the limited and unlimited
-		 * PKEY set at the same time since the unlimited one will
-		 * disable the limited one.
-		 */
-		if ((dd->ipath_pkeys[i] & 0x7FFF) == lkey) {
-			ret = -EEXIST;
-			goto bail;
-		}
-	}
-	if (!any) {
-		ret = -EBUSY;
-		goto bail;
-	}
-	for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
-		if (!dd->ipath_pkeys[i] &&
-		    atomic_inc_return(&dd->ipath_pkeyrefs[i]) == 1) {
-			/* for ipathstats, etc. */
-			ipath_stats.sps_pkeys[i] = lkey;
-			dd->ipath_pkeys[i] = key;
-			ret = 1;
-			goto bail;
-		}
-	}
-	ret = -EBUSY;
-
-bail:
-	return ret;
-}
-
-/**
- * set_pkeys - set the PKEY table for port 0
- * @dd: the infinipath device
- * @pkeys: the PKEY table
- */
-static int set_pkeys(struct ipath_devdata *dd, u16 *pkeys, u8 port)
-{
-	struct ipath_portdata *pd;
-	int i;
-	int changed = 0;
-
-	/* always a kernel port, no locking needed */
-	pd = dd->ipath_pd[0];
-
-	for (i = 0; i < ARRAY_SIZE(pd->port_pkeys); i++) {
-		u16 key = pkeys[i];
-		u16 okey = pd->port_pkeys[i];
-
-		if (key == okey)
-			continue;
-		/*
-		 * The value of this PKEY table entry is changing.
-		 * Remove the old entry in the hardware's array of PKEYs.
-		 */
-		if (okey & 0x7FFF)
-			changed |= rm_pkey(dd, okey);
-		if (key & 0x7FFF) {
-			int ret = add_pkey(dd, key);
-
-			if (ret < 0)
-				key = 0;
-			else
-				changed |= ret;
-		}
-		pd->port_pkeys[i] = key;
-	}
-	if (changed) {
-		u64 pkey;
-		struct ib_event event;
-
-		pkey = (u64) dd->ipath_pkeys[0] |
-			((u64) dd->ipath_pkeys[1] << 16) |
-			((u64) dd->ipath_pkeys[2] << 32) |
-			((u64) dd->ipath_pkeys[3] << 48);
-		ipath_cdbg(VERBOSE, "p0 new pkey reg %llx\n",
-			   (unsigned long long) pkey);
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_partitionkey,
-				 pkey);
-
-		event.event = IB_EVENT_PKEY_CHANGE;
-		event.device = &dd->verbs_dev->ibdev;
-		event.element.port_num = port;
-		ib_dispatch_event(&event);
-	}
-	return 0;
-}
-
-static int recv_subn_set_pkeytable(struct ib_smp *smp,
-				   struct ib_device *ibdev, u8 port)
-{
-	u32 startpx = 32 * (be32_to_cpu(smp->attr_mod) & 0xffff);
-	__be16 *p = (__be16 *) smp->data;
-	u16 *q = (u16 *) smp->data;
-	struct ipath_ibdev *dev = to_idev(ibdev);
-	unsigned i, n = ipath_get_npkeys(dev->dd);
-
-	for (i = 0; i < n; i++)
-		q[i] = be16_to_cpu(p[i]);
-
-	if (startpx != 0 || set_pkeys(dev->dd, q, port) != 0)
-		smp->status |= IB_SMP_INVALID_FIELD;
-
-	return recv_subn_get_pkeytable(smp, ibdev);
-}
-
-static int recv_pma_get_classportinfo(struct ib_pma_mad *pmp)
-{
-	struct ib_class_port_info *p =
-		(struct ib_class_port_info *)pmp->data;
-
-	memset(pmp->data, 0, sizeof(pmp->data));
-
-	if (pmp->mad_hdr.attr_mod != 0)
-		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
-
-	/* Indicate AllPortSelect is valid (only one port anyway) */
-	p->capability_mask = cpu_to_be16(1 << 8);
-	p->base_version = 1;
-	p->class_version = 1;
-	/*
-	 * Expected response time is 4.096 usec. * 2^18 == 1.073741824
-	 * sec.
-	 */
-	p->resp_time_value = 18;
-
-	return reply((struct ib_smp *) pmp);
-}
-
-/*
- * The PortSamplesControl.CounterMasks field is an array of 3 bit fields
- * which specify the N'th counter's capabilities. See ch. 16.1.3.2.
- * We support 5 counters which only count the mandatory quantities.
- */
-#define COUNTER_MASK(q, n) (q << ((9 - n) * 3))
-#define COUNTER_MASK0_9 cpu_to_be32(COUNTER_MASK(1, 0) | \
-				    COUNTER_MASK(1, 1) | \
-				    COUNTER_MASK(1, 2) | \
-				    COUNTER_MASK(1, 3) | \
-				    COUNTER_MASK(1, 4))
-
-static int recv_pma_get_portsamplescontrol(struct ib_pma_mad *pmp,
-					   struct ib_device *ibdev, u8 port)
-{
-	struct ib_pma_portsamplescontrol *p =
-		(struct ib_pma_portsamplescontrol *)pmp->data;
-	struct ipath_ibdev *dev = to_idev(ibdev);
-	struct ipath_cregs const *crp = dev->dd->ipath_cregs;
-	unsigned long flags;
-	u8 port_select = p->port_select;
-
-	memset(pmp->data, 0, sizeof(pmp->data));
-
-	p->port_select = port_select;
-	if (pmp->mad_hdr.attr_mod != 0 ||
-	    (port_select != port && port_select != 0xFF))
-		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
-	/*
-	 * Ticks are 10x the link transfer period which for 2.5Gbs is 4
-	 * nsec.  0 == 4 nsec., 1 == 8 nsec., ..., 255 == 1020 nsec.  Sample
-	 * intervals are counted in ticks.  Since we use Linux timers, that
-	 * count in jiffies, we can't sample for less than 1000 ticks if HZ
-	 * == 1000 (4000 ticks if HZ is 250).  link_speed_active returns 2 for
-	 * DDR, 1 for SDR, set the tick to 1 for DDR, 0 for SDR on chips that
-	 * have hardware support for delaying packets.
-	 */
-	if (crp->cr_psstat)
-		p->tick = dev->dd->ipath_link_speed_active - 1;
-	else
-		p->tick = 250;		/* 1 usec. */
-	p->counter_width = 4;	/* 32 bit counters */
-	p->counter_mask0_9 = COUNTER_MASK0_9;
-	spin_lock_irqsave(&dev->pending_lock, flags);
-	if (crp->cr_psstat)
-		p->sample_status = ipath_read_creg32(dev->dd, crp->cr_psstat);
-	else
-		p->sample_status = dev->pma_sample_status;
-	p->sample_start = cpu_to_be32(dev->pma_sample_start);
-	p->sample_interval = cpu_to_be32(dev->pma_sample_interval);
-	p->tag = cpu_to_be16(dev->pma_tag);
-	p->counter_select[0] = dev->pma_counter_select[0];
-	p->counter_select[1] = dev->pma_counter_select[1];
-	p->counter_select[2] = dev->pma_counter_select[2];
-	p->counter_select[3] = dev->pma_counter_select[3];
-	p->counter_select[4] = dev->pma_counter_select[4];
-	spin_unlock_irqrestore(&dev->pending_lock, flags);
-
-	return reply((struct ib_smp *) pmp);
-}
-
-static int recv_pma_set_portsamplescontrol(struct ib_pma_mad *pmp,
-					   struct ib_device *ibdev, u8 port)
-{
-	struct ib_pma_portsamplescontrol *p =
-		(struct ib_pma_portsamplescontrol *)pmp->data;
-	struct ipath_ibdev *dev = to_idev(ibdev);
-	struct ipath_cregs const *crp = dev->dd->ipath_cregs;
-	unsigned long flags;
-	u8 status;
-	int ret;
-
-	if (pmp->mad_hdr.attr_mod != 0 ||
-	    (p->port_select != port && p->port_select != 0xFF)) {
-		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
-		ret = reply((struct ib_smp *) pmp);
-		goto bail;
-	}
-
-	spin_lock_irqsave(&dev->pending_lock, flags);
-	if (crp->cr_psstat)
-		status = ipath_read_creg32(dev->dd, crp->cr_psstat);
-	else
-		status = dev->pma_sample_status;
-	if (status == IB_PMA_SAMPLE_STATUS_DONE) {
-		dev->pma_sample_start = be32_to_cpu(p->sample_start);
-		dev->pma_sample_interval = be32_to_cpu(p->sample_interval);
-		dev->pma_tag = be16_to_cpu(p->tag);
-		dev->pma_counter_select[0] = p->counter_select[0];
-		dev->pma_counter_select[1] = p->counter_select[1];
-		dev->pma_counter_select[2] = p->counter_select[2];
-		dev->pma_counter_select[3] = p->counter_select[3];
-		dev->pma_counter_select[4] = p->counter_select[4];
-		if (crp->cr_psstat) {
-			ipath_write_creg(dev->dd, crp->cr_psinterval,
-					 dev->pma_sample_interval);
-			ipath_write_creg(dev->dd, crp->cr_psstart,
-					 dev->pma_sample_start);
-		} else
-			dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_STARTED;
-	}
-	spin_unlock_irqrestore(&dev->pending_lock, flags);
-
-	ret = recv_pma_get_portsamplescontrol(pmp, ibdev, port);
-
-bail:
-	return ret;
-}
-
-static u64 get_counter(struct ipath_ibdev *dev,
-		       struct ipath_cregs const *crp,
-		       __be16 sel)
-{
-	u64 ret;
-
-	switch (sel) {
-	case IB_PMA_PORT_XMIT_DATA:
-		ret = (crp->cr_psxmitdatacount) ?
-			ipath_read_creg32(dev->dd, crp->cr_psxmitdatacount) :
-			dev->ipath_sword;
-		break;
-	case IB_PMA_PORT_RCV_DATA:
-		ret = (crp->cr_psrcvdatacount) ?
-			ipath_read_creg32(dev->dd, crp->cr_psrcvdatacount) :
-			dev->ipath_rword;
-		break;
-	case IB_PMA_PORT_XMIT_PKTS:
-		ret = (crp->cr_psxmitpktscount) ?
-			ipath_read_creg32(dev->dd, crp->cr_psxmitpktscount) :
-			dev->ipath_spkts;
-		break;
-	case IB_PMA_PORT_RCV_PKTS:
-		ret = (crp->cr_psrcvpktscount) ?
-			ipath_read_creg32(dev->dd, crp->cr_psrcvpktscount) :
-			dev->ipath_rpkts;
-		break;
-	case IB_PMA_PORT_XMIT_WAIT:
-		ret = (crp->cr_psxmitwaitcount) ?
-			ipath_read_creg32(dev->dd, crp->cr_psxmitwaitcount) :
-			dev->ipath_xmit_wait;
-		break;
-	default:
-		ret = 0;
-	}
-
-	return ret;
-}
-
-static int recv_pma_get_portsamplesresult(struct ib_pma_mad *pmp,
-					  struct ib_device *ibdev)
-{
-	struct ib_pma_portsamplesresult *p =
-		(struct ib_pma_portsamplesresult *)pmp->data;
-	struct ipath_ibdev *dev = to_idev(ibdev);
-	struct ipath_cregs const *crp = dev->dd->ipath_cregs;
-	u8 status;
-	int i;
-
-	memset(pmp->data, 0, sizeof(pmp->data));
-	p->tag = cpu_to_be16(dev->pma_tag);
-	if (crp->cr_psstat)
-		status = ipath_read_creg32(dev->dd, crp->cr_psstat);
-	else
-		status = dev->pma_sample_status;
-	p->sample_status = cpu_to_be16(status);
-	for (i = 0; i < ARRAY_SIZE(dev->pma_counter_select); i++)
-		p->counter[i] = (status != IB_PMA_SAMPLE_STATUS_DONE) ? 0 :
-		    cpu_to_be32(
-			get_counter(dev, crp, dev->pma_counter_select[i]));
-
-	return reply((struct ib_smp *) pmp);
-}
-
-static int recv_pma_get_portsamplesresult_ext(struct ib_pma_mad *pmp,
-					      struct ib_device *ibdev)
-{
-	struct ib_pma_portsamplesresult_ext *p =
-		(struct ib_pma_portsamplesresult_ext *)pmp->data;
-	struct ipath_ibdev *dev = to_idev(ibdev);
-	struct ipath_cregs const *crp = dev->dd->ipath_cregs;
-	u8 status;
-	int i;
-
-	memset(pmp->data, 0, sizeof(pmp->data));
-	p->tag = cpu_to_be16(dev->pma_tag);
-	if (crp->cr_psstat)
-		status = ipath_read_creg32(dev->dd, crp->cr_psstat);
-	else
-		status = dev->pma_sample_status;
-	p->sample_status = cpu_to_be16(status);
-	/* 64 bits */
-	p->extended_width = cpu_to_be32(0x80000000);
-	for (i = 0; i < ARRAY_SIZE(dev->pma_counter_select); i++)
-		p->counter[i] = (status != IB_PMA_SAMPLE_STATUS_DONE) ? 0 :
-		    cpu_to_be64(
-			get_counter(dev, crp, dev->pma_counter_select[i]));
-
-	return reply((struct ib_smp *) pmp);
-}
-
-static int recv_pma_get_portcounters(struct ib_pma_mad *pmp,
-				     struct ib_device *ibdev, u8 port)
-{
-	struct ib_pma_portcounters *p = (struct ib_pma_portcounters *)
-		pmp->data;
-	struct ipath_ibdev *dev = to_idev(ibdev);
-	struct ipath_verbs_counters cntrs;
-	u8 port_select = p->port_select;
-
-	ipath_get_counters(dev->dd, &cntrs);
-
-	/* Adjust counters for any resets done. */
-	cntrs.symbol_error_counter -= dev->z_symbol_error_counter;
-	cntrs.link_error_recovery_counter -=
-		dev->z_link_error_recovery_counter;
-	cntrs.link_downed_counter -= dev->z_link_downed_counter;
-	cntrs.port_rcv_errors += dev->rcv_errors;
-	cntrs.port_rcv_errors -= dev->z_port_rcv_errors;
-	cntrs.port_rcv_remphys_errors -= dev->z_port_rcv_remphys_errors;
-	cntrs.port_xmit_discards -= dev->z_port_xmit_discards;
-	cntrs.port_xmit_data -= dev->z_port_xmit_data;
-	cntrs.port_rcv_data -= dev->z_port_rcv_data;
-	cntrs.port_xmit_packets -= dev->z_port_xmit_packets;
-	cntrs.port_rcv_packets -= dev->z_port_rcv_packets;
-	cntrs.local_link_integrity_errors -=
-		dev->z_local_link_integrity_errors;
-	cntrs.excessive_buffer_overrun_errors -=
-		dev->z_excessive_buffer_overrun_errors;
-	cntrs.vl15_dropped -= dev->z_vl15_dropped;
-	cntrs.vl15_dropped += dev->n_vl15_dropped;
-
-	memset(pmp->data, 0, sizeof(pmp->data));
-
-	p->port_select = port_select;
-	if (pmp->mad_hdr.attr_mod != 0 ||
-	    (port_select != port && port_select != 0xFF))
-		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
-
-	if (cntrs.symbol_error_counter > 0xFFFFUL)
-		p->symbol_error_counter = cpu_to_be16(0xFFFF);
-	else
-		p->symbol_error_counter =
-			cpu_to_be16((u16)cntrs.symbol_error_counter);
-	if (cntrs.link_error_recovery_counter > 0xFFUL)
-		p->link_error_recovery_counter = 0xFF;
-	else
-		p->link_error_recovery_counter =
-			(u8)cntrs.link_error_recovery_counter;
-	if (cntrs.link_downed_counter > 0xFFUL)
-		p->link_downed_counter = 0xFF;
-	else
-		p->link_downed_counter = (u8)cntrs.link_downed_counter;
-	if (cntrs.port_rcv_errors > 0xFFFFUL)
-		p->port_rcv_errors = cpu_to_be16(0xFFFF);
-	else
-		p->port_rcv_errors =
-			cpu_to_be16((u16) cntrs.port_rcv_errors);
-	if (cntrs.port_rcv_remphys_errors > 0xFFFFUL)
-		p->port_rcv_remphys_errors = cpu_to_be16(0xFFFF);
-	else
-		p->port_rcv_remphys_errors =
-			cpu_to_be16((u16)cntrs.port_rcv_remphys_errors);
-	if (cntrs.port_xmit_discards > 0xFFFFUL)
-		p->port_xmit_discards = cpu_to_be16(0xFFFF);
-	else
-		p->port_xmit_discards =
-			cpu_to_be16((u16)cntrs.port_xmit_discards);
-	if (cntrs.local_link_integrity_errors > 0xFUL)
-		cntrs.local_link_integrity_errors = 0xFUL;
-	if (cntrs.excessive_buffer_overrun_errors > 0xFUL)
-		cntrs.excessive_buffer_overrun_errors = 0xFUL;
-	p->link_overrun_errors = (cntrs.local_link_integrity_errors << 4) |
-		cntrs.excessive_buffer_overrun_errors;
-	if (cntrs.vl15_dropped > 0xFFFFUL)
-		p->vl15_dropped = cpu_to_be16(0xFFFF);
-	else
-		p->vl15_dropped = cpu_to_be16((u16)cntrs.vl15_dropped);
-	if (cntrs.port_xmit_data > 0xFFFFFFFFUL)
-		p->port_xmit_data = cpu_to_be32(0xFFFFFFFF);
-	else
-		p->port_xmit_data = cpu_to_be32((u32)cntrs.port_xmit_data);
-	if (cntrs.port_rcv_data > 0xFFFFFFFFUL)
-		p->port_rcv_data = cpu_to_be32(0xFFFFFFFF);
-	else
-		p->port_rcv_data = cpu_to_be32((u32)cntrs.port_rcv_data);
-	if (cntrs.port_xmit_packets > 0xFFFFFFFFUL)
-		p->port_xmit_packets = cpu_to_be32(0xFFFFFFFF);
-	else
-		p->port_xmit_packets =
-			cpu_to_be32((u32)cntrs.port_xmit_packets);
-	if (cntrs.port_rcv_packets > 0xFFFFFFFFUL)
-		p->port_rcv_packets = cpu_to_be32(0xFFFFFFFF);
-	else
-		p->port_rcv_packets =
-			cpu_to_be32((u32) cntrs.port_rcv_packets);
-
-	return reply((struct ib_smp *) pmp);
-}
-
-static int recv_pma_get_portcounters_ext(struct ib_pma_mad *pmp,
-					 struct ib_device *ibdev, u8 port)
-{
-	struct ib_pma_portcounters_ext *p =
-		(struct ib_pma_portcounters_ext *)pmp->data;
-	struct ipath_ibdev *dev = to_idev(ibdev);
-	u64 swords, rwords, spkts, rpkts, xwait;
-	u8 port_select = p->port_select;
-
-	ipath_snapshot_counters(dev->dd, &swords, &rwords, &spkts,
-				&rpkts, &xwait);
-
-	/* Adjust counters for any resets done. */
-	swords -= dev->z_port_xmit_data;
-	rwords -= dev->z_port_rcv_data;
-	spkts -= dev->z_port_xmit_packets;
-	rpkts -= dev->z_port_rcv_packets;
-
-	memset(pmp->data, 0, sizeof(pmp->data));
-
-	p->port_select = port_select;
-	if (pmp->mad_hdr.attr_mod != 0 ||
-	    (port_select != port && port_select != 0xFF))
-		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
-
-	p->port_xmit_data = cpu_to_be64(swords);
-	p->port_rcv_data = cpu_to_be64(rwords);
-	p->port_xmit_packets = cpu_to_be64(spkts);
-	p->port_rcv_packets = cpu_to_be64(rpkts);
-	p->port_unicast_xmit_packets = cpu_to_be64(dev->n_unicast_xmit);
-	p->port_unicast_rcv_packets = cpu_to_be64(dev->n_unicast_rcv);
-	p->port_multicast_xmit_packets = cpu_to_be64(dev->n_multicast_xmit);
-	p->port_multicast_rcv_packets = cpu_to_be64(dev->n_multicast_rcv);
-
-	return reply((struct ib_smp *) pmp);
-}
-
-static int recv_pma_set_portcounters(struct ib_pma_mad *pmp,
-				     struct ib_device *ibdev, u8 port)
-{
-	struct ib_pma_portcounters *p = (struct ib_pma_portcounters *)
-		pmp->data;
-	struct ipath_ibdev *dev = to_idev(ibdev);
-	struct ipath_verbs_counters cntrs;
-
-	/*
-	 * Since the HW doesn't support clearing counters, we save the
-	 * current count and subtract it from future responses.
-	 */
-	ipath_get_counters(dev->dd, &cntrs);
-
-	if (p->counter_select & IB_PMA_SEL_SYMBOL_ERROR)
-		dev->z_symbol_error_counter = cntrs.symbol_error_counter;
-
-	if (p->counter_select & IB_PMA_SEL_LINK_ERROR_RECOVERY)
-		dev->z_link_error_recovery_counter =
-			cntrs.link_error_recovery_counter;
-
-	if (p->counter_select & IB_PMA_SEL_LINK_DOWNED)
-		dev->z_link_downed_counter = cntrs.link_downed_counter;
-
-	if (p->counter_select & IB_PMA_SEL_PORT_RCV_ERRORS)
-		dev->z_port_rcv_errors =
-			cntrs.port_rcv_errors + dev->rcv_errors;
-
-	if (p->counter_select & IB_PMA_SEL_PORT_RCV_REMPHYS_ERRORS)
-		dev->z_port_rcv_remphys_errors =
-			cntrs.port_rcv_remphys_errors;
-
-	if (p->counter_select & IB_PMA_SEL_PORT_XMIT_DISCARDS)
-		dev->z_port_xmit_discards = cntrs.port_xmit_discards;
-
-	if (p->counter_select & IB_PMA_SEL_LOCAL_LINK_INTEGRITY_ERRORS)
-		dev->z_local_link_integrity_errors =
-			cntrs.local_link_integrity_errors;
-
-	if (p->counter_select & IB_PMA_SEL_EXCESSIVE_BUFFER_OVERRUNS)
-		dev->z_excessive_buffer_overrun_errors =
-			cntrs.excessive_buffer_overrun_errors;
-
-	if (p->counter_select & IB_PMA_SEL_PORT_VL15_DROPPED) {
-		dev->n_vl15_dropped = 0;
-		dev->z_vl15_dropped = cntrs.vl15_dropped;
-	}
-
-	if (p->counter_select & IB_PMA_SEL_PORT_XMIT_DATA)
-		dev->z_port_xmit_data = cntrs.port_xmit_data;
-
-	if (p->counter_select & IB_PMA_SEL_PORT_RCV_DATA)
-		dev->z_port_rcv_data = cntrs.port_rcv_data;
-
-	if (p->counter_select & IB_PMA_SEL_PORT_XMIT_PACKETS)
-		dev->z_port_xmit_packets = cntrs.port_xmit_packets;
-
-	if (p->counter_select & IB_PMA_SEL_PORT_RCV_PACKETS)
-		dev->z_port_rcv_packets = cntrs.port_rcv_packets;
-
-	return recv_pma_get_portcounters(pmp, ibdev, port);
-}
-
-static int recv_pma_set_portcounters_ext(struct ib_pma_mad *pmp,
-					 struct ib_device *ibdev, u8 port)
-{
-	struct ib_pma_portcounters *p = (struct ib_pma_portcounters *)
-		pmp->data;
-	struct ipath_ibdev *dev = to_idev(ibdev);
-	u64 swords, rwords, spkts, rpkts, xwait;
-
-	ipath_snapshot_counters(dev->dd, &swords, &rwords, &spkts,
-				&rpkts, &xwait);
-
-	if (p->counter_select & IB_PMA_SELX_PORT_XMIT_DATA)
-		dev->z_port_xmit_data = swords;
-
-	if (p->counter_select & IB_PMA_SELX_PORT_RCV_DATA)
-		dev->z_port_rcv_data = rwords;
-
-	if (p->counter_select & IB_PMA_SELX_PORT_XMIT_PACKETS)
-		dev->z_port_xmit_packets = spkts;
-
-	if (p->counter_select & IB_PMA_SELX_PORT_RCV_PACKETS)
-		dev->z_port_rcv_packets = rpkts;
-
-	if (p->counter_select & IB_PMA_SELX_PORT_UNI_XMIT_PACKETS)
-		dev->n_unicast_xmit = 0;
-
-	if (p->counter_select & IB_PMA_SELX_PORT_UNI_RCV_PACKETS)
-		dev->n_unicast_rcv = 0;
-
-	if (p->counter_select & IB_PMA_SELX_PORT_MULTI_XMIT_PACKETS)
-		dev->n_multicast_xmit = 0;
-
-	if (p->counter_select & IB_PMA_SELX_PORT_MULTI_RCV_PACKETS)
-		dev->n_multicast_rcv = 0;
-
-	return recv_pma_get_portcounters_ext(pmp, ibdev, port);
-}
-
-static int process_subn(struct ib_device *ibdev, int mad_flags,
-			u8 port_num, const struct ib_mad *in_mad,
-			struct ib_mad *out_mad)
-{
-	struct ib_smp *smp = (struct ib_smp *)out_mad;
-	struct ipath_ibdev *dev = to_idev(ibdev);
-	int ret;
-
-	*out_mad = *in_mad;
-	if (smp->class_version != 1) {
-		smp->status |= IB_SMP_UNSUP_VERSION;
-		ret = reply(smp);
-		goto bail;
-	}
-
-	/* Is the mkey in the process of expiring? */
-	if (dev->mkey_lease_timeout &&
-	    time_after_eq(jiffies, dev->mkey_lease_timeout)) {
-		/* Clear timeout and mkey protection field. */
-		dev->mkey_lease_timeout = 0;
-		dev->mkeyprot = 0;
-	}
-
-	/*
-	 * M_Key checking depends on
-	 * Portinfo:M_Key_protect_bits
-	 */
-	if ((mad_flags & IB_MAD_IGNORE_MKEY) == 0 && dev->mkey != 0 &&
-	    dev->mkey != smp->mkey &&
-	    (smp->method == IB_MGMT_METHOD_SET ||
-	     (smp->method == IB_MGMT_METHOD_GET &&
-	      dev->mkeyprot >= 2))) {
-		if (dev->mkey_violations != 0xFFFF)
-			++dev->mkey_violations;
-		if (dev->mkey_lease_timeout ||
-		    dev->mkey_lease_period == 0) {
-			ret = IB_MAD_RESULT_SUCCESS |
-				IB_MAD_RESULT_CONSUMED;
-			goto bail;
-		}
-		dev->mkey_lease_timeout = jiffies +
-			dev->mkey_lease_period * HZ;
-		/* Future: Generate a trap notice. */
-		ret = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
-		goto bail;
-	} else if (dev->mkey_lease_timeout)
-		dev->mkey_lease_timeout = 0;
-
-	switch (smp->method) {
-	case IB_MGMT_METHOD_GET:
-		switch (smp->attr_id) {
-		case IB_SMP_ATTR_NODE_DESC:
-			ret = recv_subn_get_nodedescription(smp, ibdev);
-			goto bail;
-		case IB_SMP_ATTR_NODE_INFO:
-			ret = recv_subn_get_nodeinfo(smp, ibdev, port_num);
-			goto bail;
-		case IB_SMP_ATTR_GUID_INFO:
-			ret = recv_subn_get_guidinfo(smp, ibdev);
-			goto bail;
-		case IB_SMP_ATTR_PORT_INFO:
-			ret = recv_subn_get_portinfo(smp, ibdev, port_num);
-			goto bail;
-		case IB_SMP_ATTR_PKEY_TABLE:
-			ret = recv_subn_get_pkeytable(smp, ibdev);
-			goto bail;
-		case IB_SMP_ATTR_SM_INFO:
-			if (dev->port_cap_flags & IB_PORT_SM_DISABLED) {
-				ret = IB_MAD_RESULT_SUCCESS |
-					IB_MAD_RESULT_CONSUMED;
-				goto bail;
-			}
-			if (dev->port_cap_flags & IB_PORT_SM) {
-				ret = IB_MAD_RESULT_SUCCESS;
-				goto bail;
-			}
-			/* FALLTHROUGH */
-		default:
-			smp->status |= IB_SMP_UNSUP_METH_ATTR;
-			ret = reply(smp);
-			goto bail;
-		}
-
-	case IB_MGMT_METHOD_SET:
-		switch (smp->attr_id) {
-		case IB_SMP_ATTR_GUID_INFO:
-			ret = recv_subn_set_guidinfo(smp, ibdev);
-			goto bail;
-		case IB_SMP_ATTR_PORT_INFO:
-			ret = recv_subn_set_portinfo(smp, ibdev, port_num);
-			goto bail;
-		case IB_SMP_ATTR_PKEY_TABLE:
-			ret = recv_subn_set_pkeytable(smp, ibdev, port_num);
-			goto bail;
-		case IB_SMP_ATTR_SM_INFO:
-			if (dev->port_cap_flags & IB_PORT_SM_DISABLED) {
-				ret = IB_MAD_RESULT_SUCCESS |
-					IB_MAD_RESULT_CONSUMED;
-				goto bail;
-			}
-			if (dev->port_cap_flags & IB_PORT_SM) {
-				ret = IB_MAD_RESULT_SUCCESS;
-				goto bail;
-			}
-			/* FALLTHROUGH */
-		default:
-			smp->status |= IB_SMP_UNSUP_METH_ATTR;
-			ret = reply(smp);
-			goto bail;
-		}
-
-	case IB_MGMT_METHOD_TRAP:
-	case IB_MGMT_METHOD_REPORT:
-	case IB_MGMT_METHOD_REPORT_RESP:
-	case IB_MGMT_METHOD_TRAP_REPRESS:
-	case IB_MGMT_METHOD_GET_RESP:
-		/*
-		 * The ib_mad module will call us to process responses
-		 * before checking for other consumers.
-		 * Just tell the caller to process it normally.
-		 */
-		ret = IB_MAD_RESULT_SUCCESS;
-		goto bail;
-	default:
-		smp->status |= IB_SMP_UNSUP_METHOD;
-		ret = reply(smp);
-	}
-
-bail:
-	return ret;
-}
-
-static int process_perf(struct ib_device *ibdev, u8 port_num,
-			const struct ib_mad *in_mad,
-			struct ib_mad *out_mad)
-{
-	struct ib_pma_mad *pmp = (struct ib_pma_mad *)out_mad;
-	int ret;
-
-	*out_mad = *in_mad;
-	if (pmp->mad_hdr.class_version != 1) {
-		pmp->mad_hdr.status |= IB_SMP_UNSUP_VERSION;
-		ret = reply((struct ib_smp *) pmp);
-		goto bail;
-	}
-
-	switch (pmp->mad_hdr.method) {
-	case IB_MGMT_METHOD_GET:
-		switch (pmp->mad_hdr.attr_id) {
-		case IB_PMA_CLASS_PORT_INFO:
-			ret = recv_pma_get_classportinfo(pmp);
-			goto bail;
-		case IB_PMA_PORT_SAMPLES_CONTROL:
-			ret = recv_pma_get_portsamplescontrol(pmp, ibdev,
-							      port_num);
-			goto bail;
-		case IB_PMA_PORT_SAMPLES_RESULT:
-			ret = recv_pma_get_portsamplesresult(pmp, ibdev);
-			goto bail;
-		case IB_PMA_PORT_SAMPLES_RESULT_EXT:
-			ret = recv_pma_get_portsamplesresult_ext(pmp,
-								 ibdev);
-			goto bail;
-		case IB_PMA_PORT_COUNTERS:
-			ret = recv_pma_get_portcounters(pmp, ibdev,
-							port_num);
-			goto bail;
-		case IB_PMA_PORT_COUNTERS_EXT:
-			ret = recv_pma_get_portcounters_ext(pmp, ibdev,
-							    port_num);
-			goto bail;
-		default:
-			pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
-			ret = reply((struct ib_smp *) pmp);
-			goto bail;
-		}
-
-	case IB_MGMT_METHOD_SET:
-		switch (pmp->mad_hdr.attr_id) {
-		case IB_PMA_PORT_SAMPLES_CONTROL:
-			ret = recv_pma_set_portsamplescontrol(pmp, ibdev,
-							      port_num);
-			goto bail;
-		case IB_PMA_PORT_COUNTERS:
-			ret = recv_pma_set_portcounters(pmp, ibdev,
-							port_num);
-			goto bail;
-		case IB_PMA_PORT_COUNTERS_EXT:
-			ret = recv_pma_set_portcounters_ext(pmp, ibdev,
-							    port_num);
-			goto bail;
-		default:
-			pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
-			ret = reply((struct ib_smp *) pmp);
-			goto bail;
-		}
-
-	case IB_MGMT_METHOD_GET_RESP:
-		/*
-		 * The ib_mad module will call us to process responses
-		 * before checking for other consumers.
-		 * Just tell the caller to process it normally.
-		 */
-		ret = IB_MAD_RESULT_SUCCESS;
-		goto bail;
-	default:
-		pmp->mad_hdr.status |= IB_SMP_UNSUP_METHOD;
-		ret = reply((struct ib_smp *) pmp);
-	}
-
-bail:
-	return ret;
-}
-
-/**
- * ipath_process_mad - process an incoming MAD packet
- * @ibdev: the infiniband device this packet came in on
- * @mad_flags: MAD flags
- * @port_num: the port number this packet came in on
- * @in_wc: the work completion entry for this packet
- * @in_grh: the global route header for this packet
- * @in_mad: the incoming MAD
- * @out_mad: any outgoing MAD reply
- *
- * Returns IB_MAD_RESULT_SUCCESS if this is a MAD that we are not
- * interested in processing.
- *
- * Note that the verbs framework has already done the MAD sanity checks,
- * and hop count/pointer updating for IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE
- * MADs.
- *
- * This is called by the ib_mad module.
- */
-int ipath_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
-		      const struct ib_wc *in_wc, const struct ib_grh *in_grh,
-		      const struct ib_mad_hdr *in, size_t in_mad_size,
-		      struct ib_mad_hdr *out, size_t *out_mad_size,
-		      u16 *out_mad_pkey_index)
-{
-	int ret;
-	const struct ib_mad *in_mad = (const struct ib_mad *)in;
-	struct ib_mad *out_mad = (struct ib_mad *)out;
-
-	if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) ||
-			 *out_mad_size != sizeof(*out_mad)))
-		return IB_MAD_RESULT_FAILURE;
-
-	switch (in_mad->mad_hdr.mgmt_class) {
-	case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
-	case IB_MGMT_CLASS_SUBN_LID_ROUTED:
-		ret = process_subn(ibdev, mad_flags, port_num,
-				   in_mad, out_mad);
-		goto bail;
-	case IB_MGMT_CLASS_PERF_MGMT:
-		ret = process_perf(ibdev, port_num, in_mad, out_mad);
-		goto bail;
-	default:
-		ret = IB_MAD_RESULT_SUCCESS;
-	}
-
-bail:
-	return ret;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_mmap.c b/drivers/staging/rdma/ipath/ipath_mmap.c
deleted file mode 100644
index e732742..0000000
--- a/drivers/staging/rdma/ipath/ipath_mmap.c
+++ /dev/null
@@ -1,174 +0,0 @@
-/*
- * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/module.h>
-#include <linux/vmalloc.h>
-#include <linux/slab.h>
-#include <linux/mm.h>
-#include <linux/errno.h>
-#include <asm/pgtable.h>
-
-#include "ipath_verbs.h"
-
-/**
- * ipath_release_mmap_info - free mmap info structure
- * @ref: a pointer to the kref within struct ipath_mmap_info
- */
-void ipath_release_mmap_info(struct kref *ref)
-{
-	struct ipath_mmap_info *ip =
-		container_of(ref, struct ipath_mmap_info, ref);
-	struct ipath_ibdev *dev = to_idev(ip->context->device);
-
-	spin_lock_irq(&dev->pending_lock);
-	list_del(&ip->pending_mmaps);
-	spin_unlock_irq(&dev->pending_lock);
-
-	vfree(ip->obj);
-	kfree(ip);
-}
-
-/*
- * open and close keep track of how many times the CQ is mapped,
- * to avoid releasing it.
- */
-static void ipath_vma_open(struct vm_area_struct *vma)
-{
-	struct ipath_mmap_info *ip = vma->vm_private_data;
-
-	kref_get(&ip->ref);
-}
-
-static void ipath_vma_close(struct vm_area_struct *vma)
-{
-	struct ipath_mmap_info *ip = vma->vm_private_data;
-
-	kref_put(&ip->ref, ipath_release_mmap_info);
-}
-
-static const struct vm_operations_struct ipath_vm_ops = {
-	.open =     ipath_vma_open,
-	.close =    ipath_vma_close,
-};
-
-/**
- * ipath_mmap - create a new mmap region
- * @context: the IB user context of the process making the mmap() call
- * @vma: the VMA to be initialized
- * Return zero if the mmap is OK. Otherwise, return an errno.
- */
-int ipath_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
-{
-	struct ipath_ibdev *dev = to_idev(context->device);
-	unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
-	unsigned long size = vma->vm_end - vma->vm_start;
-	struct ipath_mmap_info *ip, *pp;
-	int ret = -EINVAL;
-
-	/*
-	 * Search the device's list of objects waiting for a mmap call.
-	 * Normally, this list is very short since a call to create a
-	 * CQ, QP, or SRQ is soon followed by a call to mmap().
-	 */
-	spin_lock_irq(&dev->pending_lock);
-	list_for_each_entry_safe(ip, pp, &dev->pending_mmaps,
-				 pending_mmaps) {
-		/* Only the creator is allowed to mmap the object */
-		if (context != ip->context || (__u64) offset != ip->offset)
-			continue;
-		/* Don't allow a mmap larger than the object. */
-		if (size > ip->size)
-			break;
-
-		list_del_init(&ip->pending_mmaps);
-		spin_unlock_irq(&dev->pending_lock);
-
-		ret = remap_vmalloc_range(vma, ip->obj, 0);
-		if (ret)
-			goto done;
-		vma->vm_ops = &ipath_vm_ops;
-		vma->vm_private_data = ip;
-		ipath_vma_open(vma);
-		goto done;
-	}
-	spin_unlock_irq(&dev->pending_lock);
-done:
-	return ret;
-}
-
-/*
- * Allocate information for ipath_mmap
- */
-struct ipath_mmap_info *ipath_create_mmap_info(struct ipath_ibdev *dev,
-					       u32 size,
-					       struct ib_ucontext *context,
-					       void *obj) {
-	struct ipath_mmap_info *ip;
-
-	ip = kmalloc(sizeof *ip, GFP_KERNEL);
-	if (!ip)
-		goto bail;
-
-	size = PAGE_ALIGN(size);
-
-	spin_lock_irq(&dev->mmap_offset_lock);
-	if (dev->mmap_offset == 0)
-		dev->mmap_offset = PAGE_SIZE;
-	ip->offset = dev->mmap_offset;
-	dev->mmap_offset += size;
-	spin_unlock_irq(&dev->mmap_offset_lock);
-
-	INIT_LIST_HEAD(&ip->pending_mmaps);
-	ip->size = size;
-	ip->context = context;
-	ip->obj = obj;
-	kref_init(&ip->ref);
-
-bail:
-	return ip;
-}
-
-void ipath_update_mmap_info(struct ipath_ibdev *dev,
-			    struct ipath_mmap_info *ip,
-			    u32 size, void *obj) {
-	size = PAGE_ALIGN(size);
-
-	spin_lock_irq(&dev->mmap_offset_lock);
-	if (dev->mmap_offset == 0)
-		dev->mmap_offset = PAGE_SIZE;
-	ip->offset = dev->mmap_offset;
-	dev->mmap_offset += size;
-	spin_unlock_irq(&dev->mmap_offset_lock);
-
-	ip->size = size;
-	ip->obj = obj;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_mr.c b/drivers/staging/rdma/ipath/ipath_mr.c
deleted file mode 100644
index b76b0ce..0000000
--- a/drivers/staging/rdma/ipath/ipath_mr.c
+++ /dev/null
@@ -1,370 +0,0 @@
-/*
- * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
- * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/slab.h>
-
-#include <rdma/ib_umem.h>
-#include <rdma/ib_pack.h>
-#include <rdma/ib_smi.h>
-
-#include "ipath_verbs.h"
-
-/* Fast memory region */
-struct ipath_fmr {
-	struct ib_fmr ibfmr;
-	u8 page_shift;
-	struct ipath_mregion mr;        /* must be last */
-};
-
-static inline struct ipath_fmr *to_ifmr(struct ib_fmr *ibfmr)
-{
-	return container_of(ibfmr, struct ipath_fmr, ibfmr);
-}
-
-/**
- * ipath_get_dma_mr - get a DMA memory region
- * @pd: protection domain for this memory region
- * @acc: access flags
- *
- * Returns the memory region on success, otherwise returns an errno.
- * Note that all DMA addresses should be created via the
- * struct ib_dma_mapping_ops functions (see ipath_dma.c).
- */
-struct ib_mr *ipath_get_dma_mr(struct ib_pd *pd, int acc)
-{
-	struct ipath_mr *mr;
-	struct ib_mr *ret;
-
-	mr = kzalloc(sizeof *mr, GFP_KERNEL);
-	if (!mr) {
-		ret = ERR_PTR(-ENOMEM);
-		goto bail;
-	}
-
-	mr->mr.access_flags = acc;
-	ret = &mr->ibmr;
-
-bail:
-	return ret;
-}
-
-static struct ipath_mr *alloc_mr(int count,
-				 struct ipath_lkey_table *lk_table)
-{
-	struct ipath_mr *mr;
-	int m, i = 0;
-
-	/* Allocate struct plus pointers to first level page tables. */
-	m = (count + IPATH_SEGSZ - 1) / IPATH_SEGSZ;
-	mr = kmalloc(sizeof *mr + m * sizeof mr->mr.map[0], GFP_KERNEL);
-	if (!mr)
-		goto done;
-
-	/* Allocate first level page tables. */
-	for (; i < m; i++) {
-		mr->mr.map[i] = kmalloc(sizeof *mr->mr.map[0], GFP_KERNEL);
-		if (!mr->mr.map[i])
-			goto bail;
-	}
-	mr->mr.mapsz = m;
-
-	if (!ipath_alloc_lkey(lk_table, &mr->mr))
-		goto bail;
-	mr->ibmr.rkey = mr->ibmr.lkey = mr->mr.lkey;
-
-	goto done;
-
-bail:
-	while (i) {
-		i--;
-		kfree(mr->mr.map[i]);
-	}
-	kfree(mr);
-	mr = NULL;
-
-done:
-	return mr;
-}
-
-/**
- * ipath_reg_user_mr - register a userspace memory region
- * @pd: protection domain for this memory region
- * @start: starting userspace address
- * @length: length of region to register
- * @virt_addr: virtual address to use (from HCA's point of view)
- * @mr_access_flags: access flags for this memory region
- * @udata: unused by the InfiniPath driver
- *
- * Returns the memory region on success, otherwise returns an errno.
- */
-struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
-				u64 virt_addr, int mr_access_flags,
-				struct ib_udata *udata)
-{
-	struct ipath_mr *mr;
-	struct ib_umem *umem;
-	int n, m, entry;
-	struct scatterlist *sg;
-	struct ib_mr *ret;
-
-	if (length == 0) {
-		ret = ERR_PTR(-EINVAL);
-		goto bail;
-	}
-
-	umem = ib_umem_get(pd->uobject->context, start, length,
-			   mr_access_flags, 0);
-	if (IS_ERR(umem))
-		return (void *) umem;
-
-	n = umem->nmap;
-	mr = alloc_mr(n, &to_idev(pd->device)->lk_table);
-	if (!mr) {
-		ret = ERR_PTR(-ENOMEM);
-		ib_umem_release(umem);
-		goto bail;
-	}
-
-	mr->mr.pd = pd;
-	mr->mr.user_base = start;
-	mr->mr.iova = virt_addr;
-	mr->mr.length = length;
-	mr->mr.offset = ib_umem_offset(umem);
-	mr->mr.access_flags = mr_access_flags;
-	mr->mr.max_segs = n;
-	mr->umem = umem;
-
-	m = 0;
-	n = 0;
-	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
-		void *vaddr;
-
-		vaddr = page_address(sg_page(sg));
-		if (!vaddr) {
-			ret = ERR_PTR(-EINVAL);
-			goto bail;
-		}
-		mr->mr.map[m]->segs[n].vaddr = vaddr;
-		mr->mr.map[m]->segs[n].length = umem->page_size;
-		n++;
-		if (n == IPATH_SEGSZ) {
-			m++;
-			n = 0;
-		}
-	}
-	ret = &mr->ibmr;
-
-bail:
-	return ret;
-}
-
-/**
- * ipath_dereg_mr - unregister and free a memory region
- * @ibmr: the memory region to free
- *
- * Returns 0 on success.
- *
- * Note that this is called to free MRs created by ipath_get_dma_mr()
- * or ipath_reg_user_mr().
- */
-int ipath_dereg_mr(struct ib_mr *ibmr)
-{
-	struct ipath_mr *mr = to_imr(ibmr);
-	int i;
-
-	ipath_free_lkey(&to_idev(ibmr->device)->lk_table, ibmr->lkey);
-	i = mr->mr.mapsz;
-	while (i) {
-		i--;
-		kfree(mr->mr.map[i]);
-	}
-
-	if (mr->umem)
-		ib_umem_release(mr->umem);
-
-	kfree(mr);
-	return 0;
-}
-
-/**
- * ipath_alloc_fmr - allocate a fast memory region
- * @pd: the protection domain for this memory region
- * @mr_access_flags: access flags for this memory region
- * @fmr_attr: fast memory region attributes
- *
- * Returns the memory region on success, otherwise returns an errno.
- */
-struct ib_fmr *ipath_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
-			       struct ib_fmr_attr *fmr_attr)
-{
-	struct ipath_fmr *fmr;
-	int m, i = 0;
-	struct ib_fmr *ret;
-
-	/* Allocate struct plus pointers to first level page tables. */
-	m = (fmr_attr->max_pages + IPATH_SEGSZ - 1) / IPATH_SEGSZ;
-	fmr = kmalloc(sizeof *fmr + m * sizeof fmr->mr.map[0], GFP_KERNEL);
-	if (!fmr)
-		goto bail;
-
-	/* Allocate first level page tables. */
-	for (; i < m; i++) {
-		fmr->mr.map[i] = kmalloc(sizeof *fmr->mr.map[0],
-					 GFP_KERNEL);
-		if (!fmr->mr.map[i])
-			goto bail;
-	}
-	fmr->mr.mapsz = m;
-
-	/*
-	 * ib_alloc_fmr() will initialize fmr->ibfmr except for lkey &
-	 * rkey.
-	 */
-	if (!ipath_alloc_lkey(&to_idev(pd->device)->lk_table, &fmr->mr))
-		goto bail;
-	fmr->ibfmr.rkey = fmr->ibfmr.lkey = fmr->mr.lkey;
-	/*
-	 * Resources are allocated but no valid mapping (RKEY can't be
-	 * used).
-	 */
-	fmr->mr.pd = pd;
-	fmr->mr.user_base = 0;
-	fmr->mr.iova = 0;
-	fmr->mr.length = 0;
-	fmr->mr.offset = 0;
-	fmr->mr.access_flags = mr_access_flags;
-	fmr->mr.max_segs = fmr_attr->max_pages;
-	fmr->page_shift = fmr_attr->page_shift;
-
-	ret = &fmr->ibfmr;
-	goto done;
-
-bail:
-	while (i)
-		kfree(fmr->mr.map[--i]);
-	kfree(fmr);
-	ret = ERR_PTR(-ENOMEM);
-
-done:
-	return ret;
-}
-
-/**
- * ipath_map_phys_fmr - set up a fast memory region
- * @ibmfr: the fast memory region to set up
- * @page_list: the list of pages to associate with the fast memory region
- * @list_len: the number of pages to associate with the fast memory region
- * @iova: the virtual address of the start of the fast memory region
- *
- * This may be called from interrupt context.
- */
-
-int ipath_map_phys_fmr(struct ib_fmr *ibfmr, u64 * page_list,
-		       int list_len, u64 iova)
-{
-	struct ipath_fmr *fmr = to_ifmr(ibfmr);
-	struct ipath_lkey_table *rkt;
-	unsigned long flags;
-	int m, n, i;
-	u32 ps;
-	int ret;
-
-	if (list_len > fmr->mr.max_segs) {
-		ret = -EINVAL;
-		goto bail;
-	}
-	rkt = &to_idev(ibfmr->device)->lk_table;
-	spin_lock_irqsave(&rkt->lock, flags);
-	fmr->mr.user_base = iova;
-	fmr->mr.iova = iova;
-	ps = 1 << fmr->page_shift;
-	fmr->mr.length = list_len * ps;
-	m = 0;
-	n = 0;
-	ps = 1 << fmr->page_shift;
-	for (i = 0; i < list_len; i++) {
-		fmr->mr.map[m]->segs[n].vaddr = (void *) page_list[i];
-		fmr->mr.map[m]->segs[n].length = ps;
-		if (++n == IPATH_SEGSZ) {
-			m++;
-			n = 0;
-		}
-	}
-	spin_unlock_irqrestore(&rkt->lock, flags);
-	ret = 0;
-
-bail:
-	return ret;
-}
-
-/**
- * ipath_unmap_fmr - unmap fast memory regions
- * @fmr_list: the list of fast memory regions to unmap
- *
- * Returns 0 on success.
- */
-int ipath_unmap_fmr(struct list_head *fmr_list)
-{
-	struct ipath_fmr *fmr;
-	struct ipath_lkey_table *rkt;
-	unsigned long flags;
-
-	list_for_each_entry(fmr, fmr_list, ibfmr.list) {
-		rkt = &to_idev(fmr->ibfmr.device)->lk_table;
-		spin_lock_irqsave(&rkt->lock, flags);
-		fmr->mr.user_base = 0;
-		fmr->mr.iova = 0;
-		fmr->mr.length = 0;
-		spin_unlock_irqrestore(&rkt->lock, flags);
-	}
-	return 0;
-}
-
-/**
- * ipath_dealloc_fmr - deallocate a fast memory region
- * @ibfmr: the fast memory region to deallocate
- *
- * Returns 0 on success.
- */
-int ipath_dealloc_fmr(struct ib_fmr *ibfmr)
-{
-	struct ipath_fmr *fmr = to_ifmr(ibfmr);
-	int i;
-
-	ipath_free_lkey(&to_idev(ibfmr->device)->lk_table, ibfmr->lkey);
-	i = fmr->mr.mapsz;
-	while (i)
-		kfree(fmr->mr.map[--i]);
-	kfree(fmr);
-	return 0;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_qp.c b/drivers/staging/rdma/ipath/ipath_qp.c
deleted file mode 100644
index 280cd2d..0000000
--- a/drivers/staging/rdma/ipath/ipath_qp.c
+++ /dev/null
@@ -1,1079 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/err.h>
-#include <linux/slab.h>
-#include <linux/vmalloc.h>
-
-#include "ipath_verbs.h"
-#include "ipath_kernel.h"
-
-#define BITS_PER_PAGE		(PAGE_SIZE*BITS_PER_BYTE)
-#define BITS_PER_PAGE_MASK	(BITS_PER_PAGE-1)
-#define mk_qpn(qpt, map, off)	(((map) - (qpt)->map) * BITS_PER_PAGE + \
-				 (off))
-#define find_next_offset(map, off) find_next_zero_bit((map)->page, \
-						      BITS_PER_PAGE, off)
-
-/*
- * Convert the AETH credit code into the number of credits.
- */
-static u32 credit_table[31] = {
-	0,			/* 0 */
-	1,			/* 1 */
-	2,			/* 2 */
-	3,			/* 3 */
-	4,			/* 4 */
-	6,			/* 5 */
-	8,			/* 6 */
-	12,			/* 7 */
-	16,			/* 8 */
-	24,			/* 9 */
-	32,			/* A */
-	48,			/* B */
-	64,			/* C */
-	96,			/* D */
-	128,			/* E */
-	192,			/* F */
-	256,			/* 10 */
-	384,			/* 11 */
-	512,			/* 12 */
-	768,			/* 13 */
-	1024,			/* 14 */
-	1536,			/* 15 */
-	2048,			/* 16 */
-	3072,			/* 17 */
-	4096,			/* 18 */
-	6144,			/* 19 */
-	8192,			/* 1A */
-	12288,			/* 1B */
-	16384,			/* 1C */
-	24576,			/* 1D */
-	32768			/* 1E */
-};
-
-
-static void get_map_page(struct ipath_qp_table *qpt, struct qpn_map *map)
-{
-	unsigned long page = get_zeroed_page(GFP_KERNEL);
-	unsigned long flags;
-
-	/*
-	 * Free the page if someone raced with us installing it.
-	 */
-
-	spin_lock_irqsave(&qpt->lock, flags);
-	if (map->page)
-		free_page(page);
-	else
-		map->page = (void *)page;
-	spin_unlock_irqrestore(&qpt->lock, flags);
-}
-
-
-static int alloc_qpn(struct ipath_qp_table *qpt, enum ib_qp_type type)
-{
-	u32 i, offset, max_scan, qpn;
-	struct qpn_map *map;
-	u32 ret = -1;
-
-	if (type == IB_QPT_SMI)
-		ret = 0;
-	else if (type == IB_QPT_GSI)
-		ret = 1;
-
-	if (ret != -1) {
-		map = &qpt->map[0];
-		if (unlikely(!map->page)) {
-			get_map_page(qpt, map);
-			if (unlikely(!map->page)) {
-				ret = -ENOMEM;
-				goto bail;
-			}
-		}
-		if (!test_and_set_bit(ret, map->page))
-			atomic_dec(&map->n_free);
-		else
-			ret = -EBUSY;
-		goto bail;
-	}
-
-	qpn = qpt->last + 1;
-	if (qpn >= QPN_MAX)
-		qpn = 2;
-	offset = qpn & BITS_PER_PAGE_MASK;
-	map = &qpt->map[qpn / BITS_PER_PAGE];
-	max_scan = qpt->nmaps - !offset;
-	for (i = 0;;) {
-		if (unlikely(!map->page)) {
-			get_map_page(qpt, map);
-			if (unlikely(!map->page))
-				break;
-		}
-		if (likely(atomic_read(&map->n_free))) {
-			do {
-				if (!test_and_set_bit(offset, map->page)) {
-					atomic_dec(&map->n_free);
-					qpt->last = qpn;
-					ret = qpn;
-					goto bail;
-				}
-				offset = find_next_offset(map, offset);
-				qpn = mk_qpn(qpt, map, offset);
-				/*
-				 * This test differs from alloc_pidmap().
-				 * If find_next_offset() does find a zero
-				 * bit, we don't need to check for QPN
-				 * wrapping around past our starting QPN.
-				 * We just need to be sure we don't loop
-				 * forever.
-				 */
-			} while (offset < BITS_PER_PAGE && qpn < QPN_MAX);
-		}
-		/*
-		 * In order to keep the number of pages allocated to a
-		 * minimum, we scan the all existing pages before increasing
-		 * the size of the bitmap table.
-		 */
-		if (++i > max_scan) {
-			if (qpt->nmaps == QPNMAP_ENTRIES)
-				break;
-			map = &qpt->map[qpt->nmaps++];
-			offset = 0;
-		} else if (map < &qpt->map[qpt->nmaps]) {
-			++map;
-			offset = 0;
-		} else {
-			map = &qpt->map[0];
-			offset = 2;
-		}
-		qpn = mk_qpn(qpt, map, offset);
-	}
-
-	ret = -ENOMEM;
-
-bail:
-	return ret;
-}
-
-static void free_qpn(struct ipath_qp_table *qpt, u32 qpn)
-{
-	struct qpn_map *map;
-
-	map = qpt->map + qpn / BITS_PER_PAGE;
-	if (map->page)
-		clear_bit(qpn & BITS_PER_PAGE_MASK, map->page);
-	atomic_inc(&map->n_free);
-}
-
-/**
- * ipath_alloc_qpn - allocate a QP number
- * @qpt: the QP table
- * @qp: the QP
- * @type: the QP type (IB_QPT_SMI and IB_QPT_GSI are special)
- *
- * Allocate the next available QPN and put the QP into the hash table.
- * The hash table holds a reference to the QP.
- */
-static int ipath_alloc_qpn(struct ipath_qp_table *qpt, struct ipath_qp *qp,
-			   enum ib_qp_type type)
-{
-	unsigned long flags;
-	int ret;
-
-	ret = alloc_qpn(qpt, type);
-	if (ret < 0)
-		goto bail;
-	qp->ibqp.qp_num = ret;
-
-	/* Add the QP to the hash table. */
-	spin_lock_irqsave(&qpt->lock, flags);
-
-	ret %= qpt->max;
-	qp->next = qpt->table[ret];
-	qpt->table[ret] = qp;
-	atomic_inc(&qp->refcount);
-
-	spin_unlock_irqrestore(&qpt->lock, flags);
-	ret = 0;
-
-bail:
-	return ret;
-}
-
-/**
- * ipath_free_qp - remove a QP from the QP table
- * @qpt: the QP table
- * @qp: the QP to remove
- *
- * Remove the QP from the table so it can't be found asynchronously by
- * the receive interrupt routine.
- */
-static void ipath_free_qp(struct ipath_qp_table *qpt, struct ipath_qp *qp)
-{
-	struct ipath_qp *q, **qpp;
-	unsigned long flags;
-
-	spin_lock_irqsave(&qpt->lock, flags);
-
-	/* Remove QP from the hash table. */
-	qpp = &qpt->table[qp->ibqp.qp_num % qpt->max];
-	for (; (q = *qpp) != NULL; qpp = &q->next) {
-		if (q == qp) {
-			*qpp = qp->next;
-			qp->next = NULL;
-			atomic_dec(&qp->refcount);
-			break;
-		}
-	}
-
-	spin_unlock_irqrestore(&qpt->lock, flags);
-}
-
-/**
- * ipath_free_all_qps - check for QPs still in use
- * @qpt: the QP table to empty
- *
- * There should not be any QPs still in use.
- * Free memory for table.
- */
-unsigned ipath_free_all_qps(struct ipath_qp_table *qpt)
-{
-	unsigned long flags;
-	struct ipath_qp *qp;
-	u32 n, qp_inuse = 0;
-
-	spin_lock_irqsave(&qpt->lock, flags);
-	for (n = 0; n < qpt->max; n++) {
-		qp = qpt->table[n];
-		qpt->table[n] = NULL;
-
-		for (; qp; qp = qp->next)
-			qp_inuse++;
-	}
-	spin_unlock_irqrestore(&qpt->lock, flags);
-
-	for (n = 0; n < ARRAY_SIZE(qpt->map); n++)
-		if (qpt->map[n].page)
-			free_page((unsigned long) qpt->map[n].page);
-	return qp_inuse;
-}
-
-/**
- * ipath_lookup_qpn - return the QP with the given QPN
- * @qpt: the QP table
- * @qpn: the QP number to look up
- *
- * The caller is responsible for decrementing the QP reference count
- * when done.
- */
-struct ipath_qp *ipath_lookup_qpn(struct ipath_qp_table *qpt, u32 qpn)
-{
-	unsigned long flags;
-	struct ipath_qp *qp;
-
-	spin_lock_irqsave(&qpt->lock, flags);
-
-	for (qp = qpt->table[qpn % qpt->max]; qp; qp = qp->next) {
-		if (qp->ibqp.qp_num == qpn) {
-			atomic_inc(&qp->refcount);
-			break;
-		}
-	}
-
-	spin_unlock_irqrestore(&qpt->lock, flags);
-	return qp;
-}
-
-/**
- * ipath_reset_qp - initialize the QP state to the reset state
- * @qp: the QP to reset
- * @type: the QP type
- */
-static void ipath_reset_qp(struct ipath_qp *qp, enum ib_qp_type type)
-{
-	qp->remote_qpn = 0;
-	qp->qkey = 0;
-	qp->qp_access_flags = 0;
-	atomic_set(&qp->s_dma_busy, 0);
-	qp->s_flags &= IPATH_S_SIGNAL_REQ_WR;
-	qp->s_hdrwords = 0;
-	qp->s_wqe = NULL;
-	qp->s_pkt_delay = 0;
-	qp->s_draining = 0;
-	qp->s_psn = 0;
-	qp->r_psn = 0;
-	qp->r_msn = 0;
-	if (type == IB_QPT_RC) {
-		qp->s_state = IB_OPCODE_RC_SEND_LAST;
-		qp->r_state = IB_OPCODE_RC_SEND_LAST;
-	} else {
-		qp->s_state = IB_OPCODE_UC_SEND_LAST;
-		qp->r_state = IB_OPCODE_UC_SEND_LAST;
-	}
-	qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
-	qp->r_nak_state = 0;
-	qp->r_aflags = 0;
-	qp->r_flags = 0;
-	qp->s_rnr_timeout = 0;
-	qp->s_head = 0;
-	qp->s_tail = 0;
-	qp->s_cur = 0;
-	qp->s_last = 0;
-	qp->s_ssn = 1;
-	qp->s_lsn = 0;
-	memset(qp->s_ack_queue, 0, sizeof(qp->s_ack_queue));
-	qp->r_head_ack_queue = 0;
-	qp->s_tail_ack_queue = 0;
-	qp->s_num_rd_atomic = 0;
-	if (qp->r_rq.wq) {
-		qp->r_rq.wq->head = 0;
-		qp->r_rq.wq->tail = 0;
-	}
-}
-
-/**
- * ipath_error_qp - put a QP into the error state
- * @qp: the QP to put into the error state
- * @err: the receive completion error to signal if a RWQE is active
- *
- * Flushes both send and receive work queues.
- * Returns true if last WQE event should be generated.
- * The QP s_lock should be held and interrupts disabled.
- * If we are already in error state, just return.
- */
-
-int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err)
-{
-	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
-	struct ib_wc wc;
-	int ret = 0;
-
-	if (qp->state == IB_QPS_ERR)
-		goto bail;
-
-	qp->state = IB_QPS_ERR;
-
-	spin_lock(&dev->pending_lock);
-	if (!list_empty(&qp->timerwait))
-		list_del_init(&qp->timerwait);
-	if (!list_empty(&qp->piowait))
-		list_del_init(&qp->piowait);
-	spin_unlock(&dev->pending_lock);
-
-	/* Schedule the sending tasklet to drain the send work queue. */
-	if (qp->s_last != qp->s_head)
-		ipath_schedule_send(qp);
-
-	memset(&wc, 0, sizeof(wc));
-	wc.qp = &qp->ibqp;
-	wc.opcode = IB_WC_RECV;
-
-	if (test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags)) {
-		wc.wr_id = qp->r_wr_id;
-		wc.status = err;
-		ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
-	}
-	wc.status = IB_WC_WR_FLUSH_ERR;
-
-	if (qp->r_rq.wq) {
-		struct ipath_rwq *wq;
-		u32 head;
-		u32 tail;
-
-		spin_lock(&qp->r_rq.lock);
-
-		/* sanity check pointers before trusting them */
-		wq = qp->r_rq.wq;
-		head = wq->head;
-		if (head >= qp->r_rq.size)
-			head = 0;
-		tail = wq->tail;
-		if (tail >= qp->r_rq.size)
-			tail = 0;
-		while (tail != head) {
-			wc.wr_id = get_rwqe_ptr(&qp->r_rq, tail)->wr_id;
-			if (++tail >= qp->r_rq.size)
-				tail = 0;
-			ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
-		}
-		wq->tail = tail;
-
-		spin_unlock(&qp->r_rq.lock);
-	} else if (qp->ibqp.event_handler)
-		ret = 1;
-
-bail:
-	return ret;
-}
-
-/**
- * ipath_modify_qp - modify the attributes of a queue pair
- * @ibqp: the queue pair who's attributes we're modifying
- * @attr: the new attributes
- * @attr_mask: the mask of attributes to modify
- * @udata: user data for ipathverbs.so
- *
- * Returns 0 on success, otherwise returns an errno.
- */
-int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
-		    int attr_mask, struct ib_udata *udata)
-{
-	struct ipath_ibdev *dev = to_idev(ibqp->device);
-	struct ipath_qp *qp = to_iqp(ibqp);
-	enum ib_qp_state cur_state, new_state;
-	int lastwqe = 0;
-	int ret;
-
-	spin_lock_irq(&qp->s_lock);
-
-	cur_state = attr_mask & IB_QP_CUR_STATE ?
-		attr->cur_qp_state : qp->state;
-	new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
-
-	if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
-				attr_mask, IB_LINK_LAYER_UNSPECIFIED))
-		goto inval;
-
-	if (attr_mask & IB_QP_AV) {
-		if (attr->ah_attr.dlid == 0 ||
-		    attr->ah_attr.dlid >= IPATH_MULTICAST_LID_BASE)
-			goto inval;
-
-		if ((attr->ah_attr.ah_flags & IB_AH_GRH) &&
-		    (attr->ah_attr.grh.sgid_index > 1))
-			goto inval;
-	}
-
-	if (attr_mask & IB_QP_PKEY_INDEX)
-		if (attr->pkey_index >= ipath_get_npkeys(dev->dd))
-			goto inval;
-
-	if (attr_mask & IB_QP_MIN_RNR_TIMER)
-		if (attr->min_rnr_timer > 31)
-			goto inval;
-
-	if (attr_mask & IB_QP_PORT)
-		if (attr->port_num == 0 ||
-		    attr->port_num > ibqp->device->phys_port_cnt)
-			goto inval;
-
-	/*
-	 * don't allow invalid Path MTU values or greater than 2048
-	 * unless we are configured for a 4KB MTU
-	 */
-	if ((attr_mask & IB_QP_PATH_MTU) &&
-		(ib_mtu_enum_to_int(attr->path_mtu) == -1 ||
-		(attr->path_mtu > IB_MTU_2048 && !ipath_mtu4096)))
-		goto inval;
-
-	if (attr_mask & IB_QP_PATH_MIG_STATE)
-		if (attr->path_mig_state != IB_MIG_MIGRATED &&
-		    attr->path_mig_state != IB_MIG_REARM)
-			goto inval;
-
-	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
-		if (attr->max_dest_rd_atomic > IPATH_MAX_RDMA_ATOMIC)
-			goto inval;
-
-	switch (new_state) {
-	case IB_QPS_RESET:
-		if (qp->state != IB_QPS_RESET) {
-			qp->state = IB_QPS_RESET;
-			spin_lock(&dev->pending_lock);
-			if (!list_empty(&qp->timerwait))
-				list_del_init(&qp->timerwait);
-			if (!list_empty(&qp->piowait))
-				list_del_init(&qp->piowait);
-			spin_unlock(&dev->pending_lock);
-			qp->s_flags &= ~IPATH_S_ANY_WAIT;
-			spin_unlock_irq(&qp->s_lock);
-			/* Stop the sending tasklet */
-			tasklet_kill(&qp->s_task);
-			wait_event(qp->wait_dma, !atomic_read(&qp->s_dma_busy));
-			spin_lock_irq(&qp->s_lock);
-		}
-		ipath_reset_qp(qp, ibqp->qp_type);
-		break;
-
-	case IB_QPS_SQD:
-		qp->s_draining = qp->s_last != qp->s_cur;
-		qp->state = new_state;
-		break;
-
-	case IB_QPS_SQE:
-		if (qp->ibqp.qp_type == IB_QPT_RC)
-			goto inval;
-		qp->state = new_state;
-		break;
-
-	case IB_QPS_ERR:
-		lastwqe = ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR);
-		break;
-
-	default:
-		qp->state = new_state;
-		break;
-	}
-
-	if (attr_mask & IB_QP_PKEY_INDEX)
-		qp->s_pkey_index = attr->pkey_index;
-
-	if (attr_mask & IB_QP_DEST_QPN)
-		qp->remote_qpn = attr->dest_qp_num;
-
-	if (attr_mask & IB_QP_SQ_PSN) {
-		qp->s_psn = qp->s_next_psn = attr->sq_psn;
-		qp->s_last_psn = qp->s_next_psn - 1;
-	}
-
-	if (attr_mask & IB_QP_RQ_PSN)
-		qp->r_psn = attr->rq_psn;
-
-	if (attr_mask & IB_QP_ACCESS_FLAGS)
-		qp->qp_access_flags = attr->qp_access_flags;
-
-	if (attr_mask & IB_QP_AV) {
-		qp->remote_ah_attr = attr->ah_attr;
-		qp->s_dmult = ipath_ib_rate_to_mult(attr->ah_attr.static_rate);
-	}
-
-	if (attr_mask & IB_QP_PATH_MTU)
-		qp->path_mtu = attr->path_mtu;
-
-	if (attr_mask & IB_QP_RETRY_CNT)
-		qp->s_retry = qp->s_retry_cnt = attr->retry_cnt;
-
-	if (attr_mask & IB_QP_RNR_RETRY) {
-		qp->s_rnr_retry = attr->rnr_retry;
-		if (qp->s_rnr_retry > 7)
-			qp->s_rnr_retry = 7;
-		qp->s_rnr_retry_cnt = qp->s_rnr_retry;
-	}
-
-	if (attr_mask & IB_QP_MIN_RNR_TIMER)
-		qp->r_min_rnr_timer = attr->min_rnr_timer;
-
-	if (attr_mask & IB_QP_TIMEOUT)
-		qp->timeout = attr->timeout;
-
-	if (attr_mask & IB_QP_QKEY)
-		qp->qkey = attr->qkey;
-
-	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
-		qp->r_max_rd_atomic = attr->max_dest_rd_atomic;
-
-	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC)
-		qp->s_max_rd_atomic = attr->max_rd_atomic;
-
-	spin_unlock_irq(&qp->s_lock);
-
-	if (lastwqe) {
-		struct ib_event ev;
-
-		ev.device = qp->ibqp.device;
-		ev.element.qp = &qp->ibqp;
-		ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
-		qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
-	}
-	ret = 0;
-	goto bail;
-
-inval:
-	spin_unlock_irq(&qp->s_lock);
-	ret = -EINVAL;
-
-bail:
-	return ret;
-}
-
-int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
-		   int attr_mask, struct ib_qp_init_attr *init_attr)
-{
-	struct ipath_qp *qp = to_iqp(ibqp);
-
-	attr->qp_state = qp->state;
-	attr->cur_qp_state = attr->qp_state;
-	attr->path_mtu = qp->path_mtu;
-	attr->path_mig_state = 0;
-	attr->qkey = qp->qkey;
-	attr->rq_psn = qp->r_psn;
-	attr->sq_psn = qp->s_next_psn;
-	attr->dest_qp_num = qp->remote_qpn;
-	attr->qp_access_flags = qp->qp_access_flags;
-	attr->cap.max_send_wr = qp->s_size - 1;
-	attr->cap.max_recv_wr = qp->ibqp.srq ? 0 : qp->r_rq.size - 1;
-	attr->cap.max_send_sge = qp->s_max_sge;
-	attr->cap.max_recv_sge = qp->r_rq.max_sge;
-	attr->cap.max_inline_data = 0;
-	attr->ah_attr = qp->remote_ah_attr;
-	memset(&attr->alt_ah_attr, 0, sizeof(attr->alt_ah_attr));
-	attr->pkey_index = qp->s_pkey_index;
-	attr->alt_pkey_index = 0;
-	attr->en_sqd_async_notify = 0;
-	attr->sq_draining = qp->s_draining;
-	attr->max_rd_atomic = qp->s_max_rd_atomic;
-	attr->max_dest_rd_atomic = qp->r_max_rd_atomic;
-	attr->min_rnr_timer = qp->r_min_rnr_timer;
-	attr->port_num = 1;
-	attr->timeout = qp->timeout;
-	attr->retry_cnt = qp->s_retry_cnt;
-	attr->rnr_retry = qp->s_rnr_retry_cnt;
-	attr->alt_port_num = 0;
-	attr->alt_timeout = 0;
-
-	init_attr->event_handler = qp->ibqp.event_handler;
-	init_attr->qp_context = qp->ibqp.qp_context;
-	init_attr->send_cq = qp->ibqp.send_cq;
-	init_attr->recv_cq = qp->ibqp.recv_cq;
-	init_attr->srq = qp->ibqp.srq;
-	init_attr->cap = attr->cap;
-	if (qp->s_flags & IPATH_S_SIGNAL_REQ_WR)
-		init_attr->sq_sig_type = IB_SIGNAL_REQ_WR;
-	else
-		init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
-	init_attr->qp_type = qp->ibqp.qp_type;
-	init_attr->port_num = 1;
-	return 0;
-}
-
-/**
- * ipath_compute_aeth - compute the AETH (syndrome + MSN)
- * @qp: the queue pair to compute the AETH for
- *
- * Returns the AETH.
- */
-__be32 ipath_compute_aeth(struct ipath_qp *qp)
-{
-	u32 aeth = qp->r_msn & IPATH_MSN_MASK;
-
-	if (qp->ibqp.srq) {
-		/*
-		 * Shared receive queues don't generate credits.
-		 * Set the credit field to the invalid value.
-		 */
-		aeth |= IPATH_AETH_CREDIT_INVAL << IPATH_AETH_CREDIT_SHIFT;
-	} else {
-		u32 min, max, x;
-		u32 credits;
-		struct ipath_rwq *wq = qp->r_rq.wq;
-		u32 head;
-		u32 tail;
-
-		/* sanity check pointers before trusting them */
-		head = wq->head;
-		if (head >= qp->r_rq.size)
-			head = 0;
-		tail = wq->tail;
-		if (tail >= qp->r_rq.size)
-			tail = 0;
-		/*
-		 * Compute the number of credits available (RWQEs).
-		 * XXX Not holding the r_rq.lock here so there is a small
-		 * chance that the pair of reads are not atomic.
-		 */
-		credits = head - tail;
-		if ((int)credits < 0)
-			credits += qp->r_rq.size;
-		/*
-		 * Binary search the credit table to find the code to
-		 * use.
-		 */
-		min = 0;
-		max = 31;
-		for (;;) {
-			x = (min + max) / 2;
-			if (credit_table[x] == credits)
-				break;
-			if (credit_table[x] > credits)
-				max = x;
-			else if (min == x)
-				break;
-			else
-				min = x;
-		}
-		aeth |= x << IPATH_AETH_CREDIT_SHIFT;
-	}
-	return cpu_to_be32(aeth);
-}
-
-/**
- * ipath_create_qp - create a queue pair for a device
- * @ibpd: the protection domain who's device we create the queue pair for
- * @init_attr: the attributes of the queue pair
- * @udata: unused by InfiniPath
- *
- * Returns the queue pair on success, otherwise returns an errno.
- *
- * Called by the ib_create_qp() core verbs function.
- */
-struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
-			      struct ib_qp_init_attr *init_attr,
-			      struct ib_udata *udata)
-{
-	struct ipath_qp *qp;
-	int err;
-	struct ipath_swqe *swq = NULL;
-	struct ipath_ibdev *dev;
-	size_t sz;
-	size_t sg_list_sz;
-	struct ib_qp *ret;
-
-	if (init_attr->create_flags) {
-		ret = ERR_PTR(-EINVAL);
-		goto bail;
-	}
-
-	if (init_attr->cap.max_send_sge > ib_ipath_max_sges ||
-	    init_attr->cap.max_send_wr > ib_ipath_max_qp_wrs) {
-		ret = ERR_PTR(-EINVAL);
-		goto bail;
-	}
-
-	/* Check receive queue parameters if no SRQ is specified. */
-	if (!init_attr->srq) {
-		if (init_attr->cap.max_recv_sge > ib_ipath_max_sges ||
-		    init_attr->cap.max_recv_wr > ib_ipath_max_qp_wrs) {
-			ret = ERR_PTR(-EINVAL);
-			goto bail;
-		}
-		if (init_attr->cap.max_send_sge +
-		    init_attr->cap.max_send_wr +
-		    init_attr->cap.max_recv_sge +
-		    init_attr->cap.max_recv_wr == 0) {
-			ret = ERR_PTR(-EINVAL);
-			goto bail;
-		}
-	}
-
-	switch (init_attr->qp_type) {
-	case IB_QPT_UC:
-	case IB_QPT_RC:
-	case IB_QPT_UD:
-	case IB_QPT_SMI:
-	case IB_QPT_GSI:
-		sz = sizeof(struct ipath_sge) *
-			init_attr->cap.max_send_sge +
-			sizeof(struct ipath_swqe);
-		swq = vmalloc((init_attr->cap.max_send_wr + 1) * sz);
-		if (swq == NULL) {
-			ret = ERR_PTR(-ENOMEM);
-			goto bail;
-		}
-		sz = sizeof(*qp);
-		sg_list_sz = 0;
-		if (init_attr->srq) {
-			struct ipath_srq *srq = to_isrq(init_attr->srq);
-
-			if (srq->rq.max_sge > 1)
-				sg_list_sz = sizeof(*qp->r_sg_list) *
-					(srq->rq.max_sge - 1);
-		} else if (init_attr->cap.max_recv_sge > 1)
-			sg_list_sz = sizeof(*qp->r_sg_list) *
-				(init_attr->cap.max_recv_sge - 1);
-		qp = kmalloc(sz + sg_list_sz, GFP_KERNEL);
-		if (!qp) {
-			ret = ERR_PTR(-ENOMEM);
-			goto bail_swq;
-		}
-		if (sg_list_sz && (init_attr->qp_type == IB_QPT_UD ||
-		    init_attr->qp_type == IB_QPT_SMI ||
-		    init_attr->qp_type == IB_QPT_GSI)) {
-			qp->r_ud_sg_list = kmalloc(sg_list_sz, GFP_KERNEL);
-			if (!qp->r_ud_sg_list) {
-				ret = ERR_PTR(-ENOMEM);
-				goto bail_qp;
-			}
-		} else
-			qp->r_ud_sg_list = NULL;
-		if (init_attr->srq) {
-			sz = 0;
-			qp->r_rq.size = 0;
-			qp->r_rq.max_sge = 0;
-			qp->r_rq.wq = NULL;
-			init_attr->cap.max_recv_wr = 0;
-			init_attr->cap.max_recv_sge = 0;
-		} else {
-			qp->r_rq.size = init_attr->cap.max_recv_wr + 1;
-			qp->r_rq.max_sge = init_attr->cap.max_recv_sge;
-			sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) +
-				sizeof(struct ipath_rwqe);
-			qp->r_rq.wq = vmalloc_user(sizeof(struct ipath_rwq) +
-					      qp->r_rq.size * sz);
-			if (!qp->r_rq.wq) {
-				ret = ERR_PTR(-ENOMEM);
-				goto bail_sg_list;
-			}
-		}
-
-		/*
-		 * ib_create_qp() will initialize qp->ibqp
-		 * except for qp->ibqp.qp_num.
-		 */
-		spin_lock_init(&qp->s_lock);
-		spin_lock_init(&qp->r_rq.lock);
-		atomic_set(&qp->refcount, 0);
-		init_waitqueue_head(&qp->wait);
-		init_waitqueue_head(&qp->wait_dma);
-		tasklet_init(&qp->s_task, ipath_do_send, (unsigned long)qp);
-		INIT_LIST_HEAD(&qp->piowait);
-		INIT_LIST_HEAD(&qp->timerwait);
-		qp->state = IB_QPS_RESET;
-		qp->s_wq = swq;
-		qp->s_size = init_attr->cap.max_send_wr + 1;
-		qp->s_max_sge = init_attr->cap.max_send_sge;
-		if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR)
-			qp->s_flags = IPATH_S_SIGNAL_REQ_WR;
-		else
-			qp->s_flags = 0;
-		dev = to_idev(ibpd->device);
-		err = ipath_alloc_qpn(&dev->qp_table, qp,
-				      init_attr->qp_type);
-		if (err) {
-			ret = ERR_PTR(err);
-			vfree(qp->r_rq.wq);
-			goto bail_sg_list;
-		}
-		qp->ip = NULL;
-		qp->s_tx = NULL;
-		ipath_reset_qp(qp, init_attr->qp_type);
-		break;
-
-	default:
-		/* Don't support raw QPs */
-		ret = ERR_PTR(-ENOSYS);
-		goto bail;
-	}
-
-	init_attr->cap.max_inline_data = 0;
-
-	/*
-	 * Return the address of the RWQ as the offset to mmap.
-	 * See ipath_mmap() for details.
-	 */
-	if (udata && udata->outlen >= sizeof(__u64)) {
-		if (!qp->r_rq.wq) {
-			__u64 offset = 0;
-
-			err = ib_copy_to_udata(udata, &offset,
-					       sizeof(offset));
-			if (err) {
-				ret = ERR_PTR(err);
-				goto bail_ip;
-			}
-		} else {
-			u32 s = sizeof(struct ipath_rwq) +
-				qp->r_rq.size * sz;
-
-			qp->ip =
-			    ipath_create_mmap_info(dev, s,
-						   ibpd->uobject->context,
-						   qp->r_rq.wq);
-			if (!qp->ip) {
-				ret = ERR_PTR(-ENOMEM);
-				goto bail_ip;
-			}
-
-			err = ib_copy_to_udata(udata, &(qp->ip->offset),
-					       sizeof(qp->ip->offset));
-			if (err) {
-				ret = ERR_PTR(err);
-				goto bail_ip;
-			}
-		}
-	}
-
-	spin_lock(&dev->n_qps_lock);
-	if (dev->n_qps_allocated == ib_ipath_max_qps) {
-		spin_unlock(&dev->n_qps_lock);
-		ret = ERR_PTR(-ENOMEM);
-		goto bail_ip;
-	}
-
-	dev->n_qps_allocated++;
-	spin_unlock(&dev->n_qps_lock);
-
-	if (qp->ip) {
-		spin_lock_irq(&dev->pending_lock);
-		list_add(&qp->ip->pending_mmaps, &dev->pending_mmaps);
-		spin_unlock_irq(&dev->pending_lock);
-	}
-
-	ret = &qp->ibqp;
-	goto bail;
-
-bail_ip:
-	if (qp->ip)
-		kref_put(&qp->ip->ref, ipath_release_mmap_info);
-	else
-		vfree(qp->r_rq.wq);
-	ipath_free_qp(&dev->qp_table, qp);
-	free_qpn(&dev->qp_table, qp->ibqp.qp_num);
-bail_sg_list:
-	kfree(qp->r_ud_sg_list);
-bail_qp:
-	kfree(qp);
-bail_swq:
-	vfree(swq);
-bail:
-	return ret;
-}
-
-/**
- * ipath_destroy_qp - destroy a queue pair
- * @ibqp: the queue pair to destroy
- *
- * Returns 0 on success.
- *
- * Note that this can be called while the QP is actively sending or
- * receiving!
- */
-int ipath_destroy_qp(struct ib_qp *ibqp)
-{
-	struct ipath_qp *qp = to_iqp(ibqp);
-	struct ipath_ibdev *dev = to_idev(ibqp->device);
-
-	/* Make sure HW and driver activity is stopped. */
-	spin_lock_irq(&qp->s_lock);
-	if (qp->state != IB_QPS_RESET) {
-		qp->state = IB_QPS_RESET;
-		spin_lock(&dev->pending_lock);
-		if (!list_empty(&qp->timerwait))
-			list_del_init(&qp->timerwait);
-		if (!list_empty(&qp->piowait))
-			list_del_init(&qp->piowait);
-		spin_unlock(&dev->pending_lock);
-		qp->s_flags &= ~IPATH_S_ANY_WAIT;
-		spin_unlock_irq(&qp->s_lock);
-		/* Stop the sending tasklet */
-		tasklet_kill(&qp->s_task);
-		wait_event(qp->wait_dma, !atomic_read(&qp->s_dma_busy));
-	} else
-		spin_unlock_irq(&qp->s_lock);
-
-	ipath_free_qp(&dev->qp_table, qp);
-
-	if (qp->s_tx) {
-		atomic_dec(&qp->refcount);
-		if (qp->s_tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEBUF)
-			kfree(qp->s_tx->txreq.map_addr);
-		spin_lock_irq(&dev->pending_lock);
-		list_add(&qp->s_tx->txreq.list, &dev->txreq_free);
-		spin_unlock_irq(&dev->pending_lock);
-		qp->s_tx = NULL;
-	}
-
-	wait_event(qp->wait, !atomic_read(&qp->refcount));
-
-	/* all user's cleaned up, mark it available */
-	free_qpn(&dev->qp_table, qp->ibqp.qp_num);
-	spin_lock(&dev->n_qps_lock);
-	dev->n_qps_allocated--;
-	spin_unlock(&dev->n_qps_lock);
-
-	if (qp->ip)
-		kref_put(&qp->ip->ref, ipath_release_mmap_info);
-	else
-		vfree(qp->r_rq.wq);
-	kfree(qp->r_ud_sg_list);
-	vfree(qp->s_wq);
-	kfree(qp);
-	return 0;
-}
-
-/**
- * ipath_init_qp_table - initialize the QP table for a device
- * @idev: the device who's QP table we're initializing
- * @size: the size of the QP table
- *
- * Returns 0 on success, otherwise returns an errno.
- */
-int ipath_init_qp_table(struct ipath_ibdev *idev, int size)
-{
-	int i;
-	int ret;
-
-	idev->qp_table.last = 1;	/* QPN 0 and 1 are special. */
-	idev->qp_table.max = size;
-	idev->qp_table.nmaps = 1;
-	idev->qp_table.table = kcalloc(size, sizeof(*idev->qp_table.table),
-				       GFP_KERNEL);
-	if (idev->qp_table.table == NULL) {
-		ret = -ENOMEM;
-		goto bail;
-	}
-
-	for (i = 0; i < ARRAY_SIZE(idev->qp_table.map); i++) {
-		atomic_set(&idev->qp_table.map[i].n_free, BITS_PER_PAGE);
-		idev->qp_table.map[i].page = NULL;
-	}
-
-	ret = 0;
-
-bail:
-	return ret;
-}
-
-/**
- * ipath_get_credit - flush the send work queue of a QP
- * @qp: the qp who's send work queue to flush
- * @aeth: the Acknowledge Extended Transport Header
- *
- * The QP s_lock should be held.
- */
-void ipath_get_credit(struct ipath_qp *qp, u32 aeth)
-{
-	u32 credit = (aeth >> IPATH_AETH_CREDIT_SHIFT) & IPATH_AETH_CREDIT_MASK;
-
-	/*
-	 * If the credit is invalid, we can send
-	 * as many packets as we like.  Otherwise, we have to
-	 * honor the credit field.
-	 */
-	if (credit == IPATH_AETH_CREDIT_INVAL)
-		qp->s_lsn = (u32) -1;
-	else if (qp->s_lsn != (u32) -1) {
-		/* Compute new LSN (i.e., MSN + credit) */
-		credit = (aeth + credit_table[credit]) & IPATH_MSN_MASK;
-		if (ipath_cmp24(credit, qp->s_lsn) > 0)
-			qp->s_lsn = credit;
-	}
-
-	/* Restart sending if it was blocked due to lack of credits. */
-	if ((qp->s_flags & IPATH_S_WAIT_SSN_CREDIT) &&
-	    qp->s_cur != qp->s_head &&
-	    (qp->s_lsn == (u32) -1 ||
-	     ipath_cmp24(get_swqe_ptr(qp, qp->s_cur)->ssn,
-			 qp->s_lsn + 1) <= 0))
-		ipath_schedule_send(qp);
-}
diff --git a/drivers/staging/rdma/ipath/ipath_rc.c b/drivers/staging/rdma/ipath/ipath_rc.c
deleted file mode 100644
index d4aa535..0000000
--- a/drivers/staging/rdma/ipath/ipath_rc.c
+++ /dev/null
@@ -1,1969 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/io.h>
-
-#include "ipath_verbs.h"
-#include "ipath_kernel.h"
-
-/* cut down ridiculously long IB macro names */
-#define OP(x) IB_OPCODE_RC_##x
-
-static u32 restart_sge(struct ipath_sge_state *ss, struct ipath_swqe *wqe,
-		       u32 psn, u32 pmtu)
-{
-	u32 len;
-
-	len = ((psn - wqe->psn) & IPATH_PSN_MASK) * pmtu;
-	ss->sge = wqe->sg_list[0];
-	ss->sg_list = wqe->sg_list + 1;
-	ss->num_sge = wqe->wr.num_sge;
-	ipath_skip_sge(ss, len);
-	return wqe->length - len;
-}
-
-/**
- * ipath_init_restart- initialize the qp->s_sge after a restart
- * @qp: the QP who's SGE we're restarting
- * @wqe: the work queue to initialize the QP's SGE from
- *
- * The QP s_lock should be held and interrupts disabled.
- */
-static void ipath_init_restart(struct ipath_qp *qp, struct ipath_swqe *wqe)
-{
-	struct ipath_ibdev *dev;
-
-	qp->s_len = restart_sge(&qp->s_sge, wqe, qp->s_psn,
-				ib_mtu_enum_to_int(qp->path_mtu));
-	dev = to_idev(qp->ibqp.device);
-	spin_lock(&dev->pending_lock);
-	if (list_empty(&qp->timerwait))
-		list_add_tail(&qp->timerwait,
-			      &dev->pending[dev->pending_index]);
-	spin_unlock(&dev->pending_lock);
-}
-
-/**
- * ipath_make_rc_ack - construct a response packet (ACK, NAK, or RDMA read)
- * @qp: a pointer to the QP
- * @ohdr: a pointer to the IB header being constructed
- * @pmtu: the path MTU
- *
- * Return 1 if constructed; otherwise, return 0.
- * Note that we are in the responder's side of the QP context.
- * Note the QP s_lock must be held.
- */
-static int ipath_make_rc_ack(struct ipath_ibdev *dev, struct ipath_qp *qp,
-			     struct ipath_other_headers *ohdr, u32 pmtu)
-{
-	struct ipath_ack_entry *e;
-	u32 hwords;
-	u32 len;
-	u32 bth0;
-	u32 bth2;
-
-	/* Don't send an ACK if we aren't supposed to. */
-	if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
-		goto bail;
-
-	/* header size in 32-bit words LRH+BTH = (8+12)/4. */
-	hwords = 5;
-
-	switch (qp->s_ack_state) {
-	case OP(RDMA_READ_RESPONSE_LAST):
-	case OP(RDMA_READ_RESPONSE_ONLY):
-	case OP(ATOMIC_ACKNOWLEDGE):
-		/*
-		 * We can increment the tail pointer now that the last
-		 * response has been sent instead of only being
-		 * constructed.
-		 */
-		if (++qp->s_tail_ack_queue > IPATH_MAX_RDMA_ATOMIC)
-			qp->s_tail_ack_queue = 0;
-		/* FALLTHROUGH */
-	case OP(SEND_ONLY):
-	case OP(ACKNOWLEDGE):
-		/* Check for no next entry in the queue. */
-		if (qp->r_head_ack_queue == qp->s_tail_ack_queue) {
-			if (qp->s_flags & IPATH_S_ACK_PENDING)
-				goto normal;
-			qp->s_ack_state = OP(ACKNOWLEDGE);
-			goto bail;
-		}
-
-		e = &qp->s_ack_queue[qp->s_tail_ack_queue];
-		if (e->opcode == OP(RDMA_READ_REQUEST)) {
-			/* Copy SGE state in case we need to resend */
-			qp->s_ack_rdma_sge = e->rdma_sge;
-			qp->s_cur_sge = &qp->s_ack_rdma_sge;
-			len = e->rdma_sge.sge.sge_length;
-			if (len > pmtu) {
-				len = pmtu;
-				qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST);
-			} else {
-				qp->s_ack_state = OP(RDMA_READ_RESPONSE_ONLY);
-				e->sent = 1;
-			}
-			ohdr->u.aeth = ipath_compute_aeth(qp);
-			hwords++;
-			qp->s_ack_rdma_psn = e->psn;
-			bth2 = qp->s_ack_rdma_psn++ & IPATH_PSN_MASK;
-		} else {
-			/* COMPARE_SWAP or FETCH_ADD */
-			qp->s_cur_sge = NULL;
-			len = 0;
-			qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE);
-			ohdr->u.at.aeth = ipath_compute_aeth(qp);
-			ohdr->u.at.atomic_ack_eth[0] =
-				cpu_to_be32(e->atomic_data >> 32);
-			ohdr->u.at.atomic_ack_eth[1] =
-				cpu_to_be32(e->atomic_data);
-			hwords += sizeof(ohdr->u.at) / sizeof(u32);
-			bth2 = e->psn;
-			e->sent = 1;
-		}
-		bth0 = qp->s_ack_state << 24;
-		break;
-
-	case OP(RDMA_READ_RESPONSE_FIRST):
-		qp->s_ack_state = OP(RDMA_READ_RESPONSE_MIDDLE);
-		/* FALLTHROUGH */
-	case OP(RDMA_READ_RESPONSE_MIDDLE):
-		len = qp->s_ack_rdma_sge.sge.sge_length;
-		if (len > pmtu)
-			len = pmtu;
-		else {
-			ohdr->u.aeth = ipath_compute_aeth(qp);
-			hwords++;
-			qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST);
-			qp->s_ack_queue[qp->s_tail_ack_queue].sent = 1;
-		}
-		bth0 = qp->s_ack_state << 24;
-		bth2 = qp->s_ack_rdma_psn++ & IPATH_PSN_MASK;
-		break;
-
-	default:
-	normal:
-		/*
-		 * Send a regular ACK.
-		 * Set the s_ack_state so we wait until after sending
-		 * the ACK before setting s_ack_state to ACKNOWLEDGE
-		 * (see above).
-		 */
-		qp->s_ack_state = OP(SEND_ONLY);
-		qp->s_flags &= ~IPATH_S_ACK_PENDING;
-		qp->s_cur_sge = NULL;
-		if (qp->s_nak_state)
-			ohdr->u.aeth =
-				cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) |
-					    (qp->s_nak_state <<
-					     IPATH_AETH_CREDIT_SHIFT));
-		else
-			ohdr->u.aeth = ipath_compute_aeth(qp);
-		hwords++;
-		len = 0;
-		bth0 = OP(ACKNOWLEDGE) << 24;
-		bth2 = qp->s_ack_psn & IPATH_PSN_MASK;
-	}
-	qp->s_hdrwords = hwords;
-	qp->s_cur_size = len;
-	ipath_make_ruc_header(dev, qp, ohdr, bth0, bth2);
-	return 1;
-
-bail:
-	return 0;
-}
-
-/**
- * ipath_make_rc_req - construct a request packet (SEND, RDMA r/w, ATOMIC)
- * @qp: a pointer to the QP
- *
- * Return 1 if constructed; otherwise, return 0.
- */
-int ipath_make_rc_req(struct ipath_qp *qp)
-{
-	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
-	struct ipath_other_headers *ohdr;
-	struct ipath_sge_state *ss;
-	struct ipath_swqe *wqe;
-	u32 hwords;
-	u32 len;
-	u32 bth0;
-	u32 bth2;
-	u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
-	char newreq;
-	unsigned long flags;
-	int ret = 0;
-
-	ohdr = &qp->s_hdr.u.oth;
-	if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
-		ohdr = &qp->s_hdr.u.l.oth;
-
-	/*
-	 * The lock is needed to synchronize between the sending tasklet,
-	 * the receive interrupt handler, and timeout resends.
-	 */
-	spin_lock_irqsave(&qp->s_lock, flags);
-
-	/* Sending responses has higher priority over sending requests. */
-	if ((qp->r_head_ack_queue != qp->s_tail_ack_queue ||
-	     (qp->s_flags & IPATH_S_ACK_PENDING) ||
-	     qp->s_ack_state != OP(ACKNOWLEDGE)) &&
-	    ipath_make_rc_ack(dev, qp, ohdr, pmtu))
-		goto done;
-
-	if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)) {
-		if (!(ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND))
-			goto bail;
-		/* We are in the error state, flush the work request. */
-		if (qp->s_last == qp->s_head)
-			goto bail;
-		/* If DMAs are in progress, we can't flush immediately. */
-		if (atomic_read(&qp->s_dma_busy)) {
-			qp->s_flags |= IPATH_S_WAIT_DMA;
-			goto bail;
-		}
-		wqe = get_swqe_ptr(qp, qp->s_last);
-		ipath_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
-		goto done;
-	}
-
-	/* Leave BUSY set until RNR timeout. */
-	if (qp->s_rnr_timeout) {
-		qp->s_flags |= IPATH_S_WAITING;
-		goto bail;
-	}
-
-	/* header size in 32-bit words LRH+BTH = (8+12)/4. */
-	hwords = 5;
-	bth0 = 1 << 22; /* Set M bit */
-
-	/* Send a request. */
-	wqe = get_swqe_ptr(qp, qp->s_cur);
-	switch (qp->s_state) {
-	default:
-		if (!(ib_ipath_state_ops[qp->state] &
-		    IPATH_PROCESS_NEXT_SEND_OK))
-			goto bail;
-		/*
-		 * Resend an old request or start a new one.
-		 *
-		 * We keep track of the current SWQE so that
-		 * we don't reset the "furthest progress" state
-		 * if we need to back up.
-		 */
-		newreq = 0;
-		if (qp->s_cur == qp->s_tail) {
-			/* Check if send work queue is empty. */
-			if (qp->s_tail == qp->s_head)
-				goto bail;
-			/*
-			 * If a fence is requested, wait for previous
-			 * RDMA read and atomic operations to finish.
-			 */
-			if ((wqe->wr.send_flags & IB_SEND_FENCE) &&
-			    qp->s_num_rd_atomic) {
-				qp->s_flags |= IPATH_S_FENCE_PENDING;
-				goto bail;
-			}
-			wqe->psn = qp->s_next_psn;
-			newreq = 1;
-		}
-		/*
-		 * Note that we have to be careful not to modify the
-		 * original work request since we may need to resend
-		 * it.
-		 */
-		len = wqe->length;
-		ss = &qp->s_sge;
-		bth2 = 0;
-		switch (wqe->wr.opcode) {
-		case IB_WR_SEND:
-		case IB_WR_SEND_WITH_IMM:
-			/* If no credit, return. */
-			if (qp->s_lsn != (u32) -1 &&
-			    ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) {
-				qp->s_flags |= IPATH_S_WAIT_SSN_CREDIT;
-				goto bail;
-			}
-			wqe->lpsn = wqe->psn;
-			if (len > pmtu) {
-				wqe->lpsn += (len - 1) / pmtu;
-				qp->s_state = OP(SEND_FIRST);
-				len = pmtu;
-				break;
-			}
-			if (wqe->wr.opcode == IB_WR_SEND)
-				qp->s_state = OP(SEND_ONLY);
-			else {
-				qp->s_state = OP(SEND_ONLY_WITH_IMMEDIATE);
-				/* Immediate data comes after the BTH */
-				ohdr->u.imm_data = wqe->wr.ex.imm_data;
-				hwords += 1;
-			}
-			if (wqe->wr.send_flags & IB_SEND_SOLICITED)
-				bth0 |= 1 << 23;
-			bth2 = 1 << 31;	/* Request ACK. */
-			if (++qp->s_cur == qp->s_size)
-				qp->s_cur = 0;
-			break;
-
-		case IB_WR_RDMA_WRITE:
-			if (newreq && qp->s_lsn != (u32) -1)
-				qp->s_lsn++;
-			/* FALLTHROUGH */
-		case IB_WR_RDMA_WRITE_WITH_IMM:
-			/* If no credit, return. */
-			if (qp->s_lsn != (u32) -1 &&
-			    ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) {
-				qp->s_flags |= IPATH_S_WAIT_SSN_CREDIT;
-				goto bail;
-			}
-			ohdr->u.rc.reth.vaddr =
-				cpu_to_be64(wqe->rdma_wr.remote_addr);
-			ohdr->u.rc.reth.rkey =
-				cpu_to_be32(wqe->rdma_wr.rkey);
-			ohdr->u.rc.reth.length = cpu_to_be32(len);
-			hwords += sizeof(struct ib_reth) / sizeof(u32);
-			wqe->lpsn = wqe->psn;
-			if (len > pmtu) {
-				wqe->lpsn += (len - 1) / pmtu;
-				qp->s_state = OP(RDMA_WRITE_FIRST);
-				len = pmtu;
-				break;
-			}
-			if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
-				qp->s_state = OP(RDMA_WRITE_ONLY);
-			else {
-				qp->s_state =
-					OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
-				/* Immediate data comes after RETH */
-				ohdr->u.rc.imm_data = wqe->wr.ex.imm_data;
-				hwords += 1;
-				if (wqe->wr.send_flags & IB_SEND_SOLICITED)
-					bth0 |= 1 << 23;
-			}
-			bth2 = 1 << 31;	/* Request ACK. */
-			if (++qp->s_cur == qp->s_size)
-				qp->s_cur = 0;
-			break;
-
-		case IB_WR_RDMA_READ:
-			/*
-			 * Don't allow more operations to be started
-			 * than the QP limits allow.
-			 */
-			if (newreq) {
-				if (qp->s_num_rd_atomic >=
-				    qp->s_max_rd_atomic) {
-					qp->s_flags |= IPATH_S_RDMAR_PENDING;
-					goto bail;
-				}
-				qp->s_num_rd_atomic++;
-				if (qp->s_lsn != (u32) -1)
-					qp->s_lsn++;
-				/*
-				 * Adjust s_next_psn to count the
-				 * expected number of responses.
-				 */
-				if (len > pmtu)
-					qp->s_next_psn += (len - 1) / pmtu;
-				wqe->lpsn = qp->s_next_psn++;
-			}
-			ohdr->u.rc.reth.vaddr =
-				cpu_to_be64(wqe->rdma_wr.remote_addr);
-			ohdr->u.rc.reth.rkey =
-				cpu_to_be32(wqe->rdma_wr.rkey);
-			ohdr->u.rc.reth.length = cpu_to_be32(len);
-			qp->s_state = OP(RDMA_READ_REQUEST);
-			hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
-			ss = NULL;
-			len = 0;
-			if (++qp->s_cur == qp->s_size)
-				qp->s_cur = 0;
-			break;
-
-		case IB_WR_ATOMIC_CMP_AND_SWP:
-		case IB_WR_ATOMIC_FETCH_AND_ADD:
-			/*
-			 * Don't allow more operations to be started
-			 * than the QP limits allow.
-			 */
-			if (newreq) {
-				if (qp->s_num_rd_atomic >=
-				    qp->s_max_rd_atomic) {
-					qp->s_flags |= IPATH_S_RDMAR_PENDING;
-					goto bail;
-				}
-				qp->s_num_rd_atomic++;
-				if (qp->s_lsn != (u32) -1)
-					qp->s_lsn++;
-				wqe->lpsn = wqe->psn;
-			}
-			if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
-				qp->s_state = OP(COMPARE_SWAP);
-				ohdr->u.atomic_eth.swap_data = cpu_to_be64(
-					wqe->atomic_wr.swap);
-				ohdr->u.atomic_eth.compare_data = cpu_to_be64(
-					wqe->atomic_wr.compare_add);
-			} else {
-				qp->s_state = OP(FETCH_ADD);
-				ohdr->u.atomic_eth.swap_data = cpu_to_be64(
-					wqe->atomic_wr.compare_add);
-				ohdr->u.atomic_eth.compare_data = 0;
-			}
-			ohdr->u.atomic_eth.vaddr[0] = cpu_to_be32(
-				wqe->atomic_wr.remote_addr >> 32);
-			ohdr->u.atomic_eth.vaddr[1] = cpu_to_be32(
-				wqe->atomic_wr.remote_addr);
-			ohdr->u.atomic_eth.rkey = cpu_to_be32(
-				wqe->atomic_wr.rkey);
-			hwords += sizeof(struct ib_atomic_eth) / sizeof(u32);
-			ss = NULL;
-			len = 0;
-			if (++qp->s_cur == qp->s_size)
-				qp->s_cur = 0;
-			break;
-
-		default:
-			goto bail;
-		}
-		qp->s_sge.sge = wqe->sg_list[0];
-		qp->s_sge.sg_list = wqe->sg_list + 1;
-		qp->s_sge.num_sge = wqe->wr.num_sge;
-		qp->s_len = wqe->length;
-		if (newreq) {
-			qp->s_tail++;
-			if (qp->s_tail >= qp->s_size)
-				qp->s_tail = 0;
-		}
-		bth2 |= qp->s_psn & IPATH_PSN_MASK;
-		if (wqe->wr.opcode == IB_WR_RDMA_READ)
-			qp->s_psn = wqe->lpsn + 1;
-		else {
-			qp->s_psn++;
-			if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0)
-				qp->s_next_psn = qp->s_psn;
-		}
-		/*
-		 * Put the QP on the pending list so lost ACKs will cause
-		 * a retry.  More than one request can be pending so the
-		 * QP may already be on the dev->pending list.
-		 */
-		spin_lock(&dev->pending_lock);
-		if (list_empty(&qp->timerwait))
-			list_add_tail(&qp->timerwait,
-				      &dev->pending[dev->pending_index]);
-		spin_unlock(&dev->pending_lock);
-		break;
-
-	case OP(RDMA_READ_RESPONSE_FIRST):
-		/*
-		 * This case can only happen if a send is restarted.
-		 * See ipath_restart_rc().
-		 */
-		ipath_init_restart(qp, wqe);
-		/* FALLTHROUGH */
-	case OP(SEND_FIRST):
-		qp->s_state = OP(SEND_MIDDLE);
-		/* FALLTHROUGH */
-	case OP(SEND_MIDDLE):
-		bth2 = qp->s_psn++ & IPATH_PSN_MASK;
-		if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0)
-			qp->s_next_psn = qp->s_psn;
-		ss = &qp->s_sge;
-		len = qp->s_len;
-		if (len > pmtu) {
-			len = pmtu;
-			break;
-		}
-		if (wqe->wr.opcode == IB_WR_SEND)
-			qp->s_state = OP(SEND_LAST);
-		else {
-			qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE);
-			/* Immediate data comes after the BTH */
-			ohdr->u.imm_data = wqe->wr.ex.imm_data;
-			hwords += 1;
-		}
-		if (wqe->wr.send_flags & IB_SEND_SOLICITED)
-			bth0 |= 1 << 23;
-		bth2 |= 1 << 31;	/* Request ACK. */
-		qp->s_cur++;
-		if (qp->s_cur >= qp->s_size)
-			qp->s_cur = 0;
-		break;
-
-	case OP(RDMA_READ_RESPONSE_LAST):
-		/*
-		 * This case can only happen if a RDMA write is restarted.
-		 * See ipath_restart_rc().
-		 */
-		ipath_init_restart(qp, wqe);
-		/* FALLTHROUGH */
-	case OP(RDMA_WRITE_FIRST):
-		qp->s_state = OP(RDMA_WRITE_MIDDLE);
-		/* FALLTHROUGH */
-	case OP(RDMA_WRITE_MIDDLE):
-		bth2 = qp->s_psn++ & IPATH_PSN_MASK;
-		if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0)
-			qp->s_next_psn = qp->s_psn;
-		ss = &qp->s_sge;
-		len = qp->s_len;
-		if (len > pmtu) {
-			len = pmtu;
-			break;
-		}
-		if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
-			qp->s_state = OP(RDMA_WRITE_LAST);
-		else {
-			qp->s_state = OP(RDMA_WRITE_LAST_WITH_IMMEDIATE);
-			/* Immediate data comes after the BTH */
-			ohdr->u.imm_data = wqe->wr.ex.imm_data;
-			hwords += 1;
-			if (wqe->wr.send_flags & IB_SEND_SOLICITED)
-				bth0 |= 1 << 23;
-		}
-		bth2 |= 1 << 31;	/* Request ACK. */
-		qp->s_cur++;
-		if (qp->s_cur >= qp->s_size)
-			qp->s_cur = 0;
-		break;
-
-	case OP(RDMA_READ_RESPONSE_MIDDLE):
-		/*
-		 * This case can only happen if a RDMA read is restarted.
-		 * See ipath_restart_rc().
-		 */
-		ipath_init_restart(qp, wqe);
-		len = ((qp->s_psn - wqe->psn) & IPATH_PSN_MASK) * pmtu;
-		ohdr->u.rc.reth.vaddr =
-			cpu_to_be64(wqe->rdma_wr.remote_addr + len);
-		ohdr->u.rc.reth.rkey =
-			cpu_to_be32(wqe->rdma_wr.rkey);
-		ohdr->u.rc.reth.length = cpu_to_be32(qp->s_len);
-		qp->s_state = OP(RDMA_READ_REQUEST);
-		hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
-		bth2 = qp->s_psn & IPATH_PSN_MASK;
-		qp->s_psn = wqe->lpsn + 1;
-		ss = NULL;
-		len = 0;
-		qp->s_cur++;
-		if (qp->s_cur == qp->s_size)
-			qp->s_cur = 0;
-		break;
-	}
-	if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT - 1) >= 0)
-		bth2 |= 1 << 31;	/* Request ACK. */
-	qp->s_len -= len;
-	qp->s_hdrwords = hwords;
-	qp->s_cur_sge = ss;
-	qp->s_cur_size = len;
-	ipath_make_ruc_header(dev, qp, ohdr, bth0 | (qp->s_state << 24), bth2);
-done:
-	ret = 1;
-	goto unlock;
-
-bail:
-	qp->s_flags &= ~IPATH_S_BUSY;
-unlock:
-	spin_unlock_irqrestore(&qp->s_lock, flags);
-	return ret;
-}
-
-/**
- * send_rc_ack - Construct an ACK packet and send it
- * @qp: a pointer to the QP
- *
- * This is called from ipath_rc_rcv() and only uses the receive
- * side QP state.
- * Note that RDMA reads and atomics are handled in the
- * send side QP state and tasklet.
- */
-static void send_rc_ack(struct ipath_qp *qp)
-{
-	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
-	struct ipath_devdata *dd;
-	u16 lrh0;
-	u32 bth0;
-	u32 hwords;
-	u32 __iomem *piobuf;
-	struct ipath_ib_header hdr;
-	struct ipath_other_headers *ohdr;
-	unsigned long flags;
-
-	spin_lock_irqsave(&qp->s_lock, flags);
-
-	/* Don't send ACK or NAK if a RDMA read or atomic is pending. */
-	if (qp->r_head_ack_queue != qp->s_tail_ack_queue ||
-	    (qp->s_flags & IPATH_S_ACK_PENDING) ||
-	    qp->s_ack_state != OP(ACKNOWLEDGE))
-		goto queue_ack;
-
-	spin_unlock_irqrestore(&qp->s_lock, flags);
-
-	/* Don't try to send ACKs if the link isn't ACTIVE */
-	dd = dev->dd;
-	if (!(dd->ipath_flags & IPATH_LINKACTIVE))
-		goto done;
-
-	piobuf = ipath_getpiobuf(dd, 0, NULL);
-	if (!piobuf) {
-		/*
-		 * We are out of PIO buffers at the moment.
-		 * Pass responsibility for sending the ACK to the
-		 * send tasklet so that when a PIO buffer becomes
-		 * available, the ACK is sent ahead of other outgoing
-		 * packets.
-		 */
-		spin_lock_irqsave(&qp->s_lock, flags);
-		goto queue_ack;
-	}
-
-	/* Construct the header. */
-	ohdr = &hdr.u.oth;
-	lrh0 = IPATH_LRH_BTH;
-	/* header size in 32-bit words LRH+BTH+AETH = (8+12+4)/4. */
-	hwords = 6;
-	if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
-		hwords += ipath_make_grh(dev, &hdr.u.l.grh,
-					 &qp->remote_ah_attr.grh,
-					 hwords, 0);
-		ohdr = &hdr.u.l.oth;
-		lrh0 = IPATH_LRH_GRH;
-	}
-	/* read pkey_index w/o lock (its atomic) */
-	bth0 = ipath_get_pkey(dd, qp->s_pkey_index) |
-		(OP(ACKNOWLEDGE) << 24) | (1 << 22);
-	if (qp->r_nak_state)
-		ohdr->u.aeth = cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) |
-					    (qp->r_nak_state <<
-					     IPATH_AETH_CREDIT_SHIFT));
-	else
-		ohdr->u.aeth = ipath_compute_aeth(qp);
-	lrh0 |= qp->remote_ah_attr.sl << 4;
-	hdr.lrh[0] = cpu_to_be16(lrh0);
-	hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
-	hdr.lrh[2] = cpu_to_be16(hwords + SIZE_OF_CRC);
-	hdr.lrh[3] = cpu_to_be16(dd->ipath_lid |
-				 qp->remote_ah_attr.src_path_bits);
-	ohdr->bth[0] = cpu_to_be32(bth0);
-	ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
-	ohdr->bth[2] = cpu_to_be32(qp->r_ack_psn & IPATH_PSN_MASK);
-
-	writeq(hwords + 1, piobuf);
-
-	if (dd->ipath_flags & IPATH_PIO_FLUSH_WC) {
-		u32 *hdrp = (u32 *) &hdr;
-
-		ipath_flush_wc();
-		__iowrite32_copy(piobuf + 2, hdrp, hwords - 1);
-		ipath_flush_wc();
-		__raw_writel(hdrp[hwords - 1], piobuf + hwords + 1);
-	} else
-		__iowrite32_copy(piobuf + 2, (u32 *) &hdr, hwords);
-
-	ipath_flush_wc();
-
-	dev->n_unicast_xmit++;
-	goto done;
-
-queue_ack:
-	if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK) {
-		dev->n_rc_qacks++;
-		qp->s_flags |= IPATH_S_ACK_PENDING;
-		qp->s_nak_state = qp->r_nak_state;
-		qp->s_ack_psn = qp->r_ack_psn;
-
-		/* Schedule the send tasklet. */
-		ipath_schedule_send(qp);
-	}
-	spin_unlock_irqrestore(&qp->s_lock, flags);
-done:
-	return;
-}
-
-/**
- * reset_psn - reset the QP state to send starting from PSN
- * @qp: the QP
- * @psn: the packet sequence number to restart at
- *
- * This is called from ipath_rc_rcv() to process an incoming RC ACK
- * for the given QP.
- * Called at interrupt level with the QP s_lock held.
- */
-static void reset_psn(struct ipath_qp *qp, u32 psn)
-{
-	u32 n = qp->s_last;
-	struct ipath_swqe *wqe = get_swqe_ptr(qp, n);
-	u32 opcode;
-
-	qp->s_cur = n;
-
-	/*
-	 * If we are starting the request from the beginning,
-	 * let the normal send code handle initialization.
-	 */
-	if (ipath_cmp24(psn, wqe->psn) <= 0) {
-		qp->s_state = OP(SEND_LAST);
-		goto done;
-	}
-
-	/* Find the work request opcode corresponding to the given PSN. */
-	opcode = wqe->wr.opcode;
-	for (;;) {
-		int diff;
-
-		if (++n == qp->s_size)
-			n = 0;
-		if (n == qp->s_tail)
-			break;
-		wqe = get_swqe_ptr(qp, n);
-		diff = ipath_cmp24(psn, wqe->psn);
-		if (diff < 0)
-			break;
-		qp->s_cur = n;
-		/*
-		 * If we are starting the request from the beginning,
-		 * let the normal send code handle initialization.
-		 */
-		if (diff == 0) {
-			qp->s_state = OP(SEND_LAST);
-			goto done;
-		}
-		opcode = wqe->wr.opcode;
-	}
-
-	/*
-	 * Set the state to restart in the middle of a request.
-	 * Don't change the s_sge, s_cur_sge, or s_cur_size.
-	 * See ipath_make_rc_req().
-	 */
-	switch (opcode) {
-	case IB_WR_SEND:
-	case IB_WR_SEND_WITH_IMM:
-		qp->s_state = OP(RDMA_READ_RESPONSE_FIRST);
-		break;
-
-	case IB_WR_RDMA_WRITE:
-	case IB_WR_RDMA_WRITE_WITH_IMM:
-		qp->s_state = OP(RDMA_READ_RESPONSE_LAST);
-		break;
-
-	case IB_WR_RDMA_READ:
-		qp->s_state = OP(RDMA_READ_RESPONSE_MIDDLE);
-		break;
-
-	default:
-		/*
-		 * This case shouldn't happen since its only
-		 * one PSN per req.
-		 */
-		qp->s_state = OP(SEND_LAST);
-	}
-done:
-	qp->s_psn = psn;
-}
-
-/**
- * ipath_restart_rc - back up requester to resend the last un-ACKed request
- * @qp: the QP to restart
- * @psn: packet sequence number for the request
- * @wc: the work completion request
- *
- * The QP s_lock should be held and interrupts disabled.
- */
-void ipath_restart_rc(struct ipath_qp *qp, u32 psn)
-{
-	struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last);
-	struct ipath_ibdev *dev;
-
-	if (qp->s_retry == 0) {
-		ipath_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR);
-		ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR);
-		goto bail;
-	}
-	qp->s_retry--;
-
-	/*
-	 * Remove the QP from the timeout queue.
-	 * Note: it may already have been removed by ipath_ib_timer().
-	 */
-	dev = to_idev(qp->ibqp.device);
-	spin_lock(&dev->pending_lock);
-	if (!list_empty(&qp->timerwait))
-		list_del_init(&qp->timerwait);
-	if (!list_empty(&qp->piowait))
-		list_del_init(&qp->piowait);
-	spin_unlock(&dev->pending_lock);
-
-	if (wqe->wr.opcode == IB_WR_RDMA_READ)
-		dev->n_rc_resends++;
-	else
-		dev->n_rc_resends += (qp->s_psn - psn) & IPATH_PSN_MASK;
-
-	reset_psn(qp, psn);
-	ipath_schedule_send(qp);
-
-bail:
-	return;
-}
-
-static inline void update_last_psn(struct ipath_qp *qp, u32 psn)
-{
-	qp->s_last_psn = psn;
-}
-
-/**
- * do_rc_ack - process an incoming RC ACK
- * @qp: the QP the ACK came in on
- * @psn: the packet sequence number of the ACK
- * @opcode: the opcode of the request that resulted in the ACK
- *
- * This is called from ipath_rc_rcv_resp() to process an incoming RC ACK
- * for the given QP.
- * Called at interrupt level with the QP s_lock held and interrupts disabled.
- * Returns 1 if OK, 0 if current operation should be aborted (NAK).
- */
-static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode,
-		     u64 val)
-{
-	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
-	struct ib_wc wc;
-	enum ib_wc_status status;
-	struct ipath_swqe *wqe;
-	int ret = 0;
-	u32 ack_psn;
-	int diff;
-
-	/*
-	 * Remove the QP from the timeout queue (or RNR timeout queue).
-	 * If ipath_ib_timer() has already removed it,
-	 * it's OK since we hold the QP s_lock and ipath_restart_rc()
-	 * just won't find anything to restart if we ACK everything.
-	 */
-	spin_lock(&dev->pending_lock);
-	if (!list_empty(&qp->timerwait))
-		list_del_init(&qp->timerwait);
-	spin_unlock(&dev->pending_lock);
-
-	/*
-	 * Note that NAKs implicitly ACK outstanding SEND and RDMA write
-	 * requests and implicitly NAK RDMA read and atomic requests issued
-	 * before the NAK'ed request.  The MSN won't include the NAK'ed
-	 * request but will include an ACK'ed request(s).
-	 */
-	ack_psn = psn;
-	if (aeth >> 29)
-		ack_psn--;
-	wqe = get_swqe_ptr(qp, qp->s_last);
-
-	/*
-	 * The MSN might be for a later WQE than the PSN indicates so
-	 * only complete WQEs that the PSN finishes.
-	 */
-	while ((diff = ipath_cmp24(ack_psn, wqe->lpsn)) >= 0) {
-		/*
-		 * RDMA_READ_RESPONSE_ONLY is a special case since
-		 * we want to generate completion events for everything
-		 * before the RDMA read, copy the data, then generate
-		 * the completion for the read.
-		 */
-		if (wqe->wr.opcode == IB_WR_RDMA_READ &&
-		    opcode == OP(RDMA_READ_RESPONSE_ONLY) &&
-		    diff == 0) {
-			ret = 1;
-			goto bail;
-		}
-		/*
-		 * If this request is a RDMA read or atomic, and the ACK is
-		 * for a later operation, this ACK NAKs the RDMA read or
-		 * atomic.  In other words, only a RDMA_READ_LAST or ONLY
-		 * can ACK a RDMA read and likewise for atomic ops.  Note
-		 * that the NAK case can only happen if relaxed ordering is
-		 * used and requests are sent after an RDMA read or atomic
-		 * is sent but before the response is received.
-		 */
-		if ((wqe->wr.opcode == IB_WR_RDMA_READ &&
-		     (opcode != OP(RDMA_READ_RESPONSE_LAST) || diff != 0)) ||
-		    ((wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
-		      wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) &&
-		     (opcode != OP(ATOMIC_ACKNOWLEDGE) || diff != 0))) {
-			/*
-			 * The last valid PSN seen is the previous
-			 * request's.
-			 */
-			update_last_psn(qp, wqe->psn - 1);
-			/* Retry this request. */
-			ipath_restart_rc(qp, wqe->psn);
-			/*
-			 * No need to process the ACK/NAK since we are
-			 * restarting an earlier request.
-			 */
-			goto bail;
-		}
-		if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
-		    wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
-			*(u64 *) wqe->sg_list[0].vaddr = val;
-		if (qp->s_num_rd_atomic &&
-		    (wqe->wr.opcode == IB_WR_RDMA_READ ||
-		     wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
-		     wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)) {
-			qp->s_num_rd_atomic--;
-			/* Restart sending task if fence is complete */
-			if (((qp->s_flags & IPATH_S_FENCE_PENDING) &&
-			     !qp->s_num_rd_atomic) ||
-			    qp->s_flags & IPATH_S_RDMAR_PENDING)
-				ipath_schedule_send(qp);
-		}
-		/* Post a send completion queue entry if requested. */
-		if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) ||
-		    (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
-			memset(&wc, 0, sizeof wc);
-			wc.wr_id = wqe->wr.wr_id;
-			wc.status = IB_WC_SUCCESS;
-			wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
-			wc.byte_len = wqe->length;
-			wc.qp = &qp->ibqp;
-			wc.src_qp = qp->remote_qpn;
-			wc.slid = qp->remote_ah_attr.dlid;
-			wc.sl = qp->remote_ah_attr.sl;
-			ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 0);
-		}
-		qp->s_retry = qp->s_retry_cnt;
-		/*
-		 * If we are completing a request which is in the process of
-		 * being resent, we can stop resending it since we know the
-		 * responder has already seen it.
-		 */
-		if (qp->s_last == qp->s_cur) {
-			if (++qp->s_cur >= qp->s_size)
-				qp->s_cur = 0;
-			qp->s_last = qp->s_cur;
-			if (qp->s_last == qp->s_tail)
-				break;
-			wqe = get_swqe_ptr(qp, qp->s_cur);
-			qp->s_state = OP(SEND_LAST);
-			qp->s_psn = wqe->psn;
-		} else {
-			if (++qp->s_last >= qp->s_size)
-				qp->s_last = 0;
-			if (qp->state == IB_QPS_SQD && qp->s_last == qp->s_cur)
-				qp->s_draining = 0;
-			if (qp->s_last == qp->s_tail)
-				break;
-			wqe = get_swqe_ptr(qp, qp->s_last);
-		}
-	}
-
-	switch (aeth >> 29) {
-	case 0:		/* ACK */
-		dev->n_rc_acks++;
-		/* If this is a partial ACK, reset the retransmit timer. */
-		if (qp->s_last != qp->s_tail) {
-			spin_lock(&dev->pending_lock);
-			if (list_empty(&qp->timerwait))
-				list_add_tail(&qp->timerwait,
-					&dev->pending[dev->pending_index]);
-			spin_unlock(&dev->pending_lock);
-			/*
-			 * If we get a partial ACK for a resent operation,
-			 * we can stop resending the earlier packets and
-			 * continue with the next packet the receiver wants.
-			 */
-			if (ipath_cmp24(qp->s_psn, psn) <= 0) {
-				reset_psn(qp, psn + 1);
-				ipath_schedule_send(qp);
-			}
-		} else if (ipath_cmp24(qp->s_psn, psn) <= 0) {
-			qp->s_state = OP(SEND_LAST);
-			qp->s_psn = psn + 1;
-		}
-		ipath_get_credit(qp, aeth);
-		qp->s_rnr_retry = qp->s_rnr_retry_cnt;
-		qp->s_retry = qp->s_retry_cnt;
-		update_last_psn(qp, psn);
-		ret = 1;
-		goto bail;
-
-	case 1:		/* RNR NAK */
-		dev->n_rnr_naks++;
-		if (qp->s_last == qp->s_tail)
-			goto bail;
-		if (qp->s_rnr_retry == 0) {
-			status = IB_WC_RNR_RETRY_EXC_ERR;
-			goto class_b;
-		}
-		if (qp->s_rnr_retry_cnt < 7)
-			qp->s_rnr_retry--;
-
-		/* The last valid PSN is the previous PSN. */
-		update_last_psn(qp, psn - 1);
-
-		if (wqe->wr.opcode == IB_WR_RDMA_READ)
-			dev->n_rc_resends++;
-		else
-			dev->n_rc_resends +=
-				(qp->s_psn - psn) & IPATH_PSN_MASK;
-
-		reset_psn(qp, psn);
-
-		qp->s_rnr_timeout =
-			ib_ipath_rnr_table[(aeth >> IPATH_AETH_CREDIT_SHIFT) &
-					   IPATH_AETH_CREDIT_MASK];
-		ipath_insert_rnr_queue(qp);
-		ipath_schedule_send(qp);
-		goto bail;
-
-	case 3:		/* NAK */
-		if (qp->s_last == qp->s_tail)
-			goto bail;
-		/* The last valid PSN is the previous PSN. */
-		update_last_psn(qp, psn - 1);
-		switch ((aeth >> IPATH_AETH_CREDIT_SHIFT) &
-			IPATH_AETH_CREDIT_MASK) {
-		case 0:	/* PSN sequence error */
-			dev->n_seq_naks++;
-			/*
-			 * Back up to the responder's expected PSN.
-			 * Note that we might get a NAK in the middle of an
-			 * RDMA READ response which terminates the RDMA
-			 * READ.
-			 */
-			ipath_restart_rc(qp, psn);
-			break;
-
-		case 1:	/* Invalid Request */
-			status = IB_WC_REM_INV_REQ_ERR;
-			dev->n_other_naks++;
-			goto class_b;
-
-		case 2:	/* Remote Access Error */
-			status = IB_WC_REM_ACCESS_ERR;
-			dev->n_other_naks++;
-			goto class_b;
-
-		case 3:	/* Remote Operation Error */
-			status = IB_WC_REM_OP_ERR;
-			dev->n_other_naks++;
-		class_b:
-			ipath_send_complete(qp, wqe, status);
-			ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR);
-			break;
-
-		default:
-			/* Ignore other reserved NAK error codes */
-			goto reserved;
-		}
-		qp->s_rnr_retry = qp->s_rnr_retry_cnt;
-		goto bail;
-
-	default:		/* 2: reserved */
-	reserved:
-		/* Ignore reserved NAK codes. */
-		goto bail;
-	}
-
-bail:
-	return ret;
-}
-
-/**
- * ipath_rc_rcv_resp - process an incoming RC response packet
- * @dev: the device this packet came in on
- * @ohdr: the other headers for this packet
- * @data: the packet data
- * @tlen: the packet length
- * @qp: the QP for this packet
- * @opcode: the opcode for this packet
- * @psn: the packet sequence number for this packet
- * @hdrsize: the header length
- * @pmtu: the path MTU
- * @header_in_data: true if part of the header data is in the data buffer
- *
- * This is called from ipath_rc_rcv() to process an incoming RC response
- * packet for the given QP.
- * Called at interrupt level.
- */
-static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
-				     struct ipath_other_headers *ohdr,
-				     void *data, u32 tlen,
-				     struct ipath_qp *qp,
-				     u32 opcode,
-				     u32 psn, u32 hdrsize, u32 pmtu,
-				     int header_in_data)
-{
-	struct ipath_swqe *wqe;
-	enum ib_wc_status status;
-	unsigned long flags;
-	int diff;
-	u32 pad;
-	u32 aeth;
-	u64 val;
-
-	spin_lock_irqsave(&qp->s_lock, flags);
-
-	/* Double check we can process this now that we hold the s_lock. */
-	if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
-		goto ack_done;
-
-	/* Ignore invalid responses. */
-	if (ipath_cmp24(psn, qp->s_next_psn) >= 0)
-		goto ack_done;
-
-	/* Ignore duplicate responses. */
-	diff = ipath_cmp24(psn, qp->s_last_psn);
-	if (unlikely(diff <= 0)) {
-		/* Update credits for "ghost" ACKs */
-		if (diff == 0 && opcode == OP(ACKNOWLEDGE)) {
-			if (!header_in_data)
-				aeth = be32_to_cpu(ohdr->u.aeth);
-			else {
-				aeth = be32_to_cpu(((__be32 *) data)[0]);
-				data += sizeof(__be32);
-			}
-			if ((aeth >> 29) == 0)
-				ipath_get_credit(qp, aeth);
-		}
-		goto ack_done;
-	}
-
-	if (unlikely(qp->s_last == qp->s_tail))
-		goto ack_done;
-	wqe = get_swqe_ptr(qp, qp->s_last);
-	status = IB_WC_SUCCESS;
-
-	switch (opcode) {
-	case OP(ACKNOWLEDGE):
-	case OP(ATOMIC_ACKNOWLEDGE):
-	case OP(RDMA_READ_RESPONSE_FIRST):
-		if (!header_in_data)
-			aeth = be32_to_cpu(ohdr->u.aeth);
-		else {
-			aeth = be32_to_cpu(((__be32 *) data)[0]);
-			data += sizeof(__be32);
-		}
-		if (opcode == OP(ATOMIC_ACKNOWLEDGE)) {
-			if (!header_in_data) {
-				__be32 *p = ohdr->u.at.atomic_ack_eth;
-
-				val = ((u64) be32_to_cpu(p[0]) << 32) |
-					be32_to_cpu(p[1]);
-			} else
-				val = be64_to_cpu(((__be64 *) data)[0]);
-		} else
-			val = 0;
-		if (!do_rc_ack(qp, aeth, psn, opcode, val) ||
-		    opcode != OP(RDMA_READ_RESPONSE_FIRST))
-			goto ack_done;
-		hdrsize += 4;
-		wqe = get_swqe_ptr(qp, qp->s_last);
-		if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
-			goto ack_op_err;
-		qp->r_flags &= ~IPATH_R_RDMAR_SEQ;
-		/*
-		 * If this is a response to a resent RDMA read, we
-		 * have to be careful to copy the data to the right
-		 * location.
-		 */
-		qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge,
-						  wqe, psn, pmtu);
-		goto read_middle;
-
-	case OP(RDMA_READ_RESPONSE_MIDDLE):
-		/* no AETH, no ACK */
-		if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) {
-			dev->n_rdma_seq++;
-			if (qp->r_flags & IPATH_R_RDMAR_SEQ)
-				goto ack_done;
-			qp->r_flags |= IPATH_R_RDMAR_SEQ;
-			ipath_restart_rc(qp, qp->s_last_psn + 1);
-			goto ack_done;
-		}
-		if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
-			goto ack_op_err;
-	read_middle:
-		if (unlikely(tlen != (hdrsize + pmtu + 4)))
-			goto ack_len_err;
-		if (unlikely(pmtu >= qp->s_rdma_read_len))
-			goto ack_len_err;
-
-		/* We got a response so update the timeout. */
-		spin_lock(&dev->pending_lock);
-		if (qp->s_rnr_timeout == 0 && !list_empty(&qp->timerwait))
-			list_move_tail(&qp->timerwait,
-				       &dev->pending[dev->pending_index]);
-		spin_unlock(&dev->pending_lock);
-
-		if (opcode == OP(RDMA_READ_RESPONSE_MIDDLE))
-			qp->s_retry = qp->s_retry_cnt;
-
-		/*
-		 * Update the RDMA receive state but do the copy w/o
-		 * holding the locks and blocking interrupts.
-		 */
-		qp->s_rdma_read_len -= pmtu;
-		update_last_psn(qp, psn);
-		spin_unlock_irqrestore(&qp->s_lock, flags);
-		ipath_copy_sge(&qp->s_rdma_read_sge, data, pmtu);
-		goto bail;
-
-	case OP(RDMA_READ_RESPONSE_ONLY):
-		if (!header_in_data)
-			aeth = be32_to_cpu(ohdr->u.aeth);
-		else
-			aeth = be32_to_cpu(((__be32 *) data)[0]);
-		if (!do_rc_ack(qp, aeth, psn, opcode, 0))
-			goto ack_done;
-		/* Get the number of bytes the message was padded by. */
-		pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
-		/*
-		 * Check that the data size is >= 0 && <= pmtu.
-		 * Remember to account for the AETH header (4) and
-		 * ICRC (4).
-		 */
-		if (unlikely(tlen < (hdrsize + pad + 8)))
-			goto ack_len_err;
-		/*
-		 * If this is a response to a resent RDMA read, we
-		 * have to be careful to copy the data to the right
-		 * location.
-		 */
-		wqe = get_swqe_ptr(qp, qp->s_last);
-		qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge,
-						  wqe, psn, pmtu);
-		goto read_last;
-
-	case OP(RDMA_READ_RESPONSE_LAST):
-		/* ACKs READ req. */
-		if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) {
-			dev->n_rdma_seq++;
-			if (qp->r_flags & IPATH_R_RDMAR_SEQ)
-				goto ack_done;
-			qp->r_flags |= IPATH_R_RDMAR_SEQ;
-			ipath_restart_rc(qp, qp->s_last_psn + 1);
-			goto ack_done;
-		}
-		if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
-			goto ack_op_err;
-		/* Get the number of bytes the message was padded by. */
-		pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
-		/*
-		 * Check that the data size is >= 1 && <= pmtu.
-		 * Remember to account for the AETH header (4) and
-		 * ICRC (4).
-		 */
-		if (unlikely(tlen <= (hdrsize + pad + 8)))
-			goto ack_len_err;
-	read_last:
-		tlen -= hdrsize + pad + 8;
-		if (unlikely(tlen != qp->s_rdma_read_len))
-			goto ack_len_err;
-		if (!header_in_data)
-			aeth = be32_to_cpu(ohdr->u.aeth);
-		else {
-			aeth = be32_to_cpu(((__be32 *) data)[0]);
-			data += sizeof(__be32);
-		}
-		ipath_copy_sge(&qp->s_rdma_read_sge, data, tlen);
-		(void) do_rc_ack(qp, aeth, psn,
-				 OP(RDMA_READ_RESPONSE_LAST), 0);
-		goto ack_done;
-	}
-
-ack_op_err:
-	status = IB_WC_LOC_QP_OP_ERR;
-	goto ack_err;
-
-ack_len_err:
-	status = IB_WC_LOC_LEN_ERR;
-ack_err:
-	ipath_send_complete(qp, wqe, status);
-	ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR);
-ack_done:
-	spin_unlock_irqrestore(&qp->s_lock, flags);
-bail:
-	return;
-}
-
-/**
- * ipath_rc_rcv_error - process an incoming duplicate or error RC packet
- * @dev: the device this packet came in on
- * @ohdr: the other headers for this packet
- * @data: the packet data
- * @qp: the QP for this packet
- * @opcode: the opcode for this packet
- * @psn: the packet sequence number for this packet
- * @diff: the difference between the PSN and the expected PSN
- * @header_in_data: true if part of the header data is in the data buffer
- *
- * This is called from ipath_rc_rcv() to process an unexpected
- * incoming RC packet for the given QP.
- * Called at interrupt level.
- * Return 1 if no more processing is needed; otherwise return 0 to
- * schedule a response to be sent.
- */
-static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
-				     struct ipath_other_headers *ohdr,
-				     void *data,
-				     struct ipath_qp *qp,
-				     u32 opcode,
-				     u32 psn,
-				     int diff,
-				     int header_in_data)
-{
-	struct ipath_ack_entry *e;
-	u8 i, prev;
-	int old_req;
-	unsigned long flags;
-
-	if (diff > 0) {
-		/*
-		 * Packet sequence error.
-		 * A NAK will ACK earlier sends and RDMA writes.
-		 * Don't queue the NAK if we already sent one.
-		 */
-		if (!qp->r_nak_state) {
-			qp->r_nak_state = IB_NAK_PSN_ERROR;
-			/* Use the expected PSN. */
-			qp->r_ack_psn = qp->r_psn;
-			goto send_ack;
-		}
-		goto done;
-	}
-
-	/*
-	 * Handle a duplicate request.  Don't re-execute SEND, RDMA
-	 * write or atomic op.  Don't NAK errors, just silently drop
-	 * the duplicate request.  Note that r_sge, r_len, and
-	 * r_rcv_len may be in use so don't modify them.
-	 *
-	 * We are supposed to ACK the earliest duplicate PSN but we
-	 * can coalesce an outstanding duplicate ACK.  We have to
-	 * send the earliest so that RDMA reads can be restarted at
-	 * the requester's expected PSN.
-	 *
-	 * First, find where this duplicate PSN falls within the
-	 * ACKs previously sent.
-	 */
-	psn &= IPATH_PSN_MASK;
-	e = NULL;
-	old_req = 1;
-
-	spin_lock_irqsave(&qp->s_lock, flags);
-	/* Double check we can process this now that we hold the s_lock. */
-	if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
-		goto unlock_done;
-
-	for (i = qp->r_head_ack_queue; ; i = prev) {
-		if (i == qp->s_tail_ack_queue)
-			old_req = 0;
-		if (i)
-			prev = i - 1;
-		else
-			prev = IPATH_MAX_RDMA_ATOMIC;
-		if (prev == qp->r_head_ack_queue) {
-			e = NULL;
-			break;
-		}
-		e = &qp->s_ack_queue[prev];
-		if (!e->opcode) {
-			e = NULL;
-			break;
-		}
-		if (ipath_cmp24(psn, e->psn) >= 0) {
-			if (prev == qp->s_tail_ack_queue)
-				old_req = 0;
-			break;
-		}
-	}
-	switch (opcode) {
-	case OP(RDMA_READ_REQUEST): {
-		struct ib_reth *reth;
-		u32 offset;
-		u32 len;
-
-		/*
-		 * If we didn't find the RDMA read request in the ack queue,
-		 * or the send tasklet is already backed up to send an
-		 * earlier entry, we can ignore this request.
-		 */
-		if (!e || e->opcode != OP(RDMA_READ_REQUEST) || old_req)
-			goto unlock_done;
-		/* RETH comes after BTH */
-		if (!header_in_data)
-			reth = &ohdr->u.rc.reth;
-		else {
-			reth = (struct ib_reth *)data;
-			data += sizeof(*reth);
-		}
-		/*
-		 * Address range must be a subset of the original
-		 * request and start on pmtu boundaries.
-		 * We reuse the old ack_queue slot since the requester
-		 * should not back up and request an earlier PSN for the
-		 * same request.
-		 */
-		offset = ((psn - e->psn) & IPATH_PSN_MASK) *
-			ib_mtu_enum_to_int(qp->path_mtu);
-		len = be32_to_cpu(reth->length);
-		if (unlikely(offset + len > e->rdma_sge.sge.sge_length))
-			goto unlock_done;
-		if (len != 0) {
-			u32 rkey = be32_to_cpu(reth->rkey);
-			u64 vaddr = be64_to_cpu(reth->vaddr);
-			int ok;
-
-			ok = ipath_rkey_ok(qp, &e->rdma_sge,
-					   len, vaddr, rkey,
-					   IB_ACCESS_REMOTE_READ);
-			if (unlikely(!ok))
-				goto unlock_done;
-		} else {
-			e->rdma_sge.sg_list = NULL;
-			e->rdma_sge.num_sge = 0;
-			e->rdma_sge.sge.mr = NULL;
-			e->rdma_sge.sge.vaddr = NULL;
-			e->rdma_sge.sge.length = 0;
-			e->rdma_sge.sge.sge_length = 0;
-		}
-		e->psn = psn;
-		qp->s_ack_state = OP(ACKNOWLEDGE);
-		qp->s_tail_ack_queue = prev;
-		break;
-	}
-
-	case OP(COMPARE_SWAP):
-	case OP(FETCH_ADD): {
-		/*
-		 * If we didn't find the atomic request in the ack queue
-		 * or the send tasklet is already backed up to send an
-		 * earlier entry, we can ignore this request.
-		 */
-		if (!e || e->opcode != (u8) opcode || old_req)
-			goto unlock_done;
-		qp->s_ack_state = OP(ACKNOWLEDGE);
-		qp->s_tail_ack_queue = prev;
-		break;
-	}
-
-	default:
-		if (old_req)
-			goto unlock_done;
-		/*
-		 * Resend the most recent ACK if this request is
-		 * after all the previous RDMA reads and atomics.
-		 */
-		if (i == qp->r_head_ack_queue) {
-			spin_unlock_irqrestore(&qp->s_lock, flags);
-			qp->r_nak_state = 0;
-			qp->r_ack_psn = qp->r_psn - 1;
-			goto send_ack;
-		}
-		/*
-		 * Try to send a simple ACK to work around a Mellanox bug
-		 * which doesn't accept a RDMA read response or atomic
-		 * response as an ACK for earlier SENDs or RDMA writes.
-		 */
-		if (qp->r_head_ack_queue == qp->s_tail_ack_queue &&
-		    !(qp->s_flags & IPATH_S_ACK_PENDING) &&
-		    qp->s_ack_state == OP(ACKNOWLEDGE)) {
-			spin_unlock_irqrestore(&qp->s_lock, flags);
-			qp->r_nak_state = 0;
-			qp->r_ack_psn = qp->s_ack_queue[i].psn - 1;
-			goto send_ack;
-		}
-		/*
-		 * Resend the RDMA read or atomic op which
-		 * ACKs this duplicate request.
-		 */
-		qp->s_ack_state = OP(ACKNOWLEDGE);
-		qp->s_tail_ack_queue = i;
-		break;
-	}
-	qp->r_nak_state = 0;
-	ipath_schedule_send(qp);
-
-unlock_done:
-	spin_unlock_irqrestore(&qp->s_lock, flags);
-done:
-	return 1;
-
-send_ack:
-	return 0;
-}
-
-void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err)
-{
-	unsigned long flags;
-	int lastwqe;
-
-	spin_lock_irqsave(&qp->s_lock, flags);
-	lastwqe = ipath_error_qp(qp, err);
-	spin_unlock_irqrestore(&qp->s_lock, flags);
-
-	if (lastwqe) {
-		struct ib_event ev;
-
-		ev.device = qp->ibqp.device;
-		ev.element.qp = &qp->ibqp;
-		ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
-		qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
-	}
-}
-
-static inline void ipath_update_ack_queue(struct ipath_qp *qp, unsigned n)
-{
-	unsigned next;
-
-	next = n + 1;
-	if (next > IPATH_MAX_RDMA_ATOMIC)
-		next = 0;
-	if (n == qp->s_tail_ack_queue) {
-		qp->s_tail_ack_queue = next;
-		qp->s_ack_state = OP(ACKNOWLEDGE);
-	}
-}
-
-/**
- * ipath_rc_rcv - process an incoming RC packet
- * @dev: the device this packet came in on
- * @hdr: the header of this packet
- * @has_grh: true if the header has a GRH
- * @data: the packet data
- * @tlen: the packet length
- * @qp: the QP for this packet
- *
- * This is called from ipath_qp_rcv() to process an incoming RC packet
- * for the given QP.
- * Called at interrupt level.
- */
-void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
-		  int has_grh, void *data, u32 tlen, struct ipath_qp *qp)
-{
-	struct ipath_other_headers *ohdr;
-	u32 opcode;
-	u32 hdrsize;
-	u32 psn;
-	u32 pad;
-	struct ib_wc wc;
-	u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
-	int diff;
-	struct ib_reth *reth;
-	int header_in_data;
-	unsigned long flags;
-
-	/* Validate the SLID. See Ch. 9.6.1.5 */
-	if (unlikely(be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid))
-		goto done;
-
-	/* Check for GRH */
-	if (!has_grh) {
-		ohdr = &hdr->u.oth;
-		hdrsize = 8 + 12;	/* LRH + BTH */
-		psn = be32_to_cpu(ohdr->bth[2]);
-		header_in_data = 0;
-	} else {
-		ohdr = &hdr->u.l.oth;
-		hdrsize = 8 + 40 + 12;	/* LRH + GRH + BTH */
-		/*
-		 * The header with GRH is 60 bytes and the core driver sets
-		 * the eager header buffer size to 56 bytes so the last 4
-		 * bytes of the BTH header (PSN) is in the data buffer.
-		 */
-		header_in_data = dev->dd->ipath_rcvhdrentsize == 16;
-		if (header_in_data) {
-			psn = be32_to_cpu(((__be32 *) data)[0]);
-			data += sizeof(__be32);
-		} else
-			psn = be32_to_cpu(ohdr->bth[2]);
-	}
-
-	/*
-	 * Process responses (ACKs) before anything else.  Note that the
-	 * packet sequence number will be for something in the send work
-	 * queue rather than the expected receive packet sequence number.
-	 * In other words, this QP is the requester.
-	 */
-	opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
-	if (opcode >= OP(RDMA_READ_RESPONSE_FIRST) &&
-	    opcode <= OP(ATOMIC_ACKNOWLEDGE)) {
-		ipath_rc_rcv_resp(dev, ohdr, data, tlen, qp, opcode, psn,
-				  hdrsize, pmtu, header_in_data);
-		goto done;
-	}
-
-	/* Compute 24 bits worth of difference. */
-	diff = ipath_cmp24(psn, qp->r_psn);
-	if (unlikely(diff)) {
-		if (ipath_rc_rcv_error(dev, ohdr, data, qp, opcode,
-				       psn, diff, header_in_data))
-			goto done;
-		goto send_ack;
-	}
-
-	/* Check for opcode sequence errors. */
-	switch (qp->r_state) {
-	case OP(SEND_FIRST):
-	case OP(SEND_MIDDLE):
-		if (opcode == OP(SEND_MIDDLE) ||
-		    opcode == OP(SEND_LAST) ||
-		    opcode == OP(SEND_LAST_WITH_IMMEDIATE))
-			break;
-		goto nack_inv;
-
-	case OP(RDMA_WRITE_FIRST):
-	case OP(RDMA_WRITE_MIDDLE):
-		if (opcode == OP(RDMA_WRITE_MIDDLE) ||
-		    opcode == OP(RDMA_WRITE_LAST) ||
-		    opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
-			break;
-		goto nack_inv;
-
-	default:
-		if (opcode == OP(SEND_MIDDLE) ||
-		    opcode == OP(SEND_LAST) ||
-		    opcode == OP(SEND_LAST_WITH_IMMEDIATE) ||
-		    opcode == OP(RDMA_WRITE_MIDDLE) ||
-		    opcode == OP(RDMA_WRITE_LAST) ||
-		    opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
-			goto nack_inv;
-		/*
-		 * Note that it is up to the requester to not send a new
-		 * RDMA read or atomic operation before receiving an ACK
-		 * for the previous operation.
-		 */
-		break;
-	}
-
-	memset(&wc, 0, sizeof wc);
-
-	/* OK, process the packet. */
-	switch (opcode) {
-	case OP(SEND_FIRST):
-		if (!ipath_get_rwqe(qp, 0))
-			goto rnr_nak;
-		qp->r_rcv_len = 0;
-		/* FALLTHROUGH */
-	case OP(SEND_MIDDLE):
-	case OP(RDMA_WRITE_MIDDLE):
-	send_middle:
-		/* Check for invalid length PMTU or posted rwqe len. */
-		if (unlikely(tlen != (hdrsize + pmtu + 4)))
-			goto nack_inv;
-		qp->r_rcv_len += pmtu;
-		if (unlikely(qp->r_rcv_len > qp->r_len))
-			goto nack_inv;
-		ipath_copy_sge(&qp->r_sge, data, pmtu);
-		break;
-
-	case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
-		/* consume RWQE */
-		if (!ipath_get_rwqe(qp, 1))
-			goto rnr_nak;
-		goto send_last_imm;
-
-	case OP(SEND_ONLY):
-	case OP(SEND_ONLY_WITH_IMMEDIATE):
-		if (!ipath_get_rwqe(qp, 0))
-			goto rnr_nak;
-		qp->r_rcv_len = 0;
-		if (opcode == OP(SEND_ONLY))
-			goto send_last;
-		/* FALLTHROUGH */
-	case OP(SEND_LAST_WITH_IMMEDIATE):
-	send_last_imm:
-		if (header_in_data) {
-			wc.ex.imm_data = *(__be32 *) data;
-			data += sizeof(__be32);
-		} else {
-			/* Immediate data comes after BTH */
-			wc.ex.imm_data = ohdr->u.imm_data;
-		}
-		hdrsize += 4;
-		wc.wc_flags = IB_WC_WITH_IMM;
-		/* FALLTHROUGH */
-	case OP(SEND_LAST):
-	case OP(RDMA_WRITE_LAST):
-	send_last:
-		/* Get the number of bytes the message was padded by. */
-		pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
-		/* Check for invalid length. */
-		/* XXX LAST len should be >= 1 */
-		if (unlikely(tlen < (hdrsize + pad + 4)))
-			goto nack_inv;
-		/* Don't count the CRC. */
-		tlen -= (hdrsize + pad + 4);
-		wc.byte_len = tlen + qp->r_rcv_len;
-		if (unlikely(wc.byte_len > qp->r_len))
-			goto nack_inv;
-		ipath_copy_sge(&qp->r_sge, data, tlen);
-		qp->r_msn++;
-		if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags))
-			break;
-		wc.wr_id = qp->r_wr_id;
-		wc.status = IB_WC_SUCCESS;
-		if (opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE) ||
-		    opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE))
-			wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
-		else
-			wc.opcode = IB_WC_RECV;
-		wc.qp = &qp->ibqp;
-		wc.src_qp = qp->remote_qpn;
-		wc.slid = qp->remote_ah_attr.dlid;
-		wc.sl = qp->remote_ah_attr.sl;
-		/* Signal completion event if the solicited bit is set. */
-		ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
-			       (ohdr->bth[0] &
-				cpu_to_be32(1 << 23)) != 0);
-		break;
-
-	case OP(RDMA_WRITE_FIRST):
-	case OP(RDMA_WRITE_ONLY):
-	case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE):
-		if (unlikely(!(qp->qp_access_flags &
-			       IB_ACCESS_REMOTE_WRITE)))
-			goto nack_inv;
-		/* consume RWQE */
-		/* RETH comes after BTH */
-		if (!header_in_data)
-			reth = &ohdr->u.rc.reth;
-		else {
-			reth = (struct ib_reth *)data;
-			data += sizeof(*reth);
-		}
-		hdrsize += sizeof(*reth);
-		qp->r_len = be32_to_cpu(reth->length);
-		qp->r_rcv_len = 0;
-		if (qp->r_len != 0) {
-			u32 rkey = be32_to_cpu(reth->rkey);
-			u64 vaddr = be64_to_cpu(reth->vaddr);
-			int ok;
-
-			/* Check rkey & NAK */
-			ok = ipath_rkey_ok(qp, &qp->r_sge,
-					   qp->r_len, vaddr, rkey,
-					   IB_ACCESS_REMOTE_WRITE);
-			if (unlikely(!ok))
-				goto nack_acc;
-		} else {
-			qp->r_sge.sg_list = NULL;
-			qp->r_sge.sge.mr = NULL;
-			qp->r_sge.sge.vaddr = NULL;
-			qp->r_sge.sge.length = 0;
-			qp->r_sge.sge.sge_length = 0;
-		}
-		if (opcode == OP(RDMA_WRITE_FIRST))
-			goto send_middle;
-		else if (opcode == OP(RDMA_WRITE_ONLY))
-			goto send_last;
-		if (!ipath_get_rwqe(qp, 1))
-			goto rnr_nak;
-		goto send_last_imm;
-
-	case OP(RDMA_READ_REQUEST): {
-		struct ipath_ack_entry *e;
-		u32 len;
-		u8 next;
-
-		if (unlikely(!(qp->qp_access_flags &
-			       IB_ACCESS_REMOTE_READ)))
-			goto nack_inv;
-		next = qp->r_head_ack_queue + 1;
-		if (next > IPATH_MAX_RDMA_ATOMIC)
-			next = 0;
-		spin_lock_irqsave(&qp->s_lock, flags);
-		/* Double check we can process this while holding the s_lock. */
-		if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
-			goto unlock;
-		if (unlikely(next == qp->s_tail_ack_queue)) {
-			if (!qp->s_ack_queue[next].sent)
-				goto nack_inv_unlck;
-			ipath_update_ack_queue(qp, next);
-		}
-		e = &qp->s_ack_queue[qp->r_head_ack_queue];
-		/* RETH comes after BTH */
-		if (!header_in_data)
-			reth = &ohdr->u.rc.reth;
-		else {
-			reth = (struct ib_reth *)data;
-			data += sizeof(*reth);
-		}
-		len = be32_to_cpu(reth->length);
-		if (len) {
-			u32 rkey = be32_to_cpu(reth->rkey);
-			u64 vaddr = be64_to_cpu(reth->vaddr);
-			int ok;
-
-			/* Check rkey & NAK */
-			ok = ipath_rkey_ok(qp, &e->rdma_sge, len, vaddr,
-					   rkey, IB_ACCESS_REMOTE_READ);
-			if (unlikely(!ok))
-				goto nack_acc_unlck;
-			/*
-			 * Update the next expected PSN.  We add 1 later
-			 * below, so only add the remainder here.
-			 */
-			if (len > pmtu)
-				qp->r_psn += (len - 1) / pmtu;
-		} else {
-			e->rdma_sge.sg_list = NULL;
-			e->rdma_sge.num_sge = 0;
-			e->rdma_sge.sge.mr = NULL;
-			e->rdma_sge.sge.vaddr = NULL;
-			e->rdma_sge.sge.length = 0;
-			e->rdma_sge.sge.sge_length = 0;
-		}
-		e->opcode = opcode;
-		e->sent = 0;
-		e->psn = psn;
-		/*
-		 * We need to increment the MSN here instead of when we
-		 * finish sending the result since a duplicate request would
-		 * increment it more than once.
-		 */
-		qp->r_msn++;
-		qp->r_psn++;
-		qp->r_state = opcode;
-		qp->r_nak_state = 0;
-		qp->r_head_ack_queue = next;
-
-		/* Schedule the send tasklet. */
-		ipath_schedule_send(qp);
-
-		goto unlock;
-	}
-
-	case OP(COMPARE_SWAP):
-	case OP(FETCH_ADD): {
-		struct ib_atomic_eth *ateth;
-		struct ipath_ack_entry *e;
-		u64 vaddr;
-		atomic64_t *maddr;
-		u64 sdata;
-		u32 rkey;
-		u8 next;
-
-		if (unlikely(!(qp->qp_access_flags &
-			       IB_ACCESS_REMOTE_ATOMIC)))
-			goto nack_inv;
-		next = qp->r_head_ack_queue + 1;
-		if (next > IPATH_MAX_RDMA_ATOMIC)
-			next = 0;
-		spin_lock_irqsave(&qp->s_lock, flags);
-		/* Double check we can process this while holding the s_lock. */
-		if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
-			goto unlock;
-		if (unlikely(next == qp->s_tail_ack_queue)) {
-			if (!qp->s_ack_queue[next].sent)
-				goto nack_inv_unlck;
-			ipath_update_ack_queue(qp, next);
-		}
-		if (!header_in_data)
-			ateth = &ohdr->u.atomic_eth;
-		else
-			ateth = (struct ib_atomic_eth *)data;
-		vaddr = ((u64) be32_to_cpu(ateth->vaddr[0]) << 32) |
-			be32_to_cpu(ateth->vaddr[1]);
-		if (unlikely(vaddr & (sizeof(u64) - 1)))
-			goto nack_inv_unlck;
-		rkey = be32_to_cpu(ateth->rkey);
-		/* Check rkey & NAK */
-		if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge,
-					    sizeof(u64), vaddr, rkey,
-					    IB_ACCESS_REMOTE_ATOMIC)))
-			goto nack_acc_unlck;
-		/* Perform atomic OP and save result. */
-		maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
-		sdata = be64_to_cpu(ateth->swap_data);
-		e = &qp->s_ack_queue[qp->r_head_ack_queue];
-		e->atomic_data = (opcode == OP(FETCH_ADD)) ?
-			(u64) atomic64_add_return(sdata, maddr) - sdata :
-			(u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
-				      be64_to_cpu(ateth->compare_data),
-				      sdata);
-		e->opcode = opcode;
-		e->sent = 0;
-		e->psn = psn & IPATH_PSN_MASK;
-		qp->r_msn++;
-		qp->r_psn++;
-		qp->r_state = opcode;
-		qp->r_nak_state = 0;
-		qp->r_head_ack_queue = next;
-
-		/* Schedule the send tasklet. */
-		ipath_schedule_send(qp);
-
-		goto unlock;
-	}
-
-	default:
-		/* NAK unknown opcodes. */
-		goto nack_inv;
-	}
-	qp->r_psn++;
-	qp->r_state = opcode;
-	qp->r_ack_psn = psn;
-	qp->r_nak_state = 0;
-	/* Send an ACK if requested or required. */
-	if (psn & (1 << 31))
-		goto send_ack;
-	goto done;
-
-rnr_nak:
-	qp->r_nak_state = IB_RNR_NAK | qp->r_min_rnr_timer;
-	qp->r_ack_psn = qp->r_psn;
-	goto send_ack;
-
-nack_inv_unlck:
-	spin_unlock_irqrestore(&qp->s_lock, flags);
-nack_inv:
-	ipath_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
-	qp->r_nak_state = IB_NAK_INVALID_REQUEST;
-	qp->r_ack_psn = qp->r_psn;
-	goto send_ack;
-
-nack_acc_unlck:
-	spin_unlock_irqrestore(&qp->s_lock, flags);
-nack_acc:
-	ipath_rc_error(qp, IB_WC_LOC_PROT_ERR);
-	qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR;
-	qp->r_ack_psn = qp->r_psn;
-send_ack:
-	send_rc_ack(qp);
-	goto done;
-
-unlock:
-	spin_unlock_irqrestore(&qp->s_lock, flags);
-done:
-	return;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_registers.h b/drivers/staging/rdma/ipath/ipath_registers.h
deleted file mode 100644
index 8f44d0c..0000000
--- a/drivers/staging/rdma/ipath/ipath_registers.h
+++ /dev/null
@@ -1,512 +0,0 @@
-/*
- * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef _IPATH_REGISTERS_H
-#define _IPATH_REGISTERS_H
-
-/*
- * This file should only be included by kernel source, and by the diags.  It
- * defines the registers, and their contents, for InfiniPath chips.
- */
-
-/*
- * These are the InfiniPath register and buffer bit definitions,
- * that are visible to software, and needed only by the kernel
- * and diag code.  A few, that are visible to protocol and user
- * code are in ipath_common.h.  Some bits are specific
- * to a given chip implementation, and have been moved to the
- * chip-specific source file
- */
-
-/* kr_revision bits */
-#define INFINIPATH_R_CHIPREVMINOR_MASK 0xFF
-#define INFINIPATH_R_CHIPREVMINOR_SHIFT 0
-#define INFINIPATH_R_CHIPREVMAJOR_MASK 0xFF
-#define INFINIPATH_R_CHIPREVMAJOR_SHIFT 8
-#define INFINIPATH_R_ARCH_MASK 0xFF
-#define INFINIPATH_R_ARCH_SHIFT 16
-#define INFINIPATH_R_SOFTWARE_MASK 0xFF
-#define INFINIPATH_R_SOFTWARE_SHIFT 24
-#define INFINIPATH_R_BOARDID_MASK 0xFF
-#define INFINIPATH_R_BOARDID_SHIFT 32
-
-/* kr_control bits */
-#define INFINIPATH_C_FREEZEMODE 0x00000002
-#define INFINIPATH_C_LINKENABLE 0x00000004
-
-/* kr_sendctrl bits */
-#define INFINIPATH_S_DISARMPIOBUF_SHIFT 16
-#define INFINIPATH_S_UPDTHRESH_SHIFT 24
-#define INFINIPATH_S_UPDTHRESH_MASK 0x1f
-
-#define IPATH_S_ABORT		0
-#define IPATH_S_PIOINTBUFAVAIL	1
-#define IPATH_S_PIOBUFAVAILUPD	2
-#define IPATH_S_PIOENABLE	3
-#define IPATH_S_SDMAINTENABLE	9
-#define IPATH_S_SDMASINGLEDESCRIPTOR	10
-#define IPATH_S_SDMAENABLE	11
-#define IPATH_S_SDMAHALT	12
-#define IPATH_S_DISARM		31
-
-#define INFINIPATH_S_ABORT		(1U << IPATH_S_ABORT)
-#define INFINIPATH_S_PIOINTBUFAVAIL	(1U << IPATH_S_PIOINTBUFAVAIL)
-#define INFINIPATH_S_PIOBUFAVAILUPD	(1U << IPATH_S_PIOBUFAVAILUPD)
-#define INFINIPATH_S_PIOENABLE		(1U << IPATH_S_PIOENABLE)
-#define INFINIPATH_S_SDMAINTENABLE	(1U << IPATH_S_SDMAINTENABLE)
-#define INFINIPATH_S_SDMASINGLEDESCRIPTOR \
-					(1U << IPATH_S_SDMASINGLEDESCRIPTOR)
-#define INFINIPATH_S_SDMAENABLE		(1U << IPATH_S_SDMAENABLE)
-#define INFINIPATH_S_SDMAHALT		(1U << IPATH_S_SDMAHALT)
-#define INFINIPATH_S_DISARM		(1U << IPATH_S_DISARM)
-
-/* kr_rcvctrl bits that are the same on multiple chips */
-#define INFINIPATH_R_PORTENABLE_SHIFT 0
-#define INFINIPATH_R_QPMAP_ENABLE (1ULL << 38)
-
-/* kr_intstatus, kr_intclear, kr_intmask bits */
-#define INFINIPATH_I_SDMAINT		0x8000000000000000ULL
-#define INFINIPATH_I_SDMADISABLED	0x4000000000000000ULL
-#define INFINIPATH_I_ERROR		0x0000000080000000ULL
-#define INFINIPATH_I_SPIOSENT		0x0000000040000000ULL
-#define INFINIPATH_I_SPIOBUFAVAIL	0x0000000020000000ULL
-#define INFINIPATH_I_GPIO		0x0000000010000000ULL
-#define INFINIPATH_I_JINT		0x0000000004000000ULL
-
-/* kr_errorstatus, kr_errorclear, kr_errormask bits */
-#define INFINIPATH_E_RFORMATERR			0x0000000000000001ULL
-#define INFINIPATH_E_RVCRC			0x0000000000000002ULL
-#define INFINIPATH_E_RICRC			0x0000000000000004ULL
-#define INFINIPATH_E_RMINPKTLEN			0x0000000000000008ULL
-#define INFINIPATH_E_RMAXPKTLEN			0x0000000000000010ULL
-#define INFINIPATH_E_RLONGPKTLEN		0x0000000000000020ULL
-#define INFINIPATH_E_RSHORTPKTLEN		0x0000000000000040ULL
-#define INFINIPATH_E_RUNEXPCHAR			0x0000000000000080ULL
-#define INFINIPATH_E_RUNSUPVL			0x0000000000000100ULL
-#define INFINIPATH_E_REBP			0x0000000000000200ULL
-#define INFINIPATH_E_RIBFLOW			0x0000000000000400ULL
-#define INFINIPATH_E_RBADVERSION		0x0000000000000800ULL
-#define INFINIPATH_E_RRCVEGRFULL		0x0000000000001000ULL
-#define INFINIPATH_E_RRCVHDRFULL		0x0000000000002000ULL
-#define INFINIPATH_E_RBADTID			0x0000000000004000ULL
-#define INFINIPATH_E_RHDRLEN			0x0000000000008000ULL
-#define INFINIPATH_E_RHDR			0x0000000000010000ULL
-#define INFINIPATH_E_RIBLOSTLINK		0x0000000000020000ULL
-#define INFINIPATH_E_SENDSPECIALTRIGGER		0x0000000008000000ULL
-#define INFINIPATH_E_SDMADISABLED		0x0000000010000000ULL
-#define INFINIPATH_E_SMINPKTLEN			0x0000000020000000ULL
-#define INFINIPATH_E_SMAXPKTLEN			0x0000000040000000ULL
-#define INFINIPATH_E_SUNDERRUN			0x0000000080000000ULL
-#define INFINIPATH_E_SPKTLEN			0x0000000100000000ULL
-#define INFINIPATH_E_SDROPPEDSMPPKT		0x0000000200000000ULL
-#define INFINIPATH_E_SDROPPEDDATAPKT		0x0000000400000000ULL
-#define INFINIPATH_E_SPIOARMLAUNCH		0x0000000800000000ULL
-#define INFINIPATH_E_SUNEXPERRPKTNUM		0x0000001000000000ULL
-#define INFINIPATH_E_SUNSUPVL			0x0000002000000000ULL
-#define INFINIPATH_E_SENDBUFMISUSE		0x0000004000000000ULL
-#define INFINIPATH_E_SDMAGENMISMATCH		0x0000008000000000ULL
-#define INFINIPATH_E_SDMAOUTOFBOUND		0x0000010000000000ULL
-#define INFINIPATH_E_SDMATAILOUTOFBOUND		0x0000020000000000ULL
-#define INFINIPATH_E_SDMABASE			0x0000040000000000ULL
-#define INFINIPATH_E_SDMA1STDESC		0x0000080000000000ULL
-#define INFINIPATH_E_SDMARPYTAG			0x0000100000000000ULL
-#define INFINIPATH_E_SDMADWEN			0x0000200000000000ULL
-#define INFINIPATH_E_SDMAMISSINGDW		0x0000400000000000ULL
-#define INFINIPATH_E_SDMAUNEXPDATA		0x0000800000000000ULL
-#define INFINIPATH_E_IBSTATUSCHANGED		0x0001000000000000ULL
-#define INFINIPATH_E_INVALIDADDR		0x0002000000000000ULL
-#define INFINIPATH_E_RESET			0x0004000000000000ULL
-#define INFINIPATH_E_HARDWARE			0x0008000000000000ULL
-#define INFINIPATH_E_SDMADESCADDRMISALIGN	0x0010000000000000ULL
-#define INFINIPATH_E_INVALIDEEPCMD		0x0020000000000000ULL
-
-/*
- * this is used to print "common" packet errors only when the
- * __IPATH_ERRPKTDBG bit is set in ipath_debug.
- */
-#define INFINIPATH_E_PKTERRS ( INFINIPATH_E_SPKTLEN \
-		| INFINIPATH_E_SDROPPEDDATAPKT | INFINIPATH_E_RVCRC \
-		| INFINIPATH_E_RICRC | INFINIPATH_E_RSHORTPKTLEN \
-		| INFINIPATH_E_REBP )
-
-/* Convenience for decoding Send DMA errors */
-#define INFINIPATH_E_SDMAERRS ( \
-	INFINIPATH_E_SDMAGENMISMATCH | INFINIPATH_E_SDMAOUTOFBOUND | \
-	INFINIPATH_E_SDMATAILOUTOFBOUND | INFINIPATH_E_SDMABASE | \
-	INFINIPATH_E_SDMA1STDESC | INFINIPATH_E_SDMARPYTAG | \
-	INFINIPATH_E_SDMADWEN | INFINIPATH_E_SDMAMISSINGDW | \
-	INFINIPATH_E_SDMAUNEXPDATA | \
-	INFINIPATH_E_SDMADESCADDRMISALIGN | \
-	INFINIPATH_E_SDMADISABLED | \
-	INFINIPATH_E_SENDBUFMISUSE)
-
-/* kr_hwerrclear, kr_hwerrmask, kr_hwerrstatus, bits */
-/* TXEMEMPARITYERR bit 0: PIObuf, 1: PIOpbc, 2: launchfifo
- * RXEMEMPARITYERR bit 0: rcvbuf, 1: lookupq, 2:  expTID, 3: eagerTID
- * 		bit 4: flag buffer, 5: datainfo, 6: header info */
-#define INFINIPATH_HWE_TXEMEMPARITYERR_MASK 0xFULL
-#define INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT 40
-#define INFINIPATH_HWE_RXEMEMPARITYERR_MASK 0x7FULL
-#define INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT 44
-#define INFINIPATH_HWE_IBCBUSTOSPCPARITYERR 0x4000000000000000ULL
-#define INFINIPATH_HWE_IBCBUSFRSPCPARITYERR 0x8000000000000000ULL
-/* txe mem parity errors (shift by INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) */
-#define INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF	0x1ULL
-#define INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC	0x2ULL
-#define INFINIPATH_HWE_TXEMEMPARITYERR_PIOLAUNCHFIFO 0x4ULL
-/* rxe mem parity errors (shift by INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) */
-#define INFINIPATH_HWE_RXEMEMPARITYERR_RCVBUF   0x01ULL
-#define INFINIPATH_HWE_RXEMEMPARITYERR_LOOKUPQ  0x02ULL
-#define INFINIPATH_HWE_RXEMEMPARITYERR_EXPTID   0x04ULL
-#define INFINIPATH_HWE_RXEMEMPARITYERR_EAGERTID 0x08ULL
-#define INFINIPATH_HWE_RXEMEMPARITYERR_FLAGBUF  0x10ULL
-#define INFINIPATH_HWE_RXEMEMPARITYERR_DATAINFO 0x20ULL
-#define INFINIPATH_HWE_RXEMEMPARITYERR_HDRINFO  0x40ULL
-/* waldo specific -- find the rest in ipath_6110.c */
-#define INFINIPATH_HWE_RXDSYNCMEMPARITYERR  0x0000000400000000ULL
-/* 6120/7220 specific -- find the rest in ipath_6120.c and ipath_7220.c */
-#define INFINIPATH_HWE_MEMBISTFAILED	0x0040000000000000ULL
-
-/* kr_hwdiagctrl bits */
-#define INFINIPATH_DC_FORCETXEMEMPARITYERR_MASK 0xFULL
-#define INFINIPATH_DC_FORCETXEMEMPARITYERR_SHIFT 40
-#define INFINIPATH_DC_FORCERXEMEMPARITYERR_MASK 0x7FULL
-#define INFINIPATH_DC_FORCERXEMEMPARITYERR_SHIFT 44
-#define INFINIPATH_DC_FORCERXDSYNCMEMPARITYERR  0x0000000400000000ULL
-#define INFINIPATH_DC_COUNTERDISABLE            0x1000000000000000ULL
-#define INFINIPATH_DC_COUNTERWREN               0x2000000000000000ULL
-#define INFINIPATH_DC_FORCEIBCBUSTOSPCPARITYERR 0x4000000000000000ULL
-#define INFINIPATH_DC_FORCEIBCBUSFRSPCPARITYERR 0x8000000000000000ULL
-
-/* kr_ibcctrl bits */
-#define INFINIPATH_IBCC_FLOWCTRLPERIOD_MASK 0xFFULL
-#define INFINIPATH_IBCC_FLOWCTRLPERIOD_SHIFT 0
-#define INFINIPATH_IBCC_FLOWCTRLWATERMARK_MASK 0xFFULL
-#define INFINIPATH_IBCC_FLOWCTRLWATERMARK_SHIFT 8
-#define INFINIPATH_IBCC_LINKINITCMD_MASK 0x3ULL
-#define INFINIPATH_IBCC_LINKINITCMD_DISABLE 1
-/* cycle through TS1/TS2 till OK */
-#define INFINIPATH_IBCC_LINKINITCMD_POLL 2
-/* wait for TS1, then go on */
-#define INFINIPATH_IBCC_LINKINITCMD_SLEEP 3
-#define INFINIPATH_IBCC_LINKINITCMD_SHIFT 16
-#define INFINIPATH_IBCC_LINKCMD_MASK 0x3ULL
-#define INFINIPATH_IBCC_LINKCMD_DOWN 1		/* move to 0x11 */
-#define INFINIPATH_IBCC_LINKCMD_ARMED 2		/* move to 0x21 */
-#define INFINIPATH_IBCC_LINKCMD_ACTIVE 3	/* move to 0x31 */
-#define INFINIPATH_IBCC_LINKCMD_SHIFT 18
-#define INFINIPATH_IBCC_MAXPKTLEN_MASK 0x7FFULL
-#define INFINIPATH_IBCC_MAXPKTLEN_SHIFT 20
-#define INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK 0xFULL
-#define INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT 32
-#define INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK 0xFULL
-#define INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT 36
-#define INFINIPATH_IBCC_CREDITSCALE_MASK 0x7ULL
-#define INFINIPATH_IBCC_CREDITSCALE_SHIFT 40
-#define INFINIPATH_IBCC_LOOPBACK             0x8000000000000000ULL
-#define INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE 0x4000000000000000ULL
-
-/* kr_ibcstatus bits */
-#define INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT 0
-#define INFINIPATH_IBCS_LINKSTATE_MASK 0x7
-
-#define INFINIPATH_IBCS_TXREADY       0x40000000
-#define INFINIPATH_IBCS_TXCREDITOK    0x80000000
-/* link training states (shift by
-   INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) */
-#define INFINIPATH_IBCS_LT_STATE_DISABLED	0x00
-#define INFINIPATH_IBCS_LT_STATE_LINKUP		0x01
-#define INFINIPATH_IBCS_LT_STATE_POLLACTIVE	0x02
-#define INFINIPATH_IBCS_LT_STATE_POLLQUIET	0x03
-#define INFINIPATH_IBCS_LT_STATE_SLEEPDELAY	0x04
-#define INFINIPATH_IBCS_LT_STATE_SLEEPQUIET	0x05
-#define INFINIPATH_IBCS_LT_STATE_CFGDEBOUNCE	0x08
-#define INFINIPATH_IBCS_LT_STATE_CFGRCVFCFG	0x09
-#define INFINIPATH_IBCS_LT_STATE_CFGWAITRMT	0x0a
-#define INFINIPATH_IBCS_LT_STATE_CFGIDLE	0x0b
-#define INFINIPATH_IBCS_LT_STATE_RECOVERRETRAIN	0x0c
-#define INFINIPATH_IBCS_LT_STATE_RECOVERWAITRMT	0x0e
-#define INFINIPATH_IBCS_LT_STATE_RECOVERIDLE	0x0f
-/* link state machine states (shift by ibcs_ls_shift) */
-#define INFINIPATH_IBCS_L_STATE_DOWN		0x0
-#define INFINIPATH_IBCS_L_STATE_INIT		0x1
-#define INFINIPATH_IBCS_L_STATE_ARM		0x2
-#define INFINIPATH_IBCS_L_STATE_ACTIVE		0x3
-#define INFINIPATH_IBCS_L_STATE_ACT_DEFER	0x4
-
-
-/* kr_extstatus bits */
-#define INFINIPATH_EXTS_SERDESPLLLOCK 0x1
-#define INFINIPATH_EXTS_GPIOIN_MASK 0xFFFFULL
-#define INFINIPATH_EXTS_GPIOIN_SHIFT 48
-
-/* kr_extctrl bits */
-#define INFINIPATH_EXTC_GPIOINVERT_MASK 0xFFFFULL
-#define INFINIPATH_EXTC_GPIOINVERT_SHIFT 32
-#define INFINIPATH_EXTC_GPIOOE_MASK 0xFFFFULL
-#define INFINIPATH_EXTC_GPIOOE_SHIFT 48
-#define INFINIPATH_EXTC_SERDESENABLE         0x80000000ULL
-#define INFINIPATH_EXTC_SERDESCONNECT        0x40000000ULL
-#define INFINIPATH_EXTC_SERDESENTRUNKING     0x20000000ULL
-#define INFINIPATH_EXTC_SERDESDISRXFIFO      0x10000000ULL
-#define INFINIPATH_EXTC_SERDESENPLPBK1       0x08000000ULL
-#define INFINIPATH_EXTC_SERDESENPLPBK2       0x04000000ULL
-#define INFINIPATH_EXTC_SERDESENENCDEC       0x02000000ULL
-#define INFINIPATH_EXTC_LED1SECPORT_ON       0x00000020ULL
-#define INFINIPATH_EXTC_LED2SECPORT_ON       0x00000010ULL
-#define INFINIPATH_EXTC_LED1PRIPORT_ON       0x00000008ULL
-#define INFINIPATH_EXTC_LED2PRIPORT_ON       0x00000004ULL
-#define INFINIPATH_EXTC_LEDGBLOK_ON          0x00000002ULL
-#define INFINIPATH_EXTC_LEDGBLERR_OFF        0x00000001ULL
-
-/* kr_partitionkey bits */
-#define INFINIPATH_PKEY_SIZE 16
-#define INFINIPATH_PKEY_MASK 0xFFFF
-#define INFINIPATH_PKEY_DEFAULT_PKEY 0xFFFF
-
-/* kr_serdesconfig0 bits */
-#define INFINIPATH_SERDC0_RESET_MASK  0xfULL	/* overal reset bits */
-#define INFINIPATH_SERDC0_RESET_PLL   0x10000000ULL	/* pll reset */
-/* tx idle enables (per lane) */
-#define INFINIPATH_SERDC0_TXIDLE      0xF000ULL
-/* rx detect enables (per lane) */
-#define INFINIPATH_SERDC0_RXDETECT_EN 0xF0000ULL
-/* L1 Power down; use with RXDETECT, Otherwise not used on IB side */
-#define INFINIPATH_SERDC0_L1PWR_DN	 0xF0ULL
-
-/* common kr_xgxsconfig bits (or safe in all, even if not implemented) */
-#define INFINIPATH_XGXS_RX_POL_SHIFT 19
-#define INFINIPATH_XGXS_RX_POL_MASK 0xfULL
-
-
-/*
- * IPATH_PIO_MAXIBHDR is the max IB header size allowed for in our
- * PIO send buffers.  This is well beyond anything currently
- * defined in the InfiniBand spec.
- */
-#define IPATH_PIO_MAXIBHDR 128
-
-typedef u64 ipath_err_t;
-
-/* The following change with the type of device, so
- * need to be part of the ipath_devdata struct, or
- * we could have problems plugging in devices of
- * different types (e.g. one HT, one PCIE)
- * in one system, to be managed by one driver.
- * On the other hand, this file is may also be included
- * by other code, so leave the declarations here
- * temporarily. Minor footprint issue if common-model
- * linker used, none if C89+ linker used.
- */
-
-/* mask of defined bits for various registers */
-extern u64 infinipath_i_bitsextant;
-extern ipath_err_t infinipath_e_bitsextant, infinipath_hwe_bitsextant;
-
-/* masks that are different in various chips, or only exist in some chips */
-extern u32 infinipath_i_rcvavail_mask, infinipath_i_rcvurg_mask;
-
-/*
- * These are the infinipath general register numbers (not offsets).
- * The kernel registers are used directly, those beyond the kernel
- * registers are calculated from one of the base registers.  The use of
- * an integer type doesn't allow type-checking as thorough as, say,
- * an enum but allows for better hiding of chip differences.
- */
-typedef const u16 ipath_kreg,	/* infinipath general registers */
- ipath_creg,			/* infinipath counter registers */
- ipath_sreg;			/* kernel-only, infinipath send registers */
-
-/*
- * These are the chip registers common to all infinipath chips, and
- * used both by the kernel and the diagnostics or other user code.
- * They are all implemented such that 64 bit accesses work.
- * Some implement no more than 32 bits.  Because 64 bit reads
- * require 2 HT cmds on opteron, we access those with 32 bit
- * reads for efficiency (they are written as 64 bits, since
- * the extra 32 bits are nearly free on writes, and it slightly reduces
- * complexity).  The rest are all accessed as 64 bits.
- */
-struct ipath_kregs {
-	/* These are the 32 bit group */
-	ipath_kreg kr_control;
-	ipath_kreg kr_counterregbase;
-	ipath_kreg kr_intmask;
-	ipath_kreg kr_intstatus;
-	ipath_kreg kr_pagealign;
-	ipath_kreg kr_portcnt;
-	ipath_kreg kr_rcvtidbase;
-	ipath_kreg kr_rcvtidcnt;
-	ipath_kreg kr_rcvegrbase;
-	ipath_kreg kr_rcvegrcnt;
-	ipath_kreg kr_scratch;
-	ipath_kreg kr_sendctrl;
-	ipath_kreg kr_sendpiobufbase;
-	ipath_kreg kr_sendpiobufcnt;
-	ipath_kreg kr_sendpiosize;
-	ipath_kreg kr_sendregbase;
-	ipath_kreg kr_userregbase;
-	/* These are the 64 bit group */
-	ipath_kreg kr_debugport;
-	ipath_kreg kr_debugportselect;
-	ipath_kreg kr_errorclear;
-	ipath_kreg kr_errormask;
-	ipath_kreg kr_errorstatus;
-	ipath_kreg kr_extctrl;
-	ipath_kreg kr_extstatus;
-	ipath_kreg kr_gpio_clear;
-	ipath_kreg kr_gpio_mask;
-	ipath_kreg kr_gpio_out;
-	ipath_kreg kr_gpio_status;
-	ipath_kreg kr_hwdiagctrl;
-	ipath_kreg kr_hwerrclear;
-	ipath_kreg kr_hwerrmask;
-	ipath_kreg kr_hwerrstatus;
-	ipath_kreg kr_ibcctrl;
-	ipath_kreg kr_ibcstatus;
-	ipath_kreg kr_intblocked;
-	ipath_kreg kr_intclear;
-	ipath_kreg kr_interruptconfig;
-	ipath_kreg kr_mdio;
-	ipath_kreg kr_partitionkey;
-	ipath_kreg kr_rcvbthqp;
-	ipath_kreg kr_rcvbufbase;
-	ipath_kreg kr_rcvbufsize;
-	ipath_kreg kr_rcvctrl;
-	ipath_kreg kr_rcvhdrcnt;
-	ipath_kreg kr_rcvhdrentsize;
-	ipath_kreg kr_rcvhdrsize;
-	ipath_kreg kr_rcvintmembase;
-	ipath_kreg kr_rcvintmemsize;
-	ipath_kreg kr_revision;
-	ipath_kreg kr_sendbuffererror;
-	ipath_kreg kr_sendpioavailaddr;
-	ipath_kreg kr_serdesconfig0;
-	ipath_kreg kr_serdesconfig1;
-	ipath_kreg kr_serdesstatus;
-	ipath_kreg kr_txintmembase;
-	ipath_kreg kr_txintmemsize;
-	ipath_kreg kr_xgxsconfig;
-	ipath_kreg kr_ibpllcfg;
-	/* use these two (and the following N ports) only with
-	 * ipath_k*_kreg64_port(); not *kreg64() */
-	ipath_kreg kr_rcvhdraddr;
-	ipath_kreg kr_rcvhdrtailaddr;
-
-	/* remaining registers are not present on all types of infinipath
-	   chips  */
-	ipath_kreg kr_rcvpktledcnt;
-	ipath_kreg kr_pcierbuftestreg0;
-	ipath_kreg kr_pcierbuftestreg1;
-	ipath_kreg kr_pcieq0serdesconfig0;
-	ipath_kreg kr_pcieq0serdesconfig1;
-	ipath_kreg kr_pcieq0serdesstatus;
-	ipath_kreg kr_pcieq1serdesconfig0;
-	ipath_kreg kr_pcieq1serdesconfig1;
-	ipath_kreg kr_pcieq1serdesstatus;
-	ipath_kreg kr_hrtbt_guid;
-	ipath_kreg kr_ibcddrctrl;
-	ipath_kreg kr_ibcddrstatus;
-	ipath_kreg kr_jintreload;
-
-	/* send dma related regs */
-	ipath_kreg kr_senddmabase;
-	ipath_kreg kr_senddmalengen;
-	ipath_kreg kr_senddmatail;
-	ipath_kreg kr_senddmahead;
-	ipath_kreg kr_senddmaheadaddr;
-	ipath_kreg kr_senddmabufmask0;
-	ipath_kreg kr_senddmabufmask1;
-	ipath_kreg kr_senddmabufmask2;
-	ipath_kreg kr_senddmastatus;
-
-	/* SerDes related regs (IBA7220-only) */
-	ipath_kreg kr_ibserdesctrl;
-	ipath_kreg kr_ib_epbacc;
-	ipath_kreg kr_ib_epbtrans;
-	ipath_kreg kr_pcie_epbacc;
-	ipath_kreg kr_pcie_epbtrans;
-	ipath_kreg kr_ib_ddsrxeq;
-};
-
-struct ipath_cregs {
-	ipath_creg cr_badformatcnt;
-	ipath_creg cr_erricrccnt;
-	ipath_creg cr_errlinkcnt;
-	ipath_creg cr_errlpcrccnt;
-	ipath_creg cr_errpkey;
-	ipath_creg cr_errrcvflowctrlcnt;
-	ipath_creg cr_err_rlencnt;
-	ipath_creg cr_errslencnt;
-	ipath_creg cr_errtidfull;
-	ipath_creg cr_errtidvalid;
-	ipath_creg cr_errvcrccnt;
-	ipath_creg cr_ibstatuschange;
-	ipath_creg cr_intcnt;
-	ipath_creg cr_invalidrlencnt;
-	ipath_creg cr_invalidslencnt;
-	ipath_creg cr_lbflowstallcnt;
-	ipath_creg cr_iblinkdowncnt;
-	ipath_creg cr_iblinkerrrecovcnt;
-	ipath_creg cr_ibsymbolerrcnt;
-	ipath_creg cr_pktrcvcnt;
-	ipath_creg cr_pktrcvflowctrlcnt;
-	ipath_creg cr_pktsendcnt;
-	ipath_creg cr_pktsendflowcnt;
-	ipath_creg cr_portovflcnt;
-	ipath_creg cr_rcvebpcnt;
-	ipath_creg cr_rcvovflcnt;
-	ipath_creg cr_rxdroppktcnt;
-	ipath_creg cr_senddropped;
-	ipath_creg cr_sendstallcnt;
-	ipath_creg cr_sendunderruncnt;
-	ipath_creg cr_unsupvlcnt;
-	ipath_creg cr_wordrcvcnt;
-	ipath_creg cr_wordsendcnt;
-	ipath_creg cr_vl15droppedpktcnt;
-	ipath_creg cr_rxotherlocalphyerrcnt;
-	ipath_creg cr_excessbufferovflcnt;
-	ipath_creg cr_locallinkintegrityerrcnt;
-	ipath_creg cr_rxvlerrcnt;
-	ipath_creg cr_rxdlidfltrcnt;
-	ipath_creg cr_psstat;
-	ipath_creg cr_psstart;
-	ipath_creg cr_psinterval;
-	ipath_creg cr_psrcvdatacount;
-	ipath_creg cr_psrcvpktscount;
-	ipath_creg cr_psxmitdatacount;
-	ipath_creg cr_psxmitpktscount;
-	ipath_creg cr_psxmitwaitcount;
-};
-
-#endif				/* _IPATH_REGISTERS_H */
diff --git a/drivers/staging/rdma/ipath/ipath_ruc.c b/drivers/staging/rdma/ipath/ipath_ruc.c
deleted file mode 100644
index e541a01..0000000
--- a/drivers/staging/rdma/ipath/ipath_ruc.c
+++ /dev/null
@@ -1,733 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/spinlock.h>
-
-#include "ipath_verbs.h"
-#include "ipath_kernel.h"
-
-/*
- * Convert the AETH RNR timeout code into the number of milliseconds.
- */
-const u32 ib_ipath_rnr_table[32] = {
-	656,			/* 0 */
-	1,			/* 1 */
-	1,			/* 2 */
-	1,			/* 3 */
-	1,			/* 4 */
-	1,			/* 5 */
-	1,			/* 6 */
-	1,			/* 7 */
-	1,			/* 8 */
-	1,			/* 9 */
-	1,			/* A */
-	1,			/* B */
-	1,			/* C */
-	1,			/* D */
-	2,			/* E */
-	2,			/* F */
-	3,			/* 10 */
-	4,			/* 11 */
-	6,			/* 12 */
-	8,			/* 13 */
-	11,			/* 14 */
-	16,			/* 15 */
-	21,			/* 16 */
-	31,			/* 17 */
-	41,			/* 18 */
-	62,			/* 19 */
-	82,			/* 1A */
-	123,			/* 1B */
-	164,			/* 1C */
-	246,			/* 1D */
-	328,			/* 1E */
-	492			/* 1F */
-};
-
-/**
- * ipath_insert_rnr_queue - put QP on the RNR timeout list for the device
- * @qp: the QP
- *
- * Called with the QP s_lock held and interrupts disabled.
- * XXX Use a simple list for now.  We might need a priority
- * queue if we have lots of QPs waiting for RNR timeouts
- * but that should be rare.
- */
-void ipath_insert_rnr_queue(struct ipath_qp *qp)
-{
-	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
-
-	/* We already did a spin_lock_irqsave(), so just use spin_lock */
-	spin_lock(&dev->pending_lock);
-	if (list_empty(&dev->rnrwait))
-		list_add(&qp->timerwait, &dev->rnrwait);
-	else {
-		struct list_head *l = &dev->rnrwait;
-		struct ipath_qp *nqp = list_entry(l->next, struct ipath_qp,
-						  timerwait);
-
-		while (qp->s_rnr_timeout >= nqp->s_rnr_timeout) {
-			qp->s_rnr_timeout -= nqp->s_rnr_timeout;
-			l = l->next;
-			if (l->next == &dev->rnrwait) {
-				nqp = NULL;
-				break;
-			}
-			nqp = list_entry(l->next, struct ipath_qp,
-					 timerwait);
-		}
-		if (nqp)
-			nqp->s_rnr_timeout -= qp->s_rnr_timeout;
-		list_add(&qp->timerwait, l);
-	}
-	spin_unlock(&dev->pending_lock);
-}
-
-/**
- * ipath_init_sge - Validate a RWQE and fill in the SGE state
- * @qp: the QP
- *
- * Return 1 if OK.
- */
-int ipath_init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe,
-		   u32 *lengthp, struct ipath_sge_state *ss)
-{
-	int i, j, ret;
-	struct ib_wc wc;
-
-	*lengthp = 0;
-	for (i = j = 0; i < wqe->num_sge; i++) {
-		if (wqe->sg_list[i].length == 0)
-			continue;
-		/* Check LKEY */
-		if (!ipath_lkey_ok(qp, j ? &ss->sg_list[j - 1] : &ss->sge,
-				   &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE))
-			goto bad_lkey;
-		*lengthp += wqe->sg_list[i].length;
-		j++;
-	}
-	ss->num_sge = j;
-	ret = 1;
-	goto bail;
-
-bad_lkey:
-	memset(&wc, 0, sizeof(wc));
-	wc.wr_id = wqe->wr_id;
-	wc.status = IB_WC_LOC_PROT_ERR;
-	wc.opcode = IB_WC_RECV;
-	wc.qp = &qp->ibqp;
-	/* Signal solicited completion event. */
-	ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
-	ret = 0;
-bail:
-	return ret;
-}
-
-/**
- * ipath_get_rwqe - copy the next RWQE into the QP's RWQE
- * @qp: the QP
- * @wr_id_only: update qp->r_wr_id only, not qp->r_sge
- *
- * Return 0 if no RWQE is available, otherwise return 1.
- *
- * Can be called from interrupt level.
- */
-int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
-{
-	unsigned long flags;
-	struct ipath_rq *rq;
-	struct ipath_rwq *wq;
-	struct ipath_srq *srq;
-	struct ipath_rwqe *wqe;
-	void (*handler)(struct ib_event *, void *);
-	u32 tail;
-	int ret;
-
-	if (qp->ibqp.srq) {
-		srq = to_isrq(qp->ibqp.srq);
-		handler = srq->ibsrq.event_handler;
-		rq = &srq->rq;
-	} else {
-		srq = NULL;
-		handler = NULL;
-		rq = &qp->r_rq;
-	}
-
-	spin_lock_irqsave(&rq->lock, flags);
-	if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) {
-		ret = 0;
-		goto unlock;
-	}
-
-	wq = rq->wq;
-	tail = wq->tail;
-	/* Validate tail before using it since it is user writable. */
-	if (tail >= rq->size)
-		tail = 0;
-	do {
-		if (unlikely(tail == wq->head)) {
-			ret = 0;
-			goto unlock;
-		}
-		/* Make sure entry is read after head index is read. */
-		smp_rmb();
-		wqe = get_rwqe_ptr(rq, tail);
-		if (++tail >= rq->size)
-			tail = 0;
-		if (wr_id_only)
-			break;
-		qp->r_sge.sg_list = qp->r_sg_list;
-	} while (!ipath_init_sge(qp, wqe, &qp->r_len, &qp->r_sge));
-	qp->r_wr_id = wqe->wr_id;
-	wq->tail = tail;
-
-	ret = 1;
-	set_bit(IPATH_R_WRID_VALID, &qp->r_aflags);
-	if (handler) {
-		u32 n;
-
-		/*
-		 * validate head pointer value and compute
-		 * the number of remaining WQEs.
-		 */
-		n = wq->head;
-		if (n >= rq->size)
-			n = 0;
-		if (n < tail)
-			n += rq->size - tail;
-		else
-			n -= tail;
-		if (n < srq->limit) {
-			struct ib_event ev;
-
-			srq->limit = 0;
-			spin_unlock_irqrestore(&rq->lock, flags);
-			ev.device = qp->ibqp.device;
-			ev.element.srq = qp->ibqp.srq;
-			ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
-			handler(&ev, srq->ibsrq.srq_context);
-			goto bail;
-		}
-	}
-unlock:
-	spin_unlock_irqrestore(&rq->lock, flags);
-bail:
-	return ret;
-}
-
-/**
- * ipath_ruc_loopback - handle UC and RC lookback requests
- * @sqp: the sending QP
- *
- * This is called from ipath_do_send() to
- * forward a WQE addressed to the same HCA.
- * Note that although we are single threaded due to the tasklet, we still
- * have to protect against post_send().  We don't have to worry about
- * receive interrupts since this is a connected protocol and all packets
- * will pass through here.
- */
-static void ipath_ruc_loopback(struct ipath_qp *sqp)
-{
-	struct ipath_ibdev *dev = to_idev(sqp->ibqp.device);
-	struct ipath_qp *qp;
-	struct ipath_swqe *wqe;
-	struct ipath_sge *sge;
-	unsigned long flags;
-	struct ib_wc wc;
-	u64 sdata;
-	atomic64_t *maddr;
-	enum ib_wc_status send_status;
-
-	/*
-	 * Note that we check the responder QP state after
-	 * checking the requester's state.
-	 */
-	qp = ipath_lookup_qpn(&dev->qp_table, sqp->remote_qpn);
-
-	spin_lock_irqsave(&sqp->s_lock, flags);
-
-	/* Return if we are already busy processing a work request. */
-	if ((sqp->s_flags & (IPATH_S_BUSY | IPATH_S_ANY_WAIT)) ||
-	    !(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_OR_FLUSH_SEND))
-		goto unlock;
-
-	sqp->s_flags |= IPATH_S_BUSY;
-
-again:
-	if (sqp->s_last == sqp->s_head)
-		goto clr_busy;
-	wqe = get_swqe_ptr(sqp, sqp->s_last);
-
-	/* Return if it is not OK to start a new work reqeust. */
-	if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_NEXT_SEND_OK)) {
-		if (!(ib_ipath_state_ops[sqp->state] & IPATH_FLUSH_SEND))
-			goto clr_busy;
-		/* We are in the error state, flush the work request. */
-		send_status = IB_WC_WR_FLUSH_ERR;
-		goto flush_send;
-	}
-
-	/*
-	 * We can rely on the entry not changing without the s_lock
-	 * being held until we update s_last.
-	 * We increment s_cur to indicate s_last is in progress.
-	 */
-	if (sqp->s_last == sqp->s_cur) {
-		if (++sqp->s_cur >= sqp->s_size)
-			sqp->s_cur = 0;
-	}
-	spin_unlock_irqrestore(&sqp->s_lock, flags);
-
-	if (!qp || !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) {
-		dev->n_pkt_drops++;
-		/*
-		 * For RC, the requester would timeout and retry so
-		 * shortcut the timeouts and just signal too many retries.
-		 */
-		if (sqp->ibqp.qp_type == IB_QPT_RC)
-			send_status = IB_WC_RETRY_EXC_ERR;
-		else
-			send_status = IB_WC_SUCCESS;
-		goto serr;
-	}
-
-	memset(&wc, 0, sizeof wc);
-	send_status = IB_WC_SUCCESS;
-
-	sqp->s_sge.sge = wqe->sg_list[0];
-	sqp->s_sge.sg_list = wqe->sg_list + 1;
-	sqp->s_sge.num_sge = wqe->wr.num_sge;
-	sqp->s_len = wqe->length;
-	switch (wqe->wr.opcode) {
-	case IB_WR_SEND_WITH_IMM:
-		wc.wc_flags = IB_WC_WITH_IMM;
-		wc.ex.imm_data = wqe->wr.ex.imm_data;
-		/* FALLTHROUGH */
-	case IB_WR_SEND:
-		if (!ipath_get_rwqe(qp, 0))
-			goto rnr_nak;
-		break;
-
-	case IB_WR_RDMA_WRITE_WITH_IMM:
-		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
-			goto inv_err;
-		wc.wc_flags = IB_WC_WITH_IMM;
-		wc.ex.imm_data = wqe->wr.ex.imm_data;
-		if (!ipath_get_rwqe(qp, 1))
-			goto rnr_nak;
-		/* FALLTHROUGH */
-	case IB_WR_RDMA_WRITE:
-		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
-			goto inv_err;
-		if (wqe->length == 0)
-			break;
-		if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, wqe->length,
-					    wqe->rdma_wr.remote_addr,
-					    wqe->rdma_wr.rkey,
-					    IB_ACCESS_REMOTE_WRITE)))
-			goto acc_err;
-		break;
-
-	case IB_WR_RDMA_READ:
-		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
-			goto inv_err;
-		if (unlikely(!ipath_rkey_ok(qp, &sqp->s_sge, wqe->length,
-					    wqe->rdma_wr.remote_addr,
-					    wqe->rdma_wr.rkey,
-					    IB_ACCESS_REMOTE_READ)))
-			goto acc_err;
-		qp->r_sge.sge = wqe->sg_list[0];
-		qp->r_sge.sg_list = wqe->sg_list + 1;
-		qp->r_sge.num_sge = wqe->wr.num_sge;
-		break;
-
-	case IB_WR_ATOMIC_CMP_AND_SWP:
-	case IB_WR_ATOMIC_FETCH_AND_ADD:
-		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
-			goto inv_err;
-		if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, sizeof(u64),
-					    wqe->atomic_wr.remote_addr,
-					    wqe->atomic_wr.rkey,
-					    IB_ACCESS_REMOTE_ATOMIC)))
-			goto acc_err;
-		/* Perform atomic OP and save result. */
-		maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
-		sdata = wqe->atomic_wr.compare_add;
-		*(u64 *) sqp->s_sge.sge.vaddr =
-			(wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ?
-			(u64) atomic64_add_return(sdata, maddr) - sdata :
-			(u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
-				      sdata, wqe->atomic_wr.swap);
-		goto send_comp;
-
-	default:
-		send_status = IB_WC_LOC_QP_OP_ERR;
-		goto serr;
-	}
-
-	sge = &sqp->s_sge.sge;
-	while (sqp->s_len) {
-		u32 len = sqp->s_len;
-
-		if (len > sge->length)
-			len = sge->length;
-		if (len > sge->sge_length)
-			len = sge->sge_length;
-		BUG_ON(len == 0);
-		ipath_copy_sge(&qp->r_sge, sge->vaddr, len);
-		sge->vaddr += len;
-		sge->length -= len;
-		sge->sge_length -= len;
-		if (sge->sge_length == 0) {
-			if (--sqp->s_sge.num_sge)
-				*sge = *sqp->s_sge.sg_list++;
-		} else if (sge->length == 0 && sge->mr != NULL) {
-			if (++sge->n >= IPATH_SEGSZ) {
-				if (++sge->m >= sge->mr->mapsz)
-					break;
-				sge->n = 0;
-			}
-			sge->vaddr =
-				sge->mr->map[sge->m]->segs[sge->n].vaddr;
-			sge->length =
-				sge->mr->map[sge->m]->segs[sge->n].length;
-		}
-		sqp->s_len -= len;
-	}
-
-	if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags))
-		goto send_comp;
-
-	if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM)
-		wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
-	else
-		wc.opcode = IB_WC_RECV;
-	wc.wr_id = qp->r_wr_id;
-	wc.status = IB_WC_SUCCESS;
-	wc.byte_len = wqe->length;
-	wc.qp = &qp->ibqp;
-	wc.src_qp = qp->remote_qpn;
-	wc.slid = qp->remote_ah_attr.dlid;
-	wc.sl = qp->remote_ah_attr.sl;
-	wc.port_num = 1;
-	/* Signal completion event if the solicited bit is set. */
-	ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
-		       wqe->wr.send_flags & IB_SEND_SOLICITED);
-
-send_comp:
-	spin_lock_irqsave(&sqp->s_lock, flags);
-flush_send:
-	sqp->s_rnr_retry = sqp->s_rnr_retry_cnt;
-	ipath_send_complete(sqp, wqe, send_status);
-	goto again;
-
-rnr_nak:
-	/* Handle RNR NAK */
-	if (qp->ibqp.qp_type == IB_QPT_UC)
-		goto send_comp;
-	/*
-	 * Note: we don't need the s_lock held since the BUSY flag
-	 * makes this single threaded.
-	 */
-	if (sqp->s_rnr_retry == 0) {
-		send_status = IB_WC_RNR_RETRY_EXC_ERR;
-		goto serr;
-	}
-	if (sqp->s_rnr_retry_cnt < 7)
-		sqp->s_rnr_retry--;
-	spin_lock_irqsave(&sqp->s_lock, flags);
-	if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_RECV_OK))
-		goto clr_busy;
-	sqp->s_flags |= IPATH_S_WAITING;
-	dev->n_rnr_naks++;
-	sqp->s_rnr_timeout = ib_ipath_rnr_table[qp->r_min_rnr_timer];
-	ipath_insert_rnr_queue(sqp);
-	goto clr_busy;
-
-inv_err:
-	send_status = IB_WC_REM_INV_REQ_ERR;
-	wc.status = IB_WC_LOC_QP_OP_ERR;
-	goto err;
-
-acc_err:
-	send_status = IB_WC_REM_ACCESS_ERR;
-	wc.status = IB_WC_LOC_PROT_ERR;
-err:
-	/* responder goes to error state */
-	ipath_rc_error(qp, wc.status);
-
-serr:
-	spin_lock_irqsave(&sqp->s_lock, flags);
-	ipath_send_complete(sqp, wqe, send_status);
-	if (sqp->ibqp.qp_type == IB_QPT_RC) {
-		int lastwqe = ipath_error_qp(sqp, IB_WC_WR_FLUSH_ERR);
-
-		sqp->s_flags &= ~IPATH_S_BUSY;
-		spin_unlock_irqrestore(&sqp->s_lock, flags);
-		if (lastwqe) {
-			struct ib_event ev;
-
-			ev.device = sqp->ibqp.device;
-			ev.element.qp = &sqp->ibqp;
-			ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
-			sqp->ibqp.event_handler(&ev, sqp->ibqp.qp_context);
-		}
-		goto done;
-	}
-clr_busy:
-	sqp->s_flags &= ~IPATH_S_BUSY;
-unlock:
-	spin_unlock_irqrestore(&sqp->s_lock, flags);
-done:
-	if (qp && atomic_dec_and_test(&qp->refcount))
-		wake_up(&qp->wait);
-}
-
-static void want_buffer(struct ipath_devdata *dd, struct ipath_qp *qp)
-{
-	if (!(dd->ipath_flags & IPATH_HAS_SEND_DMA) ||
-	    qp->ibqp.qp_type == IB_QPT_SMI) {
-		unsigned long flags;
-
-		spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
-		dd->ipath_sendctrl |= INFINIPATH_S_PIOINTBUFAVAIL;
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-				 dd->ipath_sendctrl);
-		ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-		spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
-	}
-}
-
-/**
- * ipath_no_bufs_available - tell the layer driver we need buffers
- * @qp: the QP that caused the problem
- * @dev: the device we ran out of buffers on
- *
- * Called when we run out of PIO buffers.
- * If we are now in the error state, return zero to flush the
- * send work request.
- */
-static int ipath_no_bufs_available(struct ipath_qp *qp,
-				    struct ipath_ibdev *dev)
-{
-	unsigned long flags;
-	int ret = 1;
-
-	/*
-	 * Note that as soon as want_buffer() is called and
-	 * possibly before it returns, ipath_ib_piobufavail()
-	 * could be called. Therefore, put QP on the piowait list before
-	 * enabling the PIO avail interrupt.
-	 */
-	spin_lock_irqsave(&qp->s_lock, flags);
-	if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) {
-		dev->n_piowait++;
-		qp->s_flags |= IPATH_S_WAITING;
-		qp->s_flags &= ~IPATH_S_BUSY;
-		spin_lock(&dev->pending_lock);
-		if (list_empty(&qp->piowait))
-			list_add_tail(&qp->piowait, &dev->piowait);
-		spin_unlock(&dev->pending_lock);
-	} else
-		ret = 0;
-	spin_unlock_irqrestore(&qp->s_lock, flags);
-	if (ret)
-		want_buffer(dev->dd, qp);
-	return ret;
-}
-
-/**
- * ipath_make_grh - construct a GRH header
- * @dev: a pointer to the ipath device
- * @hdr: a pointer to the GRH header being constructed
- * @grh: the global route address to send to
- * @hwords: the number of 32 bit words of header being sent
- * @nwords: the number of 32 bit words of data being sent
- *
- * Return the size of the header in 32 bit words.
- */
-u32 ipath_make_grh(struct ipath_ibdev *dev, struct ib_grh *hdr,
-		   struct ib_global_route *grh, u32 hwords, u32 nwords)
-{
-	hdr->version_tclass_flow =
-		cpu_to_be32((6 << 28) |
-			    (grh->traffic_class << 20) |
-			    grh->flow_label);
-	hdr->paylen = cpu_to_be16((hwords - 2 + nwords + SIZE_OF_CRC) << 2);
-	/* next_hdr is defined by C8-7 in ch. 8.4.1 */
-	hdr->next_hdr = 0x1B;
-	hdr->hop_limit = grh->hop_limit;
-	/* The SGID is 32-bit aligned. */
-	hdr->sgid.global.subnet_prefix = dev->gid_prefix;
-	hdr->sgid.global.interface_id = dev->dd->ipath_guid;
-	hdr->dgid = grh->dgid;
-
-	/* GRH header size in 32-bit words. */
-	return sizeof(struct ib_grh) / sizeof(u32);
-}
-
-void ipath_make_ruc_header(struct ipath_ibdev *dev, struct ipath_qp *qp,
-			   struct ipath_other_headers *ohdr,
-			   u32 bth0, u32 bth2)
-{
-	u16 lrh0;
-	u32 nwords;
-	u32 extra_bytes;
-
-	/* Construct the header. */
-	extra_bytes = -qp->s_cur_size & 3;
-	nwords = (qp->s_cur_size + extra_bytes) >> 2;
-	lrh0 = IPATH_LRH_BTH;
-	if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
-		qp->s_hdrwords += ipath_make_grh(dev, &qp->s_hdr.u.l.grh,
-						 &qp->remote_ah_attr.grh,
-						 qp->s_hdrwords, nwords);
-		lrh0 = IPATH_LRH_GRH;
-	}
-	lrh0 |= qp->remote_ah_attr.sl << 4;
-	qp->s_hdr.lrh[0] = cpu_to_be16(lrh0);
-	qp->s_hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
-	qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC);
-	qp->s_hdr.lrh[3] = cpu_to_be16(dev->dd->ipath_lid |
-				       qp->remote_ah_attr.src_path_bits);
-	bth0 |= ipath_get_pkey(dev->dd, qp->s_pkey_index);
-	bth0 |= extra_bytes << 20;
-	ohdr->bth[0] = cpu_to_be32(bth0 | (1 << 22));
-	ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
-	ohdr->bth[2] = cpu_to_be32(bth2);
-}
-
-/**
- * ipath_do_send - perform a send on a QP
- * @data: contains a pointer to the QP
- *
- * Process entries in the send work queue until credit or queue is
- * exhausted.  Only allow one CPU to send a packet per QP (tasklet).
- * Otherwise, two threads could send packets out of order.
- */
-void ipath_do_send(unsigned long data)
-{
-	struct ipath_qp *qp = (struct ipath_qp *)data;
-	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
-	int (*make_req)(struct ipath_qp *qp);
-	unsigned long flags;
-
-	if ((qp->ibqp.qp_type == IB_QPT_RC ||
-	     qp->ibqp.qp_type == IB_QPT_UC) &&
-	    qp->remote_ah_attr.dlid == dev->dd->ipath_lid) {
-		ipath_ruc_loopback(qp);
-		goto bail;
-	}
-
-	if (qp->ibqp.qp_type == IB_QPT_RC)
-	       make_req = ipath_make_rc_req;
-	else if (qp->ibqp.qp_type == IB_QPT_UC)
-	       make_req = ipath_make_uc_req;
-	else
-	       make_req = ipath_make_ud_req;
-
-	spin_lock_irqsave(&qp->s_lock, flags);
-
-	/* Return if we are already busy processing a work request. */
-	if ((qp->s_flags & (IPATH_S_BUSY | IPATH_S_ANY_WAIT)) ||
-	    !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_OR_FLUSH_SEND)) {
-		spin_unlock_irqrestore(&qp->s_lock, flags);
-		goto bail;
-	}
-
-	qp->s_flags |= IPATH_S_BUSY;
-
-	spin_unlock_irqrestore(&qp->s_lock, flags);
-
-again:
-	/* Check for a constructed packet to be sent. */
-	if (qp->s_hdrwords != 0) {
-		/*
-		 * If no PIO bufs are available, return.  An interrupt will
-		 * call ipath_ib_piobufavail() when one is available.
-		 */
-		if (ipath_verbs_send(qp, &qp->s_hdr, qp->s_hdrwords,
-				     qp->s_cur_sge, qp->s_cur_size)) {
-			if (ipath_no_bufs_available(qp, dev))
-				goto bail;
-		}
-		dev->n_unicast_xmit++;
-		/* Record that we sent the packet and s_hdr is empty. */
-		qp->s_hdrwords = 0;
-	}
-
-	if (make_req(qp))
-		goto again;
-
-bail:;
-}
-
-/*
- * This should be called with s_lock held.
- */
-void ipath_send_complete(struct ipath_qp *qp, struct ipath_swqe *wqe,
-			 enum ib_wc_status status)
-{
-	u32 old_last, last;
-
-	if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_OR_FLUSH_SEND))
-		return;
-
-	/* See ch. 11.2.4.1 and 10.7.3.1 */
-	if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) ||
-	    (wqe->wr.send_flags & IB_SEND_SIGNALED) ||
-	    status != IB_WC_SUCCESS) {
-		struct ib_wc wc;
-
-		memset(&wc, 0, sizeof wc);
-		wc.wr_id = wqe->wr.wr_id;
-		wc.status = status;
-		wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
-		wc.qp = &qp->ibqp;
-		if (status == IB_WC_SUCCESS)
-			wc.byte_len = wqe->length;
-		ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc,
-			       status != IB_WC_SUCCESS);
-	}
-
-	old_last = last = qp->s_last;
-	if (++last >= qp->s_size)
-		last = 0;
-	qp->s_last = last;
-	if (qp->s_cur == old_last)
-		qp->s_cur = last;
-	if (qp->s_tail == old_last)
-		qp->s_tail = last;
-	if (qp->state == IB_QPS_SQD && last == qp->s_cur)
-		qp->s_draining = 0;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_sdma.c b/drivers/staging/rdma/ipath/ipath_sdma.c
deleted file mode 100644
index 1ffc06a..0000000
--- a/drivers/staging/rdma/ipath/ipath_sdma.c
+++ /dev/null
@@ -1,818 +0,0 @@
-/*
- * Copyright (c) 2007, 2008 QLogic Corporation. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/spinlock.h>
-#include <linux/gfp.h>
-
-#include "ipath_kernel.h"
-#include "ipath_verbs.h"
-#include "ipath_common.h"
-
-#define SDMA_DESCQ_SZ PAGE_SIZE /* 256 entries per 4KB page */
-
-static void vl15_watchdog_enq(struct ipath_devdata *dd)
-{
-	/* ipath_sdma_lock must already be held */
-	if (atomic_inc_return(&dd->ipath_sdma_vl15_count) == 1) {
-		unsigned long interval = (HZ + 19) / 20;
-		dd->ipath_sdma_vl15_timer.expires = jiffies + interval;
-		add_timer(&dd->ipath_sdma_vl15_timer);
-	}
-}
-
-static void vl15_watchdog_deq(struct ipath_devdata *dd)
-{
-	/* ipath_sdma_lock must already be held */
-	if (atomic_dec_return(&dd->ipath_sdma_vl15_count) != 0) {
-		unsigned long interval = (HZ + 19) / 20;
-		mod_timer(&dd->ipath_sdma_vl15_timer, jiffies + interval);
-	} else {
-		del_timer(&dd->ipath_sdma_vl15_timer);
-	}
-}
-
-static void vl15_watchdog_timeout(unsigned long opaque)
-{
-	struct ipath_devdata *dd = (struct ipath_devdata *)opaque;
-
-	if (atomic_read(&dd->ipath_sdma_vl15_count) != 0) {
-		ipath_dbg("vl15 watchdog timeout - clearing\n");
-		ipath_cancel_sends(dd, 1);
-		ipath_hol_down(dd);
-	} else {
-		ipath_dbg("vl15 watchdog timeout - "
-			  "condition already cleared\n");
-	}
-}
-
-static void unmap_desc(struct ipath_devdata *dd, unsigned head)
-{
-	__le64 *descqp = &dd->ipath_sdma_descq[head].qw[0];
-	u64 desc[2];
-	dma_addr_t addr;
-	size_t len;
-
-	desc[0] = le64_to_cpu(descqp[0]);
-	desc[1] = le64_to_cpu(descqp[1]);
-
-	addr = (desc[1] << 32) | (desc[0] >> 32);
-	len = (desc[0] >> 14) & (0x7ffULL << 2);
-	dma_unmap_single(&dd->pcidev->dev, addr, len, DMA_TO_DEVICE);
-}
-
-/*
- * ipath_sdma_lock should be locked before calling this.
- */
-int ipath_sdma_make_progress(struct ipath_devdata *dd)
-{
-	struct list_head *lp = NULL;
-	struct ipath_sdma_txreq *txp = NULL;
-	u16 dmahead;
-	u16 start_idx = 0;
-	int progress = 0;
-
-	if (!list_empty(&dd->ipath_sdma_activelist)) {
-		lp = dd->ipath_sdma_activelist.next;
-		txp = list_entry(lp, struct ipath_sdma_txreq, list);
-		start_idx = txp->start_idx;
-	}
-
-	/*
-	 * Read the SDMA head register in order to know that the
-	 * interrupt clear has been written to the chip.
-	 * Otherwise, we may not get an interrupt for the last
-	 * descriptor in the queue.
-	 */
-	dmahead = (u16)ipath_read_kreg32(dd, dd->ipath_kregs->kr_senddmahead);
-	/* sanity check return value for error handling (chip reset, etc.) */
-	if (dmahead >= dd->ipath_sdma_descq_cnt)
-		goto done;
-
-	while (dd->ipath_sdma_descq_head != dmahead) {
-		if (txp && txp->flags & IPATH_SDMA_TXREQ_F_FREEDESC &&
-		    dd->ipath_sdma_descq_head == start_idx) {
-			unmap_desc(dd, dd->ipath_sdma_descq_head);
-			start_idx++;
-			if (start_idx == dd->ipath_sdma_descq_cnt)
-				start_idx = 0;
-		}
-
-		/* increment free count and head */
-		dd->ipath_sdma_descq_removed++;
-		if (++dd->ipath_sdma_descq_head == dd->ipath_sdma_descq_cnt)
-			dd->ipath_sdma_descq_head = 0;
-
-		if (txp && txp->next_descq_idx == dd->ipath_sdma_descq_head) {
-			/* move to notify list */
-			if (txp->flags & IPATH_SDMA_TXREQ_F_VL15)
-				vl15_watchdog_deq(dd);
-			list_move_tail(lp, &dd->ipath_sdma_notifylist);
-			if (!list_empty(&dd->ipath_sdma_activelist)) {
-				lp = dd->ipath_sdma_activelist.next;
-				txp = list_entry(lp, struct ipath_sdma_txreq,
-						 list);
-				start_idx = txp->start_idx;
-			} else {
-				lp = NULL;
-				txp = NULL;
-			}
-		}
-		progress = 1;
-	}
-
-	if (progress)
-		tasklet_hi_schedule(&dd->ipath_sdma_notify_task);
-
-done:
-	return progress;
-}
-
-static void ipath_sdma_notify(struct ipath_devdata *dd, struct list_head *list)
-{
-	struct ipath_sdma_txreq *txp, *txp_next;
-
-	list_for_each_entry_safe(txp, txp_next, list, list) {
-		list_del_init(&txp->list);
-
-		if (txp->callback)
-			(*txp->callback)(txp->callback_cookie,
-					 txp->callback_status);
-	}
-}
-
-static void sdma_notify_taskbody(struct ipath_devdata *dd)
-{
-	unsigned long flags;
-	struct list_head list;
-
-	INIT_LIST_HEAD(&list);
-
-	spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
-
-	list_splice_init(&dd->ipath_sdma_notifylist, &list);
-
-	spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
-
-	ipath_sdma_notify(dd, &list);
-
-	/*
-	 * The IB verbs layer needs to see the callback before getting
-	 * the call to ipath_ib_piobufavail() because the callback
-	 * handles releasing resources the next send will need.
-	 * Otherwise, we could do these calls in
-	 * ipath_sdma_make_progress().
-	 */
-	ipath_ib_piobufavail(dd->verbs_dev);
-}
-
-static void sdma_notify_task(unsigned long opaque)
-{
-	struct ipath_devdata *dd = (struct ipath_devdata *)opaque;
-
-	if (!test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
-		sdma_notify_taskbody(dd);
-}
-
-static void dump_sdma_state(struct ipath_devdata *dd)
-{
-	unsigned long reg;
-
-	reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmastatus);
-	ipath_cdbg(VERBOSE, "kr_senddmastatus: 0x%016lx\n", reg);
-
-	reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_sendctrl);
-	ipath_cdbg(VERBOSE, "kr_sendctrl: 0x%016lx\n", reg);
-
-	reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmabufmask0);
-	ipath_cdbg(VERBOSE, "kr_senddmabufmask0: 0x%016lx\n", reg);
-
-	reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmabufmask1);
-	ipath_cdbg(VERBOSE, "kr_senddmabufmask1: 0x%016lx\n", reg);
-
-	reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmabufmask2);
-	ipath_cdbg(VERBOSE, "kr_senddmabufmask2: 0x%016lx\n", reg);
-
-	reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmatail);
-	ipath_cdbg(VERBOSE, "kr_senddmatail: 0x%016lx\n", reg);
-
-	reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmahead);
-	ipath_cdbg(VERBOSE, "kr_senddmahead: 0x%016lx\n", reg);
-}
-
-static void sdma_abort_task(unsigned long opaque)
-{
-	struct ipath_devdata *dd = (struct ipath_devdata *) opaque;
-	u64 status;
-	unsigned long flags;
-
-	if (test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
-		return;
-
-	spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
-
-	status = dd->ipath_sdma_status & IPATH_SDMA_ABORT_MASK;
-
-	/* nothing to do */
-	if (status == IPATH_SDMA_ABORT_NONE)
-		goto unlock;
-
-	/* ipath_sdma_abort() is done, waiting for interrupt */
-	if (status == IPATH_SDMA_ABORT_DISARMED) {
-		if (time_before(jiffies, dd->ipath_sdma_abort_intr_timeout))
-			goto resched_noprint;
-		/* give up, intr got lost somewhere */
-		ipath_dbg("give up waiting for SDMADISABLED intr\n");
-		__set_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status);
-		status = IPATH_SDMA_ABORT_ABORTED;
-	}
-
-	/* everything is stopped, time to clean up and restart */
-	if (status == IPATH_SDMA_ABORT_ABORTED) {
-		struct ipath_sdma_txreq *txp, *txpnext;
-		u64 hwstatus;
-		int notify = 0;
-
-		hwstatus = ipath_read_kreg64(dd,
-				dd->ipath_kregs->kr_senddmastatus);
-
-		if ((hwstatus & (IPATH_SDMA_STATUS_SCORE_BOARD_DRAIN_IN_PROG |
-				 IPATH_SDMA_STATUS_ABORT_IN_PROG	     |
-				 IPATH_SDMA_STATUS_INTERNAL_SDMA_ENABLE)) ||
-		    !(hwstatus & IPATH_SDMA_STATUS_SCB_EMPTY)) {
-			if (dd->ipath_sdma_reset_wait > 0) {
-				/* not done shutting down sdma */
-				--dd->ipath_sdma_reset_wait;
-				goto resched;
-			}
-			ipath_cdbg(VERBOSE, "gave up waiting for quiescent "
-				"status after SDMA reset, continuing\n");
-			dump_sdma_state(dd);
-		}
-
-		/* dequeue all "sent" requests */
-		list_for_each_entry_safe(txp, txpnext,
-					 &dd->ipath_sdma_activelist, list) {
-			txp->callback_status = IPATH_SDMA_TXREQ_S_ABORTED;
-			if (txp->flags & IPATH_SDMA_TXREQ_F_VL15)
-				vl15_watchdog_deq(dd);
-			list_move_tail(&txp->list, &dd->ipath_sdma_notifylist);
-			notify = 1;
-		}
-		if (notify)
-			tasklet_hi_schedule(&dd->ipath_sdma_notify_task);
-
-		/* reset our notion of head and tail */
-		dd->ipath_sdma_descq_tail = 0;
-		dd->ipath_sdma_descq_head = 0;
-		dd->ipath_sdma_head_dma[0] = 0;
-		dd->ipath_sdma_generation = 0;
-		dd->ipath_sdma_descq_removed = dd->ipath_sdma_descq_added;
-
-		/* Reset SendDmaLenGen */
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmalengen,
-			(u64) dd->ipath_sdma_descq_cnt | (1ULL << 18));
-
-		/* done with sdma state for a bit */
-		spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
-
-		/*
-		 * Don't restart sdma here (with the exception
-		 * below). Wait until link is up to ACTIVE.  VL15 MADs
-		 * used to bring the link up use PIO, and multiple link
-		 * transitions otherwise cause the sdma engine to be
-		 * stopped and started multiple times.
-		 * The disable is done here, including the shadow,
-		 * so the state is kept consistent.
-		 * See ipath_restart_sdma() for the actual starting
-		 * of sdma.
-		 */
-		spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
-		dd->ipath_sendctrl &= ~INFINIPATH_S_SDMAENABLE;
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-				 dd->ipath_sendctrl);
-		ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-		spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
-
-		/* make sure I see next message */
-		dd->ipath_sdma_abort_jiffies = 0;
-
-		/*
-		 * Not everything that takes SDMA offline is a link
-		 * status change.  If the link was up, restart SDMA.
-		 */
-		if (dd->ipath_flags & IPATH_LINKACTIVE)
-			ipath_restart_sdma(dd);
-
-		goto done;
-	}
-
-resched:
-	/*
-	 * for now, keep spinning
-	 * JAG - this is bad to just have default be a loop without
-	 * state change
-	 */
-	if (time_after(jiffies, dd->ipath_sdma_abort_jiffies)) {
-		ipath_dbg("looping with status 0x%08lx\n",
-			  dd->ipath_sdma_status);
-		dd->ipath_sdma_abort_jiffies = jiffies + 5 * HZ;
-	}
-resched_noprint:
-	spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
-	if (!test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
-		tasklet_hi_schedule(&dd->ipath_sdma_abort_task);
-	return;
-
-unlock:
-	spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
-done:
-	return;
-}
-
-/*
- * This is called from interrupt context.
- */
-void ipath_sdma_intr(struct ipath_devdata *dd)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
-
-	(void) ipath_sdma_make_progress(dd);
-
-	spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
-}
-
-static int alloc_sdma(struct ipath_devdata *dd)
-{
-	int ret = 0;
-
-	/* Allocate memory for SendDMA descriptor FIFO */
-	dd->ipath_sdma_descq = dma_alloc_coherent(&dd->pcidev->dev,
-		SDMA_DESCQ_SZ, &dd->ipath_sdma_descq_phys, GFP_KERNEL);
-
-	if (!dd->ipath_sdma_descq) {
-		ipath_dev_err(dd, "failed to allocate SendDMA descriptor "
-			"FIFO memory\n");
-		ret = -ENOMEM;
-		goto done;
-	}
-
-	dd->ipath_sdma_descq_cnt =
-		SDMA_DESCQ_SZ / sizeof(struct ipath_sdma_desc);
-
-	/* Allocate memory for DMA of head register to memory */
-	dd->ipath_sdma_head_dma = dma_alloc_coherent(&dd->pcidev->dev,
-		PAGE_SIZE, &dd->ipath_sdma_head_phys, GFP_KERNEL);
-	if (!dd->ipath_sdma_head_dma) {
-		ipath_dev_err(dd, "failed to allocate SendDMA head memory\n");
-		ret = -ENOMEM;
-		goto cleanup_descq;
-	}
-	dd->ipath_sdma_head_dma[0] = 0;
-
-	setup_timer(&dd->ipath_sdma_vl15_timer, vl15_watchdog_timeout,
-			(unsigned long)dd);
-
-	atomic_set(&dd->ipath_sdma_vl15_count, 0);
-
-	goto done;
-
-cleanup_descq:
-	dma_free_coherent(&dd->pcidev->dev, SDMA_DESCQ_SZ,
-		(void *)dd->ipath_sdma_descq, dd->ipath_sdma_descq_phys);
-	dd->ipath_sdma_descq = NULL;
-	dd->ipath_sdma_descq_phys = 0;
-done:
-	return ret;
-}
-
-int setup_sdma(struct ipath_devdata *dd)
-{
-	int ret = 0;
-	unsigned i, n;
-	u64 tmp64;
-	u64 senddmabufmask[3] = { 0 };
-	unsigned long flags;
-
-	ret = alloc_sdma(dd);
-	if (ret)
-		goto done;
-
-	if (!dd->ipath_sdma_descq) {
-		ipath_dev_err(dd, "SendDMA memory not allocated\n");
-		goto done;
-	}
-
-	/*
-	 * Set initial status as if we had been up, then gone down.
-	 * This lets initial start on transition to ACTIVE be the
-	 * same as restart after link flap.
-	 */
-	dd->ipath_sdma_status = IPATH_SDMA_ABORT_ABORTED;
-	dd->ipath_sdma_abort_jiffies = 0;
-	dd->ipath_sdma_generation = 0;
-	dd->ipath_sdma_descq_tail = 0;
-	dd->ipath_sdma_descq_head = 0;
-	dd->ipath_sdma_descq_removed = 0;
-	dd->ipath_sdma_descq_added = 0;
-
-	/* Set SendDmaBase */
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabase,
-			 dd->ipath_sdma_descq_phys);
-	/* Set SendDmaLenGen */
-	tmp64 = dd->ipath_sdma_descq_cnt;
-	tmp64 |= 1<<18; /* enable generation checking */
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmalengen, tmp64);
-	/* Set SendDmaTail */
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmatail,
-			 dd->ipath_sdma_descq_tail);
-	/* Set SendDmaHeadAddr */
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmaheadaddr,
-			 dd->ipath_sdma_head_phys);
-
-	/*
-	 * Reserve all the former "kernel" piobufs, using high number range
-	 * so we get as many 4K buffers as possible
-	 */
-	n = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k;
-	i = dd->ipath_lastport_piobuf + dd->ipath_pioreserved;
-	ipath_chg_pioavailkernel(dd, i, n - i , 0);
-	for (; i < n; ++i) {
-		unsigned word = i / 64;
-		unsigned bit = i & 63;
-		BUG_ON(word >= 3);
-		senddmabufmask[word] |= 1ULL << bit;
-	}
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask0,
-			 senddmabufmask[0]);
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask1,
-			 senddmabufmask[1]);
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask2,
-			 senddmabufmask[2]);
-
-	INIT_LIST_HEAD(&dd->ipath_sdma_activelist);
-	INIT_LIST_HEAD(&dd->ipath_sdma_notifylist);
-
-	tasklet_init(&dd->ipath_sdma_notify_task, sdma_notify_task,
-		     (unsigned long) dd);
-	tasklet_init(&dd->ipath_sdma_abort_task, sdma_abort_task,
-		     (unsigned long) dd);
-
-	/*
-	 * No use to turn on SDMA here, as link is probably not ACTIVE
-	 * Just mark it RUNNING and enable the interrupt, and let the
-	 * ipath_restart_sdma() on link transition to ACTIVE actually
-	 * enable it.
-	 */
-	spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
-	dd->ipath_sendctrl |= INFINIPATH_S_SDMAINTENABLE;
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);
-	ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-	__set_bit(IPATH_SDMA_RUNNING, &dd->ipath_sdma_status);
-	spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
-
-done:
-	return ret;
-}
-
-void teardown_sdma(struct ipath_devdata *dd)
-{
-	struct ipath_sdma_txreq *txp, *txpnext;
-	unsigned long flags;
-	dma_addr_t sdma_head_phys = 0;
-	dma_addr_t sdma_descq_phys = 0;
-	void *sdma_descq = NULL;
-	void *sdma_head_dma = NULL;
-
-	spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
-	__clear_bit(IPATH_SDMA_RUNNING, &dd->ipath_sdma_status);
-	__set_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status);
-	__set_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status);
-	spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
-
-	tasklet_kill(&dd->ipath_sdma_abort_task);
-	tasklet_kill(&dd->ipath_sdma_notify_task);
-
-	/* turn off sdma */
-	spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
-	dd->ipath_sendctrl &= ~INFINIPATH_S_SDMAENABLE;
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-		dd->ipath_sendctrl);
-	ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-	spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
-
-	spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
-	/* dequeue all "sent" requests */
-	list_for_each_entry_safe(txp, txpnext, &dd->ipath_sdma_activelist,
-				 list) {
-		txp->callback_status = IPATH_SDMA_TXREQ_S_SHUTDOWN;
-		if (txp->flags & IPATH_SDMA_TXREQ_F_VL15)
-			vl15_watchdog_deq(dd);
-		list_move_tail(&txp->list, &dd->ipath_sdma_notifylist);
-	}
-	spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
-
-	sdma_notify_taskbody(dd);
-
-	del_timer_sync(&dd->ipath_sdma_vl15_timer);
-
-	spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
-
-	dd->ipath_sdma_abort_jiffies = 0;
-
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabase, 0);
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmalengen, 0);
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmatail, 0);
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmaheadaddr, 0);
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask0, 0);
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask1, 0);
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask2, 0);
-
-	if (dd->ipath_sdma_head_dma) {
-		sdma_head_dma = (void *) dd->ipath_sdma_head_dma;
-		sdma_head_phys = dd->ipath_sdma_head_phys;
-		dd->ipath_sdma_head_dma = NULL;
-		dd->ipath_sdma_head_phys = 0;
-	}
-
-	if (dd->ipath_sdma_descq) {
-		sdma_descq = dd->ipath_sdma_descq;
-		sdma_descq_phys = dd->ipath_sdma_descq_phys;
-		dd->ipath_sdma_descq = NULL;
-		dd->ipath_sdma_descq_phys = 0;
-	}
-
-	spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
-
-	if (sdma_head_dma)
-		dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
-				  sdma_head_dma, sdma_head_phys);
-
-	if (sdma_descq)
-		dma_free_coherent(&dd->pcidev->dev, SDMA_DESCQ_SZ,
-				  sdma_descq, sdma_descq_phys);
-}
-
-/*
- * [Re]start SDMA, if we use it, and it's not already OK.
- * This is called on transition to link ACTIVE, either the first or
- * subsequent times.
- */
-void ipath_restart_sdma(struct ipath_devdata *dd)
-{
-	unsigned long flags;
-	int needed = 1;
-
-	if (!(dd->ipath_flags & IPATH_HAS_SEND_DMA))
-		goto bail;
-
-	/*
-	 * First, make sure we should, which is to say,
-	 * check that we are "RUNNING" (not in teardown)
-	 * and not "SHUTDOWN"
-	 */
-	spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
-	if (!test_bit(IPATH_SDMA_RUNNING, &dd->ipath_sdma_status)
-		|| test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
-			needed = 0;
-	else {
-		__clear_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status);
-		__clear_bit(IPATH_SDMA_DISARMED, &dd->ipath_sdma_status);
-		__clear_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status);
-	}
-	spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
-	if (!needed) {
-		ipath_dbg("invalid attempt to restart SDMA, status 0x%08lx\n",
-			dd->ipath_sdma_status);
-		goto bail;
-	}
-	spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
-	/*
-	 * First clear, just to be safe. Enable is only done
-	 * in chip on 0->1 transition
-	 */
-	dd->ipath_sendctrl &= ~INFINIPATH_S_SDMAENABLE;
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);
-	ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-	dd->ipath_sendctrl |= INFINIPATH_S_SDMAENABLE;
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);
-	ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-	spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
-
-	/* notify upper layers */
-	ipath_ib_piobufavail(dd->verbs_dev);
-
-bail:
-	return;
-}
-
-static inline void make_sdma_desc(struct ipath_devdata *dd,
-	u64 *sdmadesc, u64 addr, u64 dwlen, u64 dwoffset)
-{
-	WARN_ON(addr & 3);
-	/* SDmaPhyAddr[47:32] */
-	sdmadesc[1] = addr >> 32;
-	/* SDmaPhyAddr[31:0] */
-	sdmadesc[0] = (addr & 0xfffffffcULL) << 32;
-	/* SDmaGeneration[1:0] */
-	sdmadesc[0] |= (dd->ipath_sdma_generation & 3ULL) << 30;
-	/* SDmaDwordCount[10:0] */
-	sdmadesc[0] |= (dwlen & 0x7ffULL) << 16;
-	/* SDmaBufOffset[12:2] */
-	sdmadesc[0] |= dwoffset & 0x7ffULL;
-}
-
-/*
- * This function queues one IB packet onto the send DMA queue per call.
- * The caller is responsible for checking:
- * 1) The number of send DMA descriptor entries is less than the size of
- *    the descriptor queue.
- * 2) The IB SGE addresses and lengths are 32-bit aligned
- *    (except possibly the last SGE's length)
- * 3) The SGE addresses are suitable for passing to dma_map_single().
- */
-int ipath_sdma_verbs_send(struct ipath_devdata *dd,
-	struct ipath_sge_state *ss, u32 dwords,
-	struct ipath_verbs_txreq *tx)
-{
-
-	unsigned long flags;
-	struct ipath_sge *sge;
-	int ret = 0;
-	u16 tail;
-	__le64 *descqp;
-	u64 sdmadesc[2];
-	u32 dwoffset;
-	dma_addr_t addr;
-
-	if ((tx->map_len + (dwords<<2)) > dd->ipath_ibmaxlen) {
-		ipath_dbg("packet size %X > ibmax %X, fail\n",
-			tx->map_len + (dwords<<2), dd->ipath_ibmaxlen);
-		ret = -EMSGSIZE;
-		goto fail;
-	}
-
-	spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
-
-retry:
-	if (unlikely(test_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status))) {
-		ret = -EBUSY;
-		goto unlock;
-	}
-
-	if (tx->txreq.sg_count > ipath_sdma_descq_freecnt(dd)) {
-		if (ipath_sdma_make_progress(dd))
-			goto retry;
-		ret = -ENOBUFS;
-		goto unlock;
-	}
-
-	addr = dma_map_single(&dd->pcidev->dev, tx->txreq.map_addr,
-			      tx->map_len, DMA_TO_DEVICE);
-	if (dma_mapping_error(&dd->pcidev->dev, addr))
-		goto ioerr;
-
-	dwoffset = tx->map_len >> 2;
-	make_sdma_desc(dd, sdmadesc, (u64) addr, dwoffset, 0);
-
-	/* SDmaFirstDesc */
-	sdmadesc[0] |= 1ULL << 12;
-	if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_USELARGEBUF)
-		sdmadesc[0] |= 1ULL << 14;	/* SDmaUseLargeBuf */
-
-	/* write to the descq */
-	tail = dd->ipath_sdma_descq_tail;
-	descqp = &dd->ipath_sdma_descq[tail].qw[0];
-	*descqp++ = cpu_to_le64(sdmadesc[0]);
-	*descqp++ = cpu_to_le64(sdmadesc[1]);
-
-	if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEDESC)
-		tx->txreq.start_idx = tail;
-
-	/* increment the tail */
-	if (++tail == dd->ipath_sdma_descq_cnt) {
-		tail = 0;
-		descqp = &dd->ipath_sdma_descq[0].qw[0];
-		++dd->ipath_sdma_generation;
-	}
-
-	sge = &ss->sge;
-	while (dwords) {
-		u32 dw;
-		u32 len;
-
-		len = dwords << 2;
-		if (len > sge->length)
-			len = sge->length;
-		if (len > sge->sge_length)
-			len = sge->sge_length;
-		BUG_ON(len == 0);
-		dw = (len + 3) >> 2;
-		addr = dma_map_single(&dd->pcidev->dev, sge->vaddr, dw << 2,
-				      DMA_TO_DEVICE);
-		if (dma_mapping_error(&dd->pcidev->dev, addr))
-			goto unmap;
-		make_sdma_desc(dd, sdmadesc, (u64) addr, dw, dwoffset);
-		/* SDmaUseLargeBuf has to be set in every descriptor */
-		if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_USELARGEBUF)
-			sdmadesc[0] |= 1ULL << 14;
-		/* write to the descq */
-		*descqp++ = cpu_to_le64(sdmadesc[0]);
-		*descqp++ = cpu_to_le64(sdmadesc[1]);
-
-		/* increment the tail */
-		if (++tail == dd->ipath_sdma_descq_cnt) {
-			tail = 0;
-			descqp = &dd->ipath_sdma_descq[0].qw[0];
-			++dd->ipath_sdma_generation;
-		}
-		sge->vaddr += len;
-		sge->length -= len;
-		sge->sge_length -= len;
-		if (sge->sge_length == 0) {
-			if (--ss->num_sge)
-				*sge = *ss->sg_list++;
-		} else if (sge->length == 0 && sge->mr != NULL) {
-			if (++sge->n >= IPATH_SEGSZ) {
-				if (++sge->m >= sge->mr->mapsz)
-					break;
-				sge->n = 0;
-			}
-			sge->vaddr =
-				sge->mr->map[sge->m]->segs[sge->n].vaddr;
-			sge->length =
-				sge->mr->map[sge->m]->segs[sge->n].length;
-		}
-
-		dwoffset += dw;
-		dwords -= dw;
-	}
-
-	if (!tail)
-		descqp = &dd->ipath_sdma_descq[dd->ipath_sdma_descq_cnt].qw[0];
-	descqp -= 2;
-	/* SDmaLastDesc */
-	descqp[0] |= cpu_to_le64(1ULL << 11);
-	if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_INTREQ) {
-		/* SDmaIntReq */
-		descqp[0] |= cpu_to_le64(1ULL << 15);
-	}
-
-	/* Commit writes to memory and advance the tail on the chip */
-	wmb();
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmatail, tail);
-
-	tx->txreq.next_descq_idx = tail;
-	tx->txreq.callback_status = IPATH_SDMA_TXREQ_S_OK;
-	dd->ipath_sdma_descq_tail = tail;
-	dd->ipath_sdma_descq_added += tx->txreq.sg_count;
-	list_add_tail(&tx->txreq.list, &dd->ipath_sdma_activelist);
-	if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_VL15)
-		vl15_watchdog_enq(dd);
-	goto unlock;
-
-unmap:
-	while (tail != dd->ipath_sdma_descq_tail) {
-		if (!tail)
-			tail = dd->ipath_sdma_descq_cnt - 1;
-		else
-			tail--;
-		unmap_desc(dd, tail);
-	}
-ioerr:
-	ret = -EIO;
-unlock:
-	spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
-fail:
-	return ret;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_srq.c b/drivers/staging/rdma/ipath/ipath_srq.c
deleted file mode 100644
index 2627198..0000000
--- a/drivers/staging/rdma/ipath/ipath_srq.c
+++ /dev/null
@@ -1,380 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/err.h>
-#include <linux/slab.h>
-#include <linux/vmalloc.h>
-
-#include "ipath_verbs.h"
-
-/**
- * ipath_post_srq_receive - post a receive on a shared receive queue
- * @ibsrq: the SRQ to post the receive on
- * @wr: the list of work requests to post
- * @bad_wr: the first WR to cause a problem is put here
- *
- * This may be called from interrupt context.
- */
-int ipath_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
-			   struct ib_recv_wr **bad_wr)
-{
-	struct ipath_srq *srq = to_isrq(ibsrq);
-	struct ipath_rwq *wq;
-	unsigned long flags;
-	int ret;
-
-	for (; wr; wr = wr->next) {
-		struct ipath_rwqe *wqe;
-		u32 next;
-		int i;
-
-		if ((unsigned) wr->num_sge > srq->rq.max_sge) {
-			*bad_wr = wr;
-			ret = -EINVAL;
-			goto bail;
-		}
-
-		spin_lock_irqsave(&srq->rq.lock, flags);
-		wq = srq->rq.wq;
-		next = wq->head + 1;
-		if (next >= srq->rq.size)
-			next = 0;
-		if (next == wq->tail) {
-			spin_unlock_irqrestore(&srq->rq.lock, flags);
-			*bad_wr = wr;
-			ret = -ENOMEM;
-			goto bail;
-		}
-
-		wqe = get_rwqe_ptr(&srq->rq, wq->head);
-		wqe->wr_id = wr->wr_id;
-		wqe->num_sge = wr->num_sge;
-		for (i = 0; i < wr->num_sge; i++)
-			wqe->sg_list[i] = wr->sg_list[i];
-		/* Make sure queue entry is written before the head index. */
-		smp_wmb();
-		wq->head = next;
-		spin_unlock_irqrestore(&srq->rq.lock, flags);
-	}
-	ret = 0;
-
-bail:
-	return ret;
-}
-
-/**
- * ipath_create_srq - create a shared receive queue
- * @ibpd: the protection domain of the SRQ to create
- * @srq_init_attr: the attributes of the SRQ
- * @udata: data from libipathverbs when creating a user SRQ
- */
-struct ib_srq *ipath_create_srq(struct ib_pd *ibpd,
-				struct ib_srq_init_attr *srq_init_attr,
-				struct ib_udata *udata)
-{
-	struct ipath_ibdev *dev = to_idev(ibpd->device);
-	struct ipath_srq *srq;
-	u32 sz;
-	struct ib_srq *ret;
-
-	if (srq_init_attr->srq_type != IB_SRQT_BASIC) {
-		ret = ERR_PTR(-ENOSYS);
-		goto done;
-	}
-
-	if (srq_init_attr->attr.max_wr == 0) {
-		ret = ERR_PTR(-EINVAL);
-		goto done;
-	}
-
-	if ((srq_init_attr->attr.max_sge > ib_ipath_max_srq_sges) ||
-	    (srq_init_attr->attr.max_wr > ib_ipath_max_srq_wrs)) {
-		ret = ERR_PTR(-EINVAL);
-		goto done;
-	}
-
-	srq = kmalloc(sizeof(*srq), GFP_KERNEL);
-	if (!srq) {
-		ret = ERR_PTR(-ENOMEM);
-		goto done;
-	}
-
-	/*
-	 * Need to use vmalloc() if we want to support large #s of entries.
-	 */
-	srq->rq.size = srq_init_attr->attr.max_wr + 1;
-	srq->rq.max_sge = srq_init_attr->attr.max_sge;
-	sz = sizeof(struct ib_sge) * srq->rq.max_sge +
-		sizeof(struct ipath_rwqe);
-	srq->rq.wq = vmalloc_user(sizeof(struct ipath_rwq) + srq->rq.size * sz);
-	if (!srq->rq.wq) {
-		ret = ERR_PTR(-ENOMEM);
-		goto bail_srq;
-	}
-
-	/*
-	 * Return the address of the RWQ as the offset to mmap.
-	 * See ipath_mmap() for details.
-	 */
-	if (udata && udata->outlen >= sizeof(__u64)) {
-		int err;
-		u32 s = sizeof(struct ipath_rwq) + srq->rq.size * sz;
-
-		srq->ip =
-		    ipath_create_mmap_info(dev, s,
-					   ibpd->uobject->context,
-					   srq->rq.wq);
-		if (!srq->ip) {
-			ret = ERR_PTR(-ENOMEM);
-			goto bail_wq;
-		}
-
-		err = ib_copy_to_udata(udata, &srq->ip->offset,
-				       sizeof(srq->ip->offset));
-		if (err) {
-			ret = ERR_PTR(err);
-			goto bail_ip;
-		}
-	} else
-		srq->ip = NULL;
-
-	/*
-	 * ib_create_srq() will initialize srq->ibsrq.
-	 */
-	spin_lock_init(&srq->rq.lock);
-	srq->rq.wq->head = 0;
-	srq->rq.wq->tail = 0;
-	srq->limit = srq_init_attr->attr.srq_limit;
-
-	spin_lock(&dev->n_srqs_lock);
-	if (dev->n_srqs_allocated == ib_ipath_max_srqs) {
-		spin_unlock(&dev->n_srqs_lock);
-		ret = ERR_PTR(-ENOMEM);
-		goto bail_ip;
-	}
-
- 	dev->n_srqs_allocated++;
-	spin_unlock(&dev->n_srqs_lock);
-
-	if (srq->ip) {
-		spin_lock_irq(&dev->pending_lock);
-		list_add(&srq->ip->pending_mmaps, &dev->pending_mmaps);
-		spin_unlock_irq(&dev->pending_lock);
-	}
-
-	ret = &srq->ibsrq;
-	goto done;
-
-bail_ip:
-	kfree(srq->ip);
-bail_wq:
-	vfree(srq->rq.wq);
-bail_srq:
-	kfree(srq);
-done:
-	return ret;
-}
-
-/**
- * ipath_modify_srq - modify a shared receive queue
- * @ibsrq: the SRQ to modify
- * @attr: the new attributes of the SRQ
- * @attr_mask: indicates which attributes to modify
- * @udata: user data for ipathverbs.so
- */
-int ipath_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
-		     enum ib_srq_attr_mask attr_mask,
-		     struct ib_udata *udata)
-{
-	struct ipath_srq *srq = to_isrq(ibsrq);
-	struct ipath_rwq *wq;
-	int ret = 0;
-
-	if (attr_mask & IB_SRQ_MAX_WR) {
-		struct ipath_rwq *owq;
-		struct ipath_rwqe *p;
-		u32 sz, size, n, head, tail;
-
-		/* Check that the requested sizes are below the limits. */
-		if ((attr->max_wr > ib_ipath_max_srq_wrs) ||
-		    ((attr_mask & IB_SRQ_LIMIT) ?
-		     attr->srq_limit : srq->limit) > attr->max_wr) {
-			ret = -EINVAL;
-			goto bail;
-		}
-
-		sz = sizeof(struct ipath_rwqe) +
-			srq->rq.max_sge * sizeof(struct ib_sge);
-		size = attr->max_wr + 1;
-		wq = vmalloc_user(sizeof(struct ipath_rwq) + size * sz);
-		if (!wq) {
-			ret = -ENOMEM;
-			goto bail;
-		}
-
-		/* Check that we can write the offset to mmap. */
-		if (udata && udata->inlen >= sizeof(__u64)) {
-			__u64 offset_addr;
-			__u64 offset = 0;
-
-			ret = ib_copy_from_udata(&offset_addr, udata,
-						 sizeof(offset_addr));
-			if (ret)
-				goto bail_free;
-			udata->outbuf =
-				(void __user *) (unsigned long) offset_addr;
-			ret = ib_copy_to_udata(udata, &offset,
-					       sizeof(offset));
-			if (ret)
-				goto bail_free;
-		}
-
-		spin_lock_irq(&srq->rq.lock);
-		/*
-		 * validate head pointer value and compute
-		 * the number of remaining WQEs.
-		 */
-		owq = srq->rq.wq;
-		head = owq->head;
-		if (head >= srq->rq.size)
-			head = 0;
-		tail = owq->tail;
-		if (tail >= srq->rq.size)
-			tail = 0;
-		n = head;
-		if (n < tail)
-			n += srq->rq.size - tail;
-		else
-			n -= tail;
-		if (size <= n) {
-			ret = -EINVAL;
-			goto bail_unlock;
-		}
-		n = 0;
-		p = wq->wq;
-		while (tail != head) {
-			struct ipath_rwqe *wqe;
-			int i;
-
-			wqe = get_rwqe_ptr(&srq->rq, tail);
-			p->wr_id = wqe->wr_id;
-			p->num_sge = wqe->num_sge;
-			for (i = 0; i < wqe->num_sge; i++)
-				p->sg_list[i] = wqe->sg_list[i];
-			n++;
-			p = (struct ipath_rwqe *)((char *) p + sz);
-			if (++tail >= srq->rq.size)
-				tail = 0;
-		}
-		srq->rq.wq = wq;
-		srq->rq.size = size;
-		wq->head = n;
-		wq->tail = 0;
-		if (attr_mask & IB_SRQ_LIMIT)
-			srq->limit = attr->srq_limit;
-		spin_unlock_irq(&srq->rq.lock);
-
-		vfree(owq);
-
-		if (srq->ip) {
-			struct ipath_mmap_info *ip = srq->ip;
-			struct ipath_ibdev *dev = to_idev(srq->ibsrq.device);
-			u32 s = sizeof(struct ipath_rwq) + size * sz;
-
-			ipath_update_mmap_info(dev, ip, s, wq);
-
-			/*
-			 * Return the offset to mmap.
-			 * See ipath_mmap() for details.
-			 */
-			if (udata && udata->inlen >= sizeof(__u64)) {
-				ret = ib_copy_to_udata(udata, &ip->offset,
-						       sizeof(ip->offset));
-				if (ret)
-					goto bail;
-			}
-
-			spin_lock_irq(&dev->pending_lock);
-			if (list_empty(&ip->pending_mmaps))
-				list_add(&ip->pending_mmaps,
-					 &dev->pending_mmaps);
-			spin_unlock_irq(&dev->pending_lock);
-		}
-	} else if (attr_mask & IB_SRQ_LIMIT) {
-		spin_lock_irq(&srq->rq.lock);
-		if (attr->srq_limit >= srq->rq.size)
-			ret = -EINVAL;
-		else
-			srq->limit = attr->srq_limit;
-		spin_unlock_irq(&srq->rq.lock);
-	}
-	goto bail;
-
-bail_unlock:
-	spin_unlock_irq(&srq->rq.lock);
-bail_free:
-	vfree(wq);
-bail:
-	return ret;
-}
-
-int ipath_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
-{
-	struct ipath_srq *srq = to_isrq(ibsrq);
-
-	attr->max_wr = srq->rq.size - 1;
-	attr->max_sge = srq->rq.max_sge;
-	attr->srq_limit = srq->limit;
-	return 0;
-}
-
-/**
- * ipath_destroy_srq - destroy a shared receive queue
- * @ibsrq: the SRQ to destroy
- */
-int ipath_destroy_srq(struct ib_srq *ibsrq)
-{
-	struct ipath_srq *srq = to_isrq(ibsrq);
-	struct ipath_ibdev *dev = to_idev(ibsrq->device);
-
-	spin_lock(&dev->n_srqs_lock);
-	dev->n_srqs_allocated--;
-	spin_unlock(&dev->n_srqs_lock);
-	if (srq->ip)
-		kref_put(&srq->ip->ref, ipath_release_mmap_info);
-	else
-		vfree(srq->rq.wq);
-	kfree(srq);
-
-	return 0;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_stats.c b/drivers/staging/rdma/ipath/ipath_stats.c
deleted file mode 100644
index f63e143..0000000
--- a/drivers/staging/rdma/ipath/ipath_stats.c
+++ /dev/null
@@ -1,347 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "ipath_kernel.h"
-
-struct infinipath_stats ipath_stats;
-
-/**
- * ipath_snap_cntr - snapshot a chip counter
- * @dd: the infinipath device
- * @creg: the counter to snapshot
- *
- * called from add_timer and user counter read calls, to deal with
- * counters that wrap in "human time".  The words sent and received, and
- * the packets sent and received are all that we worry about.  For now,
- * at least, we don't worry about error counters, because if they wrap
- * that quickly, we probably don't care.  We may eventually just make this
- * handle all the counters.  word counters can wrap in about 20 seconds
- * of full bandwidth traffic, packet counters in a few hours.
- */
-
-u64 ipath_snap_cntr(struct ipath_devdata *dd, ipath_creg creg)
-{
-	u32 val, reg64 = 0;
-	u64 val64;
-	unsigned long t0, t1;
-	u64 ret;
-
-	t0 = jiffies;
-	/* If fast increment counters are only 32 bits, snapshot them,
-	 * and maintain them as 64bit values in the driver */
-	if (!(dd->ipath_flags & IPATH_32BITCOUNTERS) &&
-	    (creg == dd->ipath_cregs->cr_wordsendcnt ||
-	     creg == dd->ipath_cregs->cr_wordrcvcnt ||
-	     creg == dd->ipath_cregs->cr_pktsendcnt ||
-	     creg == dd->ipath_cregs->cr_pktrcvcnt)) {
-		val64 = ipath_read_creg(dd, creg);
-		val = val64 == ~0ULL ? ~0U : 0;
-		reg64 = 1;
-	} else			/* val64 just to keep gcc quiet... */
-		val64 = val = ipath_read_creg32(dd, creg);
-	/*
-	 * See if a second has passed.  This is just a way to detect things
-	 * that are quite broken.  Normally this should take just a few
-	 * cycles (the check is for long enough that we don't care if we get
-	 * pre-empted.)  An Opteron HT O read timeout is 4 seconds with
-	 * normal NB values
-	 */
-	t1 = jiffies;
-	if (time_before(t0 + HZ, t1) && val == -1) {
-		ipath_dev_err(dd, "Error!  Read counter 0x%x timed out\n",
-			      creg);
-		ret = 0ULL;
-		goto bail;
-	}
-	if (reg64) {
-		ret = val64;
-		goto bail;
-	}
-
-	if (creg == dd->ipath_cregs->cr_wordsendcnt) {
-		if (val != dd->ipath_lastsword) {
-			dd->ipath_sword += val - dd->ipath_lastsword;
-			dd->ipath_lastsword = val;
-		}
-		val64 = dd->ipath_sword;
-	} else if (creg == dd->ipath_cregs->cr_wordrcvcnt) {
-		if (val != dd->ipath_lastrword) {
-			dd->ipath_rword += val - dd->ipath_lastrword;
-			dd->ipath_lastrword = val;
-		}
-		val64 = dd->ipath_rword;
-	} else if (creg == dd->ipath_cregs->cr_pktsendcnt) {
-		if (val != dd->ipath_lastspkts) {
-			dd->ipath_spkts += val - dd->ipath_lastspkts;
-			dd->ipath_lastspkts = val;
-		}
-		val64 = dd->ipath_spkts;
-	} else if (creg == dd->ipath_cregs->cr_pktrcvcnt) {
-		if (val != dd->ipath_lastrpkts) {
-			dd->ipath_rpkts += val - dd->ipath_lastrpkts;
-			dd->ipath_lastrpkts = val;
-		}
-		val64 = dd->ipath_rpkts;
-	} else if (creg == dd->ipath_cregs->cr_ibsymbolerrcnt) {
-		if (dd->ibdeltainprog)
-			val64 -= val64 - dd->ibsymsnap;
-		val64 -= dd->ibsymdelta;
-	} else if (creg == dd->ipath_cregs->cr_iblinkerrrecovcnt) {
-		if (dd->ibdeltainprog)
-			val64 -= val64 - dd->iblnkerrsnap;
-		val64 -= dd->iblnkerrdelta;
-	} else
-		val64 = (u64) val;
-
-	ret = val64;
-
-bail:
-	return ret;
-}
-
-/**
- * ipath_qcheck - print delta of egrfull/hdrqfull errors for kernel ports
- * @dd: the infinipath device
- *
- * print the delta of egrfull/hdrqfull errors for kernel ports no more than
- * every 5 seconds.  User processes are printed at close, but kernel doesn't
- * close, so...  Separate routine so may call from other places someday, and
- * so function name when printed by _IPATH_INFO is meaningfull
- */
-static void ipath_qcheck(struct ipath_devdata *dd)
-{
-	static u64 last_tot_hdrqfull;
-	struct ipath_portdata *pd = dd->ipath_pd[0];
-	size_t blen = 0;
-	char buf[128];
-	u32 hdrqtail;
-
-	*buf = 0;
-	if (pd->port_hdrqfull != dd->ipath_p0_hdrqfull) {
-		blen = snprintf(buf, sizeof buf, "port 0 hdrqfull %u",
-				pd->port_hdrqfull -
-				dd->ipath_p0_hdrqfull);
-		dd->ipath_p0_hdrqfull = pd->port_hdrqfull;
-	}
-	if (ipath_stats.sps_etidfull != dd->ipath_last_tidfull) {
-		blen += snprintf(buf + blen, sizeof buf - blen,
-				 "%srcvegrfull %llu",
-				 blen ? ", " : "",
-				 (unsigned long long)
-				 (ipath_stats.sps_etidfull -
-				  dd->ipath_last_tidfull));
-		dd->ipath_last_tidfull = ipath_stats.sps_etidfull;
-	}
-
-	/*
-	 * this is actually the number of hdrq full interrupts, not actual
-	 * events, but at the moment that's mostly what I'm interested in.
-	 * Actual count, etc. is in the counters, if needed.  For production
-	 * users this won't ordinarily be printed.
-	 */
-
-	if ((ipath_debug & (__IPATH_PKTDBG | __IPATH_DBG)) &&
-	    ipath_stats.sps_hdrqfull != last_tot_hdrqfull) {
-		blen += snprintf(buf + blen, sizeof buf - blen,
-				 "%shdrqfull %llu (all ports)",
-				 blen ? ", " : "",
-				 (unsigned long long)
-				 (ipath_stats.sps_hdrqfull -
-				  last_tot_hdrqfull));
-		last_tot_hdrqfull = ipath_stats.sps_hdrqfull;
-	}
-	if (blen)
-		ipath_dbg("%s\n", buf);
-
-	hdrqtail = ipath_get_hdrqtail(pd);
-	if (pd->port_head != hdrqtail) {
-		if (dd->ipath_lastport0rcv_cnt ==
-		    ipath_stats.sps_port0pkts) {
-			ipath_cdbg(PKT, "missing rcv interrupts? "
-				   "port0 hd=%x tl=%x; port0pkts %llx; write"
-				   " hd (w/intr)\n",
-				   pd->port_head, hdrqtail,
-				   (unsigned long long)
-				   ipath_stats.sps_port0pkts);
-			ipath_write_ureg(dd, ur_rcvhdrhead, hdrqtail |
-				dd->ipath_rhdrhead_intr_off, pd->port_port);
-		}
-		dd->ipath_lastport0rcv_cnt = ipath_stats.sps_port0pkts;
-	}
-}
-
-static void ipath_chk_errormask(struct ipath_devdata *dd)
-{
-	static u32 fixed;
-	u32 ctrl;
-	unsigned long errormask;
-	unsigned long hwerrs;
-
-	if (!dd->ipath_errormask || !(dd->ipath_flags & IPATH_INITTED))
-		return;
-
-	errormask = ipath_read_kreg64(dd, dd->ipath_kregs->kr_errormask);
-
-	if (errormask == dd->ipath_errormask)
-		return;
-	fixed++;
-
-	hwerrs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus);
-	ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control);
-
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
-		dd->ipath_errormask);
-
-	if ((hwerrs & dd->ipath_hwerrmask) ||
-		(ctrl & INFINIPATH_C_FREEZEMODE)) {
-		/* force re-interrupt of pending events, just in case */
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear, 0ULL);
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, 0ULL);
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, 0ULL);
-		dev_info(&dd->pcidev->dev,
-			"errormask fixed(%u) %lx -> %lx, ctrl %x hwerr %lx\n",
-			fixed, errormask, (unsigned long)dd->ipath_errormask,
-			ctrl, hwerrs);
-	} else
-		ipath_dbg("errormask fixed(%u) %lx -> %lx, no freeze\n",
-			fixed, errormask,
-			(unsigned long)dd->ipath_errormask);
-}
-
-
-/**
- * ipath_get_faststats - get word counters from chip before they overflow
- * @opaque - contains a pointer to the infinipath device ipath_devdata
- *
- * called from add_timer
- */
-void ipath_get_faststats(unsigned long opaque)
-{
-	struct ipath_devdata *dd = (struct ipath_devdata *) opaque;
-	int i;
-	static unsigned cnt;
-	unsigned long flags;
-	u64 traffic_wds;
-
-	/*
-	 * don't access the chip while running diags, or memory diags can
-	 * fail
-	 */
-	if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_INITTED) ||
-	    ipath_diag_inuse)
-		/* but re-arm the timer, for diags case; won't hurt other */
-		goto done;
-
-	/*
-	 * We now try to maintain a "active timer", based on traffic
-	 * exceeding a threshold, so we need to check the word-counts
-	 * even if they are 64-bit.
-	 */
-	traffic_wds = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt) +
-		ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt);
-	spin_lock_irqsave(&dd->ipath_eep_st_lock, flags);
-	traffic_wds -= dd->ipath_traffic_wds;
-	dd->ipath_traffic_wds += traffic_wds;
-	if (traffic_wds  >= IPATH_TRAFFIC_ACTIVE_THRESHOLD)
-		atomic_add(5, &dd->ipath_active_time); /* S/B #define */
-	spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags);
-
-	if (dd->ipath_flags & IPATH_32BITCOUNTERS) {
-		ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt);
-		ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt);
-	}
-
-	ipath_qcheck(dd);
-
-	/*
-	 * deal with repeat error suppression.  Doesn't really matter if
-	 * last error was almost a full interval ago, or just a few usecs
-	 * ago; still won't get more than 2 per interval.  We may want
-	 * longer intervals for this eventually, could do with mod, counter
-	 * or separate timer.  Also see code in ipath_handle_errors() and
-	 * ipath_handle_hwerrors().
-	 */
-
-	if (dd->ipath_lasterror)
-		dd->ipath_lasterror = 0;
-	if (dd->ipath_lasthwerror)
-		dd->ipath_lasthwerror = 0;
-	if (dd->ipath_maskederrs
-	    && time_after(jiffies, dd->ipath_unmasktime)) {
-		char ebuf[256];
-		int iserr;
-		iserr = ipath_decode_err(dd, ebuf, sizeof ebuf,
-					 dd->ipath_maskederrs);
-		if (dd->ipath_maskederrs &
-		    ~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL |
-		      INFINIPATH_E_PKTERRS))
-			ipath_dev_err(dd, "Re-enabling masked errors "
-				      "(%s)\n", ebuf);
-		else {
-			/*
-			 * rcvegrfull and rcvhdrqfull are "normal", for some
-			 * types of processes (mostly benchmarks) that send
-			 * huge numbers of messages, while not processing
-			 * them.  So only complain about these at debug
-			 * level.
-			 */
-			if (iserr)
-				ipath_dbg(
-					"Re-enabling queue full errors (%s)\n",
-					ebuf);
-			else
-				ipath_cdbg(ERRPKT, "Re-enabling packet"
-					" problem interrupt (%s)\n", ebuf);
-		}
-
-		/* re-enable masked errors */
-		dd->ipath_errormask |= dd->ipath_maskederrs;
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
-				 dd->ipath_errormask);
-		dd->ipath_maskederrs = 0;
-	}
-
-	/* limit qfull messages to ~one per minute per port */
-	if ((++cnt & 0x10)) {
-		for (i = (int) dd->ipath_cfgports; --i >= 0; ) {
-			struct ipath_portdata *pd = dd->ipath_pd[i];
-
-			if (pd && pd->port_lastrcvhdrqtail != -1)
-				pd->port_lastrcvhdrqtail = -1;
-		}
-	}
-
-	ipath_chk_errormask(dd);
-done:
-	mod_timer(&dd->ipath_stats_timer, jiffies + HZ * 5);
-}
diff --git a/drivers/staging/rdma/ipath/ipath_sysfs.c b/drivers/staging/rdma/ipath/ipath_sysfs.c
deleted file mode 100644
index b12b1f6..0000000
--- a/drivers/staging/rdma/ipath/ipath_sysfs.c
+++ /dev/null
@@ -1,1237 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/ctype.h>
-#include <linux/stat.h>
-
-#include "ipath_kernel.h"
-#include "ipath_verbs.h"
-#include "ipath_common.h"
-
-/**
- * ipath_parse_ushort - parse an unsigned short value in an arbitrary base
- * @str: the string containing the number
- * @valp: where to put the result
- *
- * returns the number of bytes consumed, or negative value on error
- */
-int ipath_parse_ushort(const char *str, unsigned short *valp)
-{
-	unsigned long val;
-	char *end;
-	int ret;
-
-	if (!isdigit(str[0])) {
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	val = simple_strtoul(str, &end, 0);
-
-	if (val > 0xffff) {
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	*valp = val;
-
-	ret = end + 1 - str;
-	if (ret == 0)
-		ret = -EINVAL;
-
-bail:
-	return ret;
-}
-
-static ssize_t show_version(struct device_driver *dev, char *buf)
-{
-	/* The string printed here is already newline-terminated. */
-	return scnprintf(buf, PAGE_SIZE, "%s", ib_ipath_version);
-}
-
-static ssize_t show_num_units(struct device_driver *dev, char *buf)
-{
-	return scnprintf(buf, PAGE_SIZE, "%d\n",
-			 ipath_count_units(NULL, NULL, NULL));
-}
-
-static ssize_t show_status(struct device *dev,
-			   struct device_attribute *attr,
-			   char *buf)
-{
-	struct ipath_devdata *dd = dev_get_drvdata(dev);
-	ssize_t ret;
-
-	if (!dd->ipath_statusp) {
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	ret = scnprintf(buf, PAGE_SIZE, "0x%llx\n",
-			(unsigned long long) *(dd->ipath_statusp));
-
-bail:
-	return ret;
-}
-
-static const char *ipath_status_str[] = {
-	"Initted",
-	"Disabled",
-	"Admin_Disabled",
-	"", /* This used to be the old "OIB_SMA" status. */
-	"", /* This used to be the old "SMA" status. */
-	"Present",
-	"IB_link_up",
-	"IB_configured",
-	"NoIBcable",
-	"Fatal_Hardware_Error",
-	NULL,
-};
-
-static ssize_t show_status_str(struct device *dev,
-			       struct device_attribute *attr,
-			       char *buf)
-{
-	struct ipath_devdata *dd = dev_get_drvdata(dev);
-	int i, any;
-	u64 s;
-	ssize_t ret;
-
-	if (!dd->ipath_statusp) {
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	s = *(dd->ipath_statusp);
-	*buf = '\0';
-	for (any = i = 0; s && ipath_status_str[i]; i++) {
-		if (s & 1) {
-			if (any && strlcat(buf, " ", PAGE_SIZE) >=
-			    PAGE_SIZE)
-				/* overflow */
-				break;
-			if (strlcat(buf, ipath_status_str[i],
-				    PAGE_SIZE) >= PAGE_SIZE)
-				break;
-			any = 1;
-		}
-		s >>= 1;
-	}
-	if (any)
-		strlcat(buf, "\n", PAGE_SIZE);
-
-	ret = strlen(buf);
-
-bail:
-	return ret;
-}
-
-static ssize_t show_boardversion(struct device *dev,
-			       struct device_attribute *attr,
-			       char *buf)
-{
-	struct ipath_devdata *dd = dev_get_drvdata(dev);
-	/* The string printed here is already newline-terminated. */
-	return scnprintf(buf, PAGE_SIZE, "%s", dd->ipath_boardversion);
-}
-
-static ssize_t show_localbus_info(struct device *dev,
-			       struct device_attribute *attr,
-			       char *buf)
-{
-	struct ipath_devdata *dd = dev_get_drvdata(dev);
-	/* The string printed here is already newline-terminated. */
-	return scnprintf(buf, PAGE_SIZE, "%s", dd->ipath_lbus_info);
-}
-
-static ssize_t show_lmc(struct device *dev,
-			struct device_attribute *attr,
-			char *buf)
-{
-	struct ipath_devdata *dd = dev_get_drvdata(dev);
-
-	return scnprintf(buf, PAGE_SIZE, "%u\n", dd->ipath_lmc);
-}
-
-static ssize_t store_lmc(struct device *dev,
-			 struct device_attribute *attr,
-			 const char *buf,
-			 size_t count)
-{
-	struct ipath_devdata *dd = dev_get_drvdata(dev);
-	u16 lmc = 0;
-	int ret;
-
-	ret = ipath_parse_ushort(buf, &lmc);
-	if (ret < 0)
-		goto invalid;
-
-	if (lmc > 7) {
-		ret = -EINVAL;
-		goto invalid;
-	}
-
-	ipath_set_lid(dd, dd->ipath_lid, lmc);
-
-	goto bail;
-invalid:
-	ipath_dev_err(dd, "attempt to set invalid LMC %u\n", lmc);
-bail:
-	return ret;
-}
-
-static ssize_t show_lid(struct device *dev,
-			struct device_attribute *attr,
-			char *buf)
-{
-	struct ipath_devdata *dd = dev_get_drvdata(dev);
-
-	return scnprintf(buf, PAGE_SIZE, "0x%x\n", dd->ipath_lid);
-}
-
-static ssize_t store_lid(struct device *dev,
-			 struct device_attribute *attr,
-			  const char *buf,
-			  size_t count)
-{
-	struct ipath_devdata *dd = dev_get_drvdata(dev);
-	u16 lid = 0;
-	int ret;
-
-	ret = ipath_parse_ushort(buf, &lid);
-	if (ret < 0)
-		goto invalid;
-
-	if (lid == 0 || lid >= IPATH_MULTICAST_LID_BASE) {
-		ret = -EINVAL;
-		goto invalid;
-	}
-
-	ipath_set_lid(dd, lid, dd->ipath_lmc);
-
-	goto bail;
-invalid:
-	ipath_dev_err(dd, "attempt to set invalid LID 0x%x\n", lid);
-bail:
-	return ret;
-}
-
-static ssize_t show_mlid(struct device *dev,
-			 struct device_attribute *attr,
-			 char *buf)
-{
-	struct ipath_devdata *dd = dev_get_drvdata(dev);
-
-	return scnprintf(buf, PAGE_SIZE, "0x%x\n", dd->ipath_mlid);
-}
-
-static ssize_t store_mlid(struct device *dev,
-			 struct device_attribute *attr,
-			  const char *buf,
-			  size_t count)
-{
-	struct ipath_devdata *dd = dev_get_drvdata(dev);
-	u16 mlid;
-	int ret;
-
-	ret = ipath_parse_ushort(buf, &mlid);
-	if (ret < 0 || mlid < IPATH_MULTICAST_LID_BASE)
-		goto invalid;
-
-	dd->ipath_mlid = mlid;
-
-	goto bail;
-invalid:
-	ipath_dev_err(dd, "attempt to set invalid MLID\n");
-bail:
-	return ret;
-}
-
-static ssize_t show_guid(struct device *dev,
-			 struct device_attribute *attr,
-			 char *buf)
-{
-	struct ipath_devdata *dd = dev_get_drvdata(dev);
-	u8 *guid;
-
-	guid = (u8 *) & (dd->ipath_guid);
-
-	return scnprintf(buf, PAGE_SIZE,
-			 "%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x\n",
-			 guid[0], guid[1], guid[2], guid[3],
-			 guid[4], guid[5], guid[6], guid[7]);
-}
-
-static ssize_t store_guid(struct device *dev,
-			 struct device_attribute *attr,
-			  const char *buf,
-			  size_t count)
-{
-	struct ipath_devdata *dd = dev_get_drvdata(dev);
-	ssize_t ret;
-	unsigned short guid[8];
-	__be64 new_guid;
-	u8 *ng;
-	int i;
-
-	if (sscanf(buf, "%hx:%hx:%hx:%hx:%hx:%hx:%hx:%hx",
-		   &guid[0], &guid[1], &guid[2], &guid[3],
-		   &guid[4], &guid[5], &guid[6], &guid[7]) != 8)
-		goto invalid;
-
-	ng = (u8 *) &new_guid;
-
-	for (i = 0; i < 8; i++) {
-		if (guid[i] > 0xff)
-			goto invalid;
-		ng[i] = guid[i];
-	}
-
-	if (new_guid == 0)
-		goto invalid;
-
-	dd->ipath_guid = new_guid;
-	dd->ipath_nguid = 1;
-	if (dd->verbs_dev)
-		dd->verbs_dev->ibdev.node_guid = new_guid;
-
-	ret = strlen(buf);
-	goto bail;
-
-invalid:
-	ipath_dev_err(dd, "attempt to set invalid GUID\n");
-	ret = -EINVAL;
-
-bail:
-	return ret;
-}
-
-static ssize_t show_nguid(struct device *dev,
-			  struct device_attribute *attr,
-			  char *buf)
-{
-	struct ipath_devdata *dd = dev_get_drvdata(dev);
-
-	return scnprintf(buf, PAGE_SIZE, "%u\n", dd->ipath_nguid);
-}
-
-static ssize_t show_nports(struct device *dev,
-			   struct device_attribute *attr,
-			   char *buf)
-{
-	struct ipath_devdata *dd = dev_get_drvdata(dev);
-
-	/* Return the number of user ports available. */
-	return scnprintf(buf, PAGE_SIZE, "%u\n", dd->ipath_cfgports - 1);
-}
-
-static ssize_t show_serial(struct device *dev,
-			   struct device_attribute *attr,
-			   char *buf)
-{
-	struct ipath_devdata *dd = dev_get_drvdata(dev);
-
-	buf[sizeof dd->ipath_serial] = '\0';
-	memcpy(buf, dd->ipath_serial, sizeof dd->ipath_serial);
-	strcat(buf, "\n");
-	return strlen(buf);
-}
-
-static ssize_t show_unit(struct device *dev,
-			 struct device_attribute *attr,
-			 char *buf)
-{
-	struct ipath_devdata *dd = dev_get_drvdata(dev);
-
-	return scnprintf(buf, PAGE_SIZE, "%u\n", dd->ipath_unit);
-}
-
-static ssize_t show_jint_max_packets(struct device *dev,
-				     struct device_attribute *attr,
-				     char *buf)
-{
-	struct ipath_devdata *dd = dev_get_drvdata(dev);
-
-	return scnprintf(buf, PAGE_SIZE, "%hu\n", dd->ipath_jint_max_packets);
-}
-
-static ssize_t store_jint_max_packets(struct device *dev,
-				      struct device_attribute *attr,
-				      const char *buf,
-				      size_t count)
-{
-	struct ipath_devdata *dd = dev_get_drvdata(dev);
-	u16 v = 0;
-	int ret;
-
-	ret = ipath_parse_ushort(buf, &v);
-	if (ret < 0)
-		ipath_dev_err(dd, "invalid jint_max_packets.\n");
-	else
-		dd->ipath_f_config_jint(dd, dd->ipath_jint_idle_ticks, v);
-
-	return ret;
-}
-
-static ssize_t show_jint_idle_ticks(struct device *dev,
-				    struct device_attribute *attr,
-				    char *buf)
-{
-	struct ipath_devdata *dd = dev_get_drvdata(dev);
-
-	return scnprintf(buf, PAGE_SIZE, "%hu\n", dd->ipath_jint_idle_ticks);
-}
-
-static ssize_t store_jint_idle_ticks(struct device *dev,
-				     struct device_attribute *attr,
-				     const char *buf,
-				     size_t count)
-{
-	struct ipath_devdata *dd = dev_get_drvdata(dev);
-	u16 v = 0;
-	int ret;
-
-	ret = ipath_parse_ushort(buf, &v);
-	if (ret < 0)
-		ipath_dev_err(dd, "invalid jint_idle_ticks.\n");
-	else
-		dd->ipath_f_config_jint(dd, v, dd->ipath_jint_max_packets);
-
-	return ret;
-}
-
-#define DEVICE_COUNTER(name, attr) \
-	static ssize_t show_counter_##name(struct device *dev, \
-					   struct device_attribute *attr, \
-					   char *buf) \
-	{ \
-		struct ipath_devdata *dd = dev_get_drvdata(dev); \
-		return scnprintf(\
-			buf, PAGE_SIZE, "%llu\n", (unsigned long long) \
-			ipath_snap_cntr( \
-				dd, offsetof(struct infinipath_counters, \
-					     attr) / sizeof(u64)));	\
-	} \
-	static DEVICE_ATTR(name, S_IRUGO, show_counter_##name, NULL);
-
-DEVICE_COUNTER(ib_link_downeds, IBLinkDownedCnt);
-DEVICE_COUNTER(ib_link_err_recoveries, IBLinkErrRecoveryCnt);
-DEVICE_COUNTER(ib_status_changes, IBStatusChangeCnt);
-DEVICE_COUNTER(ib_symbol_errs, IBSymbolErrCnt);
-DEVICE_COUNTER(lb_flow_stalls, LBFlowStallCnt);
-DEVICE_COUNTER(lb_ints, LBIntCnt);
-DEVICE_COUNTER(rx_bad_formats, RxBadFormatCnt);
-DEVICE_COUNTER(rx_buf_ovfls, RxBufOvflCnt);
-DEVICE_COUNTER(rx_data_pkts, RxDataPktCnt);
-DEVICE_COUNTER(rx_dropped_pkts, RxDroppedPktCnt);
-DEVICE_COUNTER(rx_dwords, RxDwordCnt);
-DEVICE_COUNTER(rx_ebps, RxEBPCnt);
-DEVICE_COUNTER(rx_flow_ctrl_errs, RxFlowCtrlErrCnt);
-DEVICE_COUNTER(rx_flow_pkts, RxFlowPktCnt);
-DEVICE_COUNTER(rx_icrc_errs, RxICRCErrCnt);
-DEVICE_COUNTER(rx_len_errs, RxLenErrCnt);
-DEVICE_COUNTER(rx_link_problems, RxLinkProblemCnt);
-DEVICE_COUNTER(rx_lpcrc_errs, RxLPCRCErrCnt);
-DEVICE_COUNTER(rx_max_min_len_errs, RxMaxMinLenErrCnt);
-DEVICE_COUNTER(rx_p0_hdr_egr_ovfls, RxP0HdrEgrOvflCnt);
-DEVICE_COUNTER(rx_p1_hdr_egr_ovfls, RxP1HdrEgrOvflCnt);
-DEVICE_COUNTER(rx_p2_hdr_egr_ovfls, RxP2HdrEgrOvflCnt);
-DEVICE_COUNTER(rx_p3_hdr_egr_ovfls, RxP3HdrEgrOvflCnt);
-DEVICE_COUNTER(rx_p4_hdr_egr_ovfls, RxP4HdrEgrOvflCnt);
-DEVICE_COUNTER(rx_p5_hdr_egr_ovfls, RxP5HdrEgrOvflCnt);
-DEVICE_COUNTER(rx_p6_hdr_egr_ovfls, RxP6HdrEgrOvflCnt);
-DEVICE_COUNTER(rx_p7_hdr_egr_ovfls, RxP7HdrEgrOvflCnt);
-DEVICE_COUNTER(rx_p8_hdr_egr_ovfls, RxP8HdrEgrOvflCnt);
-DEVICE_COUNTER(rx_pkey_mismatches, RxPKeyMismatchCnt);
-DEVICE_COUNTER(rx_tid_full_errs, RxTIDFullErrCnt);
-DEVICE_COUNTER(rx_tid_valid_errs, RxTIDValidErrCnt);
-DEVICE_COUNTER(rx_vcrc_errs, RxVCRCErrCnt);
-DEVICE_COUNTER(tx_data_pkts, TxDataPktCnt);
-DEVICE_COUNTER(tx_dropped_pkts, TxDroppedPktCnt);
-DEVICE_COUNTER(tx_dwords, TxDwordCnt);
-DEVICE_COUNTER(tx_flow_pkts, TxFlowPktCnt);
-DEVICE_COUNTER(tx_flow_stalls, TxFlowStallCnt);
-DEVICE_COUNTER(tx_len_errs, TxLenErrCnt);
-DEVICE_COUNTER(tx_max_min_len_errs, TxMaxMinLenErrCnt);
-DEVICE_COUNTER(tx_underruns, TxUnderrunCnt);
-DEVICE_COUNTER(tx_unsup_vl_errs, TxUnsupVLErrCnt);
-
-static struct attribute *dev_counter_attributes[] = {
-	&dev_attr_ib_link_downeds.attr,
-	&dev_attr_ib_link_err_recoveries.attr,
-	&dev_attr_ib_status_changes.attr,
-	&dev_attr_ib_symbol_errs.attr,
-	&dev_attr_lb_flow_stalls.attr,
-	&dev_attr_lb_ints.attr,
-	&dev_attr_rx_bad_formats.attr,
-	&dev_attr_rx_buf_ovfls.attr,
-	&dev_attr_rx_data_pkts.attr,
-	&dev_attr_rx_dropped_pkts.attr,
-	&dev_attr_rx_dwords.attr,
-	&dev_attr_rx_ebps.attr,
-	&dev_attr_rx_flow_ctrl_errs.attr,
-	&dev_attr_rx_flow_pkts.attr,
-	&dev_attr_rx_icrc_errs.attr,
-	&dev_attr_rx_len_errs.attr,
-	&dev_attr_rx_link_problems.attr,
-	&dev_attr_rx_lpcrc_errs.attr,
-	&dev_attr_rx_max_min_len_errs.attr,
-	&dev_attr_rx_p0_hdr_egr_ovfls.attr,
-	&dev_attr_rx_p1_hdr_egr_ovfls.attr,
-	&dev_attr_rx_p2_hdr_egr_ovfls.attr,
-	&dev_attr_rx_p3_hdr_egr_ovfls.attr,
-	&dev_attr_rx_p4_hdr_egr_ovfls.attr,
-	&dev_attr_rx_p5_hdr_egr_ovfls.attr,
-	&dev_attr_rx_p6_hdr_egr_ovfls.attr,
-	&dev_attr_rx_p7_hdr_egr_ovfls.attr,
-	&dev_attr_rx_p8_hdr_egr_ovfls.attr,
-	&dev_attr_rx_pkey_mismatches.attr,
-	&dev_attr_rx_tid_full_errs.attr,
-	&dev_attr_rx_tid_valid_errs.attr,
-	&dev_attr_rx_vcrc_errs.attr,
-	&dev_attr_tx_data_pkts.attr,
-	&dev_attr_tx_dropped_pkts.attr,
-	&dev_attr_tx_dwords.attr,
-	&dev_attr_tx_flow_pkts.attr,
-	&dev_attr_tx_flow_stalls.attr,
-	&dev_attr_tx_len_errs.attr,
-	&dev_attr_tx_max_min_len_errs.attr,
-	&dev_attr_tx_underruns.attr,
-	&dev_attr_tx_unsup_vl_errs.attr,
-	NULL
-};
-
-static struct attribute_group dev_counter_attr_group = {
-	.name = "counters",
-	.attrs = dev_counter_attributes
-};
-
-static ssize_t store_reset(struct device *dev,
-			 struct device_attribute *attr,
-			  const char *buf,
-			  size_t count)
-{
-	struct ipath_devdata *dd = dev_get_drvdata(dev);
-	int ret;
-
-	if (count < 5 || memcmp(buf, "reset", 5)) {
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	if (dd->ipath_flags & IPATH_DISABLED) {
-		/*
-		 * post-reset init would re-enable interrupts, etc.
-		 * so don't allow reset on disabled devices.  Not
-		 * perfect error, but about the best choice.
-		 */
-		dev_info(dev,"Unit %d is disabled, can't reset\n",
-			 dd->ipath_unit);
-		ret = -EINVAL;
-		goto bail;
-	}
-	ret = ipath_reset_device(dd->ipath_unit);
-bail:
-	return ret<0 ? ret : count;
-}
-
-static ssize_t store_link_state(struct device *dev,
-			 struct device_attribute *attr,
-			  const char *buf,
-			  size_t count)
-{
-	struct ipath_devdata *dd = dev_get_drvdata(dev);
-	int ret, r;
-	u16 state;
-
-	ret = ipath_parse_ushort(buf, &state);
-	if (ret < 0)
-		goto invalid;
-
-	r = ipath_set_linkstate(dd, state);
-	if (r < 0) {
-		ret = r;
-		goto bail;
-	}
-
-	goto bail;
-invalid:
-	ipath_dev_err(dd, "attempt to set invalid link state\n");
-bail:
-	return ret;
-}
-
-static ssize_t show_mtu(struct device *dev,
-			 struct device_attribute *attr,
-			 char *buf)
-{
-	struct ipath_devdata *dd = dev_get_drvdata(dev);
-	return scnprintf(buf, PAGE_SIZE, "%u\n", dd->ipath_ibmtu);
-}
-
-static ssize_t store_mtu(struct device *dev,
-			 struct device_attribute *attr,
-			  const char *buf,
-			  size_t count)
-{
-	struct ipath_devdata *dd = dev_get_drvdata(dev);
-	ssize_t ret;
-	u16 mtu = 0;
-	int r;
-
-	ret = ipath_parse_ushort(buf, &mtu);
-	if (ret < 0)
-		goto invalid;
-
-	r = ipath_set_mtu(dd, mtu);
-	if (r < 0)
-		ret = r;
-
-	goto bail;
-invalid:
-	ipath_dev_err(dd, "attempt to set invalid MTU\n");
-bail:
-	return ret;
-}
-
-static ssize_t show_enabled(struct device *dev,
-			 struct device_attribute *attr,
-			 char *buf)
-{
-	struct ipath_devdata *dd = dev_get_drvdata(dev);
-	return scnprintf(buf, PAGE_SIZE, "%u\n",
-			 (dd->ipath_flags & IPATH_DISABLED) ? 0 : 1);
-}
-
-static ssize_t store_enabled(struct device *dev,
-			 struct device_attribute *attr,
-			  const char *buf,
-			  size_t count)
-{
-	struct ipath_devdata *dd = dev_get_drvdata(dev);
-	ssize_t ret;
-	u16 enable = 0;
-
-	ret = ipath_parse_ushort(buf, &enable);
-	if (ret < 0) {
-		ipath_dev_err(dd, "attempt to use non-numeric on enable\n");
-		goto bail;
-	}
-
-	if (enable) {
-		if (!(dd->ipath_flags & IPATH_DISABLED))
-			goto bail;
-
-		dev_info(dev, "Enabling unit %d\n", dd->ipath_unit);
-		/* same as post-reset */
-		ret = ipath_init_chip(dd, 1);
-		if (ret)
-			ipath_dev_err(dd, "Failed to enable unit %d\n",
-				      dd->ipath_unit);
-		else {
-			dd->ipath_flags &= ~IPATH_DISABLED;
-			*dd->ipath_statusp &= ~IPATH_STATUS_ADMIN_DISABLED;
-		}
-	} else if (!(dd->ipath_flags & IPATH_DISABLED)) {
-		dev_info(dev, "Disabling unit %d\n", dd->ipath_unit);
-		ipath_shutdown_device(dd);
-		dd->ipath_flags |= IPATH_DISABLED;
-		*dd->ipath_statusp |= IPATH_STATUS_ADMIN_DISABLED;
-	}
-
-bail:
-	return ret;
-}
-
-static ssize_t store_rx_pol_inv(struct device *dev,
-			  struct device_attribute *attr,
-			  const char *buf,
-			  size_t count)
-{
-	struct ipath_devdata *dd = dev_get_drvdata(dev);
-	int ret, r;
-	u16 val;
-
-	ret = ipath_parse_ushort(buf, &val);
-	if (ret < 0)
-		goto invalid;
-
-	r = ipath_set_rx_pol_inv(dd, val);
-	if (r < 0) {
-		ret = r;
-		goto bail;
-	}
-
-	goto bail;
-invalid:
-	ipath_dev_err(dd, "attempt to set invalid Rx Polarity invert\n");
-bail:
-	return ret;
-}
-
-static ssize_t store_led_override(struct device *dev,
-			  struct device_attribute *attr,
-			  const char *buf,
-			  size_t count)
-{
-	struct ipath_devdata *dd = dev_get_drvdata(dev);
-	int ret;
-	u16 val;
-
-	ret = ipath_parse_ushort(buf, &val);
-	if (ret > 0)
-		ipath_set_led_override(dd, val);
-	else
-		ipath_dev_err(dd, "attempt to set invalid LED override\n");
-	return ret;
-}
-
-static ssize_t show_logged_errs(struct device *dev,
-				struct device_attribute *attr,
-				char *buf)
-{
-	struct ipath_devdata *dd = dev_get_drvdata(dev);
-	int idx, count;
-
-	/* force consistency with actual EEPROM */
-	if (ipath_update_eeprom_log(dd) != 0)
-		return -ENXIO;
-
-	count = 0;
-	for (idx = 0; idx < IPATH_EEP_LOG_CNT; ++idx) {
-		count += scnprintf(buf + count, PAGE_SIZE - count, "%d%c",
-			dd->ipath_eep_st_errs[idx],
-			idx == (IPATH_EEP_LOG_CNT - 1) ? '\n' : ' ');
-	}
-
-	return count;
-}
-
-/*
- * New sysfs entries to control various IB config. These all turn into
- * accesses via ipath_f_get/set_ib_cfg.
- *
- * Get/Set heartbeat enable. Or of 1=enabled, 2=auto
- */
-static ssize_t show_hrtbt_enb(struct device *dev,
-			 struct device_attribute *attr,
-			 char *buf)
-{
-	struct ipath_devdata *dd = dev_get_drvdata(dev);
-	int ret;
-
-	ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_HRTBT);
-	if (ret >= 0)
-		ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret);
-	return ret;
-}
-
-static ssize_t store_hrtbt_enb(struct device *dev,
-			  struct device_attribute *attr,
-			  const char *buf,
-			  size_t count)
-{
-	struct ipath_devdata *dd = dev_get_drvdata(dev);
-	int ret, r;
-	u16 val;
-
-	ret = ipath_parse_ushort(buf, &val);
-	if (ret >= 0 && val > 3)
-		ret = -EINVAL;
-	if (ret < 0) {
-		ipath_dev_err(dd, "attempt to set invalid Heartbeat enable\n");
-		goto bail;
-	}
-
-	/*
-	 * Set the "intentional" heartbeat enable per either of
-	 * "Enable" and "Auto", as these are normally set together.
-	 * This bit is consulted when leaving loopback mode,
-	 * because entering loopback mode overrides it and automatically
-	 * disables heartbeat.
-	 */
-	r = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT, val);
-	if (r < 0)
-		ret = r;
-	else if (val == IPATH_IB_HRTBT_OFF)
-		dd->ipath_flags |= IPATH_NO_HRTBT;
-	else
-		dd->ipath_flags &= ~IPATH_NO_HRTBT;
-
-bail:
-	return ret;
-}
-
-/*
- * Get/Set Link-widths enabled. Or of 1=1x, 2=4x (this is human/IB centric,
- * _not_ the particular encoding of any given chip)
- */
-static ssize_t show_lwid_enb(struct device *dev,
-			 struct device_attribute *attr,
-			 char *buf)
-{
-	struct ipath_devdata *dd = dev_get_drvdata(dev);
-	int ret;
-
-	ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_LWID_ENB);
-	if (ret >= 0)
-		ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret);
-	return ret;
-}
-
-static ssize_t store_lwid_enb(struct device *dev,
-			  struct device_attribute *attr,
-			  const char *buf,
-			  size_t count)
-{
-	struct ipath_devdata *dd = dev_get_drvdata(dev);
-	int ret, r;
-	u16 val;
-
-	ret = ipath_parse_ushort(buf, &val);
-	if (ret >= 0 && (val == 0 || val > 3))
-		ret = -EINVAL;
-	if (ret < 0) {
-		ipath_dev_err(dd,
-			"attempt to set invalid Link Width (enable)\n");
-		goto bail;
-	}
-
-	r = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_LWID_ENB, val);
-	if (r < 0)
-		ret = r;
-
-bail:
-	return ret;
-}
-
-/* Get current link width */
-static ssize_t show_lwid(struct device *dev,
-			 struct device_attribute *attr,
-			 char *buf)
-
-{
-	struct ipath_devdata *dd = dev_get_drvdata(dev);
-	int ret;
-
-	ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_LWID);
-	if (ret >= 0)
-		ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret);
-	return ret;
-}
-
-/*
- * Get/Set Link-speeds enabled. Or of 1=SDR 2=DDR.
- */
-static ssize_t show_spd_enb(struct device *dev,
-			 struct device_attribute *attr,
-			 char *buf)
-{
-	struct ipath_devdata *dd = dev_get_drvdata(dev);
-	int ret;
-
-	ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_SPD_ENB);
-	if (ret >= 0)
-		ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret);
-	return ret;
-}
-
-static ssize_t store_spd_enb(struct device *dev,
-			  struct device_attribute *attr,
-			  const char *buf,
-			  size_t count)
-{
-	struct ipath_devdata *dd = dev_get_drvdata(dev);
-	int ret, r;
-	u16 val;
-
-	ret = ipath_parse_ushort(buf, &val);
-	if (ret >= 0 && (val == 0 || val > (IPATH_IB_SDR | IPATH_IB_DDR)))
-		ret = -EINVAL;
-	if (ret < 0) {
-		ipath_dev_err(dd,
-			"attempt to set invalid Link Speed (enable)\n");
-		goto bail;
-	}
-
-	r = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_SPD_ENB, val);
-	if (r < 0)
-		ret = r;
-
-bail:
-	return ret;
-}
-
-/* Get current link speed */
-static ssize_t show_spd(struct device *dev,
-			 struct device_attribute *attr,
-			 char *buf)
-{
-	struct ipath_devdata *dd = dev_get_drvdata(dev);
-	int ret;
-
-	ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_SPD);
-	if (ret >= 0)
-		ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret);
-	return ret;
-}
-
-/*
- * Get/Set RX polarity-invert enable. 0=no, 1=yes.
- */
-static ssize_t show_rx_polinv_enb(struct device *dev,
-			 struct device_attribute *attr,
-			 char *buf)
-{
-	struct ipath_devdata *dd = dev_get_drvdata(dev);
-	int ret;
-
-	ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_RXPOL_ENB);
-	if (ret >= 0)
-		ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret);
-	return ret;
-}
-
-static ssize_t store_rx_polinv_enb(struct device *dev,
-			  struct device_attribute *attr,
-			  const char *buf,
-			  size_t count)
-{
-	struct ipath_devdata *dd = dev_get_drvdata(dev);
-	int ret, r;
-	u16 val;
-
-	ret = ipath_parse_ushort(buf, &val);
-	if (ret >= 0 && val > 1) {
-		ipath_dev_err(dd,
-			"attempt to set invalid Rx Polarity (enable)\n");
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	r = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_RXPOL_ENB, val);
-	if (r < 0)
-		ret = r;
-
-bail:
-	return ret;
-}
-
-/*
- * Get/Set RX lane-reversal enable. 0=no, 1=yes.
- */
-static ssize_t show_lanerev_enb(struct device *dev,
-			 struct device_attribute *attr,
-			 char *buf)
-{
-	struct ipath_devdata *dd = dev_get_drvdata(dev);
-	int ret;
-
-	ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_LREV_ENB);
-	if (ret >= 0)
-		ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret);
-	return ret;
-}
-
-static ssize_t store_lanerev_enb(struct device *dev,
-			  struct device_attribute *attr,
-			  const char *buf,
-			  size_t count)
-{
-	struct ipath_devdata *dd = dev_get_drvdata(dev);
-	int ret, r;
-	u16 val;
-
-	ret = ipath_parse_ushort(buf, &val);
-	if (ret >= 0 && val > 1) {
-		ret = -EINVAL;
-		ipath_dev_err(dd,
-			"attempt to set invalid Lane reversal (enable)\n");
-		goto bail;
-	}
-
-	r = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_LREV_ENB, val);
-	if (r < 0)
-		ret = r;
-
-bail:
-	return ret;
-}
-
-static DRIVER_ATTR(num_units, S_IRUGO, show_num_units, NULL);
-static DRIVER_ATTR(version, S_IRUGO, show_version, NULL);
-
-static struct attribute *driver_attributes[] = {
-	&driver_attr_num_units.attr,
-	&driver_attr_version.attr,
-	NULL
-};
-
-static struct attribute_group driver_attr_group = {
-	.attrs = driver_attributes
-};
-
-static ssize_t store_tempsense(struct device *dev,
-			       struct device_attribute *attr,
-			       const char *buf,
-			       size_t count)
-{
-	struct ipath_devdata *dd = dev_get_drvdata(dev);
-	int ret, stat;
-	u16 val;
-
-	ret = ipath_parse_ushort(buf, &val);
-	if (ret <= 0) {
-		ipath_dev_err(dd, "attempt to set invalid tempsense config\n");
-		goto bail;
-	}
-	/* If anything but the highest limit, enable T_CRIT_A "interrupt" */
-	stat = ipath_tempsense_write(dd, 9, (val == 0x7f7f) ? 0x80 : 0);
-	if (stat) {
-		ipath_dev_err(dd, "Unable to set tempsense config\n");
-		ret = -1;
-		goto bail;
-	}
-	stat = ipath_tempsense_write(dd, 0xB, (u8) (val & 0xFF));
-	if (stat) {
-		ipath_dev_err(dd, "Unable to set local Tcrit\n");
-		ret = -1;
-		goto bail;
-	}
-	stat = ipath_tempsense_write(dd, 0xD, (u8) (val >> 8));
-	if (stat) {
-		ipath_dev_err(dd, "Unable to set remote Tcrit\n");
-		ret = -1;
-		goto bail;
-	}
-
-bail:
-	return ret;
-}
-
-/*
- * dump tempsense regs. in decimal, to ease shell-scripts.
- */
-static ssize_t show_tempsense(struct device *dev,
-			      struct device_attribute *attr,
-			      char *buf)
-{
-	struct ipath_devdata *dd = dev_get_drvdata(dev);
-	int ret;
-	int idx;
-	u8 regvals[8];
-
-	ret = -ENXIO;
-	for (idx = 0; idx < 8; ++idx) {
-		if (idx == 6)
-			continue;
-		ret = ipath_tempsense_read(dd, idx);
-		if (ret < 0)
-			break;
-		regvals[idx] = ret;
-	}
-	if (idx == 8)
-		ret = scnprintf(buf, PAGE_SIZE, "%d %d %02X %02X %d %d\n",
-			*(signed char *)(regvals),
-			*(signed char *)(regvals + 1),
-			regvals[2], regvals[3],
-			*(signed char *)(regvals + 5),
-			*(signed char *)(regvals + 7));
-	return ret;
-}
-
-const struct attribute_group *ipath_driver_attr_groups[] = {
-	&driver_attr_group,
-	NULL,
-};
-
-static DEVICE_ATTR(guid, S_IWUSR | S_IRUGO, show_guid, store_guid);
-static DEVICE_ATTR(lmc, S_IWUSR | S_IRUGO, show_lmc, store_lmc);
-static DEVICE_ATTR(lid, S_IWUSR | S_IRUGO, show_lid, store_lid);
-static DEVICE_ATTR(link_state, S_IWUSR, NULL, store_link_state);
-static DEVICE_ATTR(mlid, S_IWUSR | S_IRUGO, show_mlid, store_mlid);
-static DEVICE_ATTR(mtu, S_IWUSR | S_IRUGO, show_mtu, store_mtu);
-static DEVICE_ATTR(enabled, S_IWUSR | S_IRUGO, show_enabled, store_enabled);
-static DEVICE_ATTR(nguid, S_IRUGO, show_nguid, NULL);
-static DEVICE_ATTR(nports, S_IRUGO, show_nports, NULL);
-static DEVICE_ATTR(reset, S_IWUSR, NULL, store_reset);
-static DEVICE_ATTR(serial, S_IRUGO, show_serial, NULL);
-static DEVICE_ATTR(status, S_IRUGO, show_status, NULL);
-static DEVICE_ATTR(status_str, S_IRUGO, show_status_str, NULL);
-static DEVICE_ATTR(boardversion, S_IRUGO, show_boardversion, NULL);
-static DEVICE_ATTR(unit, S_IRUGO, show_unit, NULL);
-static DEVICE_ATTR(rx_pol_inv, S_IWUSR, NULL, store_rx_pol_inv);
-static DEVICE_ATTR(led_override, S_IWUSR, NULL, store_led_override);
-static DEVICE_ATTR(logged_errors, S_IRUGO, show_logged_errs, NULL);
-static DEVICE_ATTR(localbus_info, S_IRUGO, show_localbus_info, NULL);
-static DEVICE_ATTR(jint_max_packets, S_IWUSR | S_IRUGO,
-		   show_jint_max_packets, store_jint_max_packets);
-static DEVICE_ATTR(jint_idle_ticks, S_IWUSR | S_IRUGO,
-		   show_jint_idle_ticks, store_jint_idle_ticks);
-static DEVICE_ATTR(tempsense, S_IWUSR | S_IRUGO,
-		   show_tempsense, store_tempsense);
-
-static struct attribute *dev_attributes[] = {
-	&dev_attr_guid.attr,
-	&dev_attr_lmc.attr,
-	&dev_attr_lid.attr,
-	&dev_attr_link_state.attr,
-	&dev_attr_mlid.attr,
-	&dev_attr_mtu.attr,
-	&dev_attr_nguid.attr,
-	&dev_attr_nports.attr,
-	&dev_attr_serial.attr,
-	&dev_attr_status.attr,
-	&dev_attr_status_str.attr,
-	&dev_attr_boardversion.attr,
-	&dev_attr_unit.attr,
-	&dev_attr_enabled.attr,
-	&dev_attr_rx_pol_inv.attr,
-	&dev_attr_led_override.attr,
-	&dev_attr_logged_errors.attr,
-	&dev_attr_tempsense.attr,
-	&dev_attr_localbus_info.attr,
-	NULL
-};
-
-static struct attribute_group dev_attr_group = {
-	.attrs = dev_attributes
-};
-
-static DEVICE_ATTR(hrtbt_enable, S_IWUSR | S_IRUGO, show_hrtbt_enb,
-		   store_hrtbt_enb);
-static DEVICE_ATTR(link_width_enable, S_IWUSR | S_IRUGO, show_lwid_enb,
-		   store_lwid_enb);
-static DEVICE_ATTR(link_width, S_IRUGO, show_lwid, NULL);
-static DEVICE_ATTR(link_speed_enable, S_IWUSR | S_IRUGO, show_spd_enb,
-		   store_spd_enb);
-static DEVICE_ATTR(link_speed, S_IRUGO, show_spd, NULL);
-static DEVICE_ATTR(rx_pol_inv_enable, S_IWUSR | S_IRUGO, show_rx_polinv_enb,
-		   store_rx_polinv_enb);
-static DEVICE_ATTR(rx_lane_rev_enable, S_IWUSR | S_IRUGO, show_lanerev_enb,
-		   store_lanerev_enb);
-
-static struct attribute *dev_ibcfg_attributes[] = {
-	&dev_attr_hrtbt_enable.attr,
-	&dev_attr_link_width_enable.attr,
-	&dev_attr_link_width.attr,
-	&dev_attr_link_speed_enable.attr,
-	&dev_attr_link_speed.attr,
-	&dev_attr_rx_pol_inv_enable.attr,
-	&dev_attr_rx_lane_rev_enable.attr,
-	NULL
-};
-
-static struct attribute_group dev_ibcfg_attr_group = {
-	.attrs = dev_ibcfg_attributes
-};
-
-/**
- * ipath_expose_reset - create a device reset file
- * @dev: the device structure
- *
- * Only expose a file that lets us reset the device after someone
- * enters diag mode.  A device reset is quite likely to crash the
- * machine entirely, so we don't want to normally make it
- * available.
- *
- * Called with ipath_mutex held.
- */
-int ipath_expose_reset(struct device *dev)
-{
-	static int exposed;
-	int ret;
-
-	if (!exposed) {
-		ret = device_create_file(dev, &dev_attr_reset);
-		exposed = 1;
-	} else {
-		ret = 0;
-	}
-
-	return ret;
-}
-
-int ipath_device_create_group(struct device *dev, struct ipath_devdata *dd)
-{
-	int ret;
-
-	ret = sysfs_create_group(&dev->kobj, &dev_attr_group);
-	if (ret)
-		goto bail;
-
-	ret = sysfs_create_group(&dev->kobj, &dev_counter_attr_group);
-	if (ret)
-		goto bail_attrs;
-
-	if (dd->ipath_flags & IPATH_HAS_MULT_IB_SPEED) {
-		ret = device_create_file(dev, &dev_attr_jint_idle_ticks);
-		if (ret)
-			goto bail_counter;
-		ret = device_create_file(dev, &dev_attr_jint_max_packets);
-		if (ret)
-			goto bail_idle;
-
-		ret = sysfs_create_group(&dev->kobj, &dev_ibcfg_attr_group);
-		if (ret)
-			goto bail_max;
-	}
-
-	return 0;
-
-bail_max:
-	device_remove_file(dev, &dev_attr_jint_max_packets);
-bail_idle:
-	device_remove_file(dev, &dev_attr_jint_idle_ticks);
-bail_counter:
-	sysfs_remove_group(&dev->kobj, &dev_counter_attr_group);
-bail_attrs:
-	sysfs_remove_group(&dev->kobj, &dev_attr_group);
-bail:
-	return ret;
-}
-
-void ipath_device_remove_group(struct device *dev, struct ipath_devdata *dd)
-{
-	sysfs_remove_group(&dev->kobj, &dev_counter_attr_group);
-
-	if (dd->ipath_flags & IPATH_HAS_MULT_IB_SPEED) {
-		sysfs_remove_group(&dev->kobj, &dev_ibcfg_attr_group);
-		device_remove_file(dev, &dev_attr_jint_idle_ticks);
-		device_remove_file(dev, &dev_attr_jint_max_packets);
-	}
-
-	sysfs_remove_group(&dev->kobj, &dev_attr_group);
-
-	device_remove_file(dev, &dev_attr_reset);
-}
diff --git a/drivers/staging/rdma/ipath/ipath_uc.c b/drivers/staging/rdma/ipath/ipath_uc.c
deleted file mode 100644
index 0246b30..0000000
--- a/drivers/staging/rdma/ipath/ipath_uc.c
+++ /dev/null
@@ -1,547 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "ipath_verbs.h"
-#include "ipath_kernel.h"
-
-/* cut down ridiculously long IB macro names */
-#define OP(x) IB_OPCODE_UC_##x
-
-/**
- * ipath_make_uc_req - construct a request packet (SEND, RDMA write)
- * @qp: a pointer to the QP
- *
- * Return 1 if constructed; otherwise, return 0.
- */
-int ipath_make_uc_req(struct ipath_qp *qp)
-{
-	struct ipath_other_headers *ohdr;
-	struct ipath_swqe *wqe;
-	unsigned long flags;
-	u32 hwords;
-	u32 bth0;
-	u32 len;
-	u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
-	int ret = 0;
-
-	spin_lock_irqsave(&qp->s_lock, flags);
-
-	if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)) {
-		if (!(ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND))
-			goto bail;
-		/* We are in the error state, flush the work request. */
-		if (qp->s_last == qp->s_head)
-			goto bail;
-		/* If DMAs are in progress, we can't flush immediately. */
-		if (atomic_read(&qp->s_dma_busy)) {
-			qp->s_flags |= IPATH_S_WAIT_DMA;
-			goto bail;
-		}
-		wqe = get_swqe_ptr(qp, qp->s_last);
-		ipath_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
-		goto done;
-	}
-
-	ohdr = &qp->s_hdr.u.oth;
-	if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
-		ohdr = &qp->s_hdr.u.l.oth;
-
-	/* header size in 32-bit words LRH+BTH = (8+12)/4. */
-	hwords = 5;
-	bth0 = 1 << 22; /* Set M bit */
-
-	/* Get the next send request. */
-	wqe = get_swqe_ptr(qp, qp->s_cur);
-	qp->s_wqe = NULL;
-	switch (qp->s_state) {
-	default:
-		if (!(ib_ipath_state_ops[qp->state] &
-		    IPATH_PROCESS_NEXT_SEND_OK))
-			goto bail;
-		/* Check if send work queue is empty. */
-		if (qp->s_cur == qp->s_head)
-			goto bail;
-		/*
-		 * Start a new request.
-		 */
-		qp->s_psn = wqe->psn = qp->s_next_psn;
-		qp->s_sge.sge = wqe->sg_list[0];
-		qp->s_sge.sg_list = wqe->sg_list + 1;
-		qp->s_sge.num_sge = wqe->wr.num_sge;
-		qp->s_len = len = wqe->length;
-		switch (wqe->wr.opcode) {
-		case IB_WR_SEND:
-		case IB_WR_SEND_WITH_IMM:
-			if (len > pmtu) {
-				qp->s_state = OP(SEND_FIRST);
-				len = pmtu;
-				break;
-			}
-			if (wqe->wr.opcode == IB_WR_SEND)
-				qp->s_state = OP(SEND_ONLY);
-			else {
-				qp->s_state =
-					OP(SEND_ONLY_WITH_IMMEDIATE);
-				/* Immediate data comes after the BTH */
-				ohdr->u.imm_data = wqe->wr.ex.imm_data;
-				hwords += 1;
-			}
-			if (wqe->wr.send_flags & IB_SEND_SOLICITED)
-				bth0 |= 1 << 23;
-			qp->s_wqe = wqe;
-			if (++qp->s_cur >= qp->s_size)
-				qp->s_cur = 0;
-			break;
-
-		case IB_WR_RDMA_WRITE:
-		case IB_WR_RDMA_WRITE_WITH_IMM:
-			ohdr->u.rc.reth.vaddr =
-				cpu_to_be64(wqe->rdma_wr.remote_addr);
-			ohdr->u.rc.reth.rkey =
-				cpu_to_be32(wqe->rdma_wr.rkey);
-			ohdr->u.rc.reth.length = cpu_to_be32(len);
-			hwords += sizeof(struct ib_reth) / 4;
-			if (len > pmtu) {
-				qp->s_state = OP(RDMA_WRITE_FIRST);
-				len = pmtu;
-				break;
-			}
-			if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
-				qp->s_state = OP(RDMA_WRITE_ONLY);
-			else {
-				qp->s_state =
-					OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
-				/* Immediate data comes after the RETH */
-				ohdr->u.rc.imm_data = wqe->wr.ex.imm_data;
-				hwords += 1;
-				if (wqe->wr.send_flags & IB_SEND_SOLICITED)
-					bth0 |= 1 << 23;
-			}
-			qp->s_wqe = wqe;
-			if (++qp->s_cur >= qp->s_size)
-				qp->s_cur = 0;
-			break;
-
-		default:
-			goto bail;
-		}
-		break;
-
-	case OP(SEND_FIRST):
-		qp->s_state = OP(SEND_MIDDLE);
-		/* FALLTHROUGH */
-	case OP(SEND_MIDDLE):
-		len = qp->s_len;
-		if (len > pmtu) {
-			len = pmtu;
-			break;
-		}
-		if (wqe->wr.opcode == IB_WR_SEND)
-			qp->s_state = OP(SEND_LAST);
-		else {
-			qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE);
-			/* Immediate data comes after the BTH */
-			ohdr->u.imm_data = wqe->wr.ex.imm_data;
-			hwords += 1;
-		}
-		if (wqe->wr.send_flags & IB_SEND_SOLICITED)
-			bth0 |= 1 << 23;
-		qp->s_wqe = wqe;
-		if (++qp->s_cur >= qp->s_size)
-			qp->s_cur = 0;
-		break;
-
-	case OP(RDMA_WRITE_FIRST):
-		qp->s_state = OP(RDMA_WRITE_MIDDLE);
-		/* FALLTHROUGH */
-	case OP(RDMA_WRITE_MIDDLE):
-		len = qp->s_len;
-		if (len > pmtu) {
-			len = pmtu;
-			break;
-		}
-		if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
-			qp->s_state = OP(RDMA_WRITE_LAST);
-		else {
-			qp->s_state =
-				OP(RDMA_WRITE_LAST_WITH_IMMEDIATE);
-			/* Immediate data comes after the BTH */
-			ohdr->u.imm_data = wqe->wr.ex.imm_data;
-			hwords += 1;
-			if (wqe->wr.send_flags & IB_SEND_SOLICITED)
-				bth0 |= 1 << 23;
-		}
-		qp->s_wqe = wqe;
-		if (++qp->s_cur >= qp->s_size)
-			qp->s_cur = 0;
-		break;
-	}
-	qp->s_len -= len;
-	qp->s_hdrwords = hwords;
-	qp->s_cur_sge = &qp->s_sge;
-	qp->s_cur_size = len;
-	ipath_make_ruc_header(to_idev(qp->ibqp.device),
-			      qp, ohdr, bth0 | (qp->s_state << 24),
-			      qp->s_next_psn++ & IPATH_PSN_MASK);
-done:
-	ret = 1;
-	goto unlock;
-
-bail:
-	qp->s_flags &= ~IPATH_S_BUSY;
-unlock:
-	spin_unlock_irqrestore(&qp->s_lock, flags);
-	return ret;
-}
-
-/**
- * ipath_uc_rcv - handle an incoming UC packet
- * @dev: the device the packet came in on
- * @hdr: the header of the packet
- * @has_grh: true if the packet has a GRH
- * @data: the packet data
- * @tlen: the length of the packet
- * @qp: the QP for this packet.
- *
- * This is called from ipath_qp_rcv() to process an incoming UC packet
- * for the given QP.
- * Called at interrupt level.
- */
-void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
-		  int has_grh, void *data, u32 tlen, struct ipath_qp *qp)
-{
-	struct ipath_other_headers *ohdr;
-	int opcode;
-	u32 hdrsize;
-	u32 psn;
-	u32 pad;
-	struct ib_wc wc;
-	u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
-	struct ib_reth *reth;
-	int header_in_data;
-
-	/* Validate the SLID. See Ch. 9.6.1.5 */
-	if (unlikely(be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid))
-		goto done;
-
-	/* Check for GRH */
-	if (!has_grh) {
-		ohdr = &hdr->u.oth;
-		hdrsize = 8 + 12;	/* LRH + BTH */
-		psn = be32_to_cpu(ohdr->bth[2]);
-		header_in_data = 0;
-	} else {
-		ohdr = &hdr->u.l.oth;
-		hdrsize = 8 + 40 + 12;	/* LRH + GRH + BTH */
-		/*
-		 * The header with GRH is 60 bytes and the
-		 * core driver sets the eager header buffer
-		 * size to 56 bytes so the last 4 bytes of
-		 * the BTH header (PSN) is in the data buffer.
-		 */
-		header_in_data = dev->dd->ipath_rcvhdrentsize == 16;
-		if (header_in_data) {
-			psn = be32_to_cpu(((__be32 *) data)[0]);
-			data += sizeof(__be32);
-		} else
-			psn = be32_to_cpu(ohdr->bth[2]);
-	}
-	/*
-	 * The opcode is in the low byte when its in network order
-	 * (top byte when in host order).
-	 */
-	opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
-
-	memset(&wc, 0, sizeof wc);
-
-	/* Compare the PSN verses the expected PSN. */
-	if (unlikely(ipath_cmp24(psn, qp->r_psn) != 0)) {
-		/*
-		 * Handle a sequence error.
-		 * Silently drop any current message.
-		 */
-		qp->r_psn = psn;
-	inv:
-		qp->r_state = OP(SEND_LAST);
-		switch (opcode) {
-		case OP(SEND_FIRST):
-		case OP(SEND_ONLY):
-		case OP(SEND_ONLY_WITH_IMMEDIATE):
-			goto send_first;
-
-		case OP(RDMA_WRITE_FIRST):
-		case OP(RDMA_WRITE_ONLY):
-		case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE):
-			goto rdma_first;
-
-		default:
-			dev->n_pkt_drops++;
-			goto done;
-		}
-	}
-
-	/* Check for opcode sequence errors. */
-	switch (qp->r_state) {
-	case OP(SEND_FIRST):
-	case OP(SEND_MIDDLE):
-		if (opcode == OP(SEND_MIDDLE) ||
-		    opcode == OP(SEND_LAST) ||
-		    opcode == OP(SEND_LAST_WITH_IMMEDIATE))
-			break;
-		goto inv;
-
-	case OP(RDMA_WRITE_FIRST):
-	case OP(RDMA_WRITE_MIDDLE):
-		if (opcode == OP(RDMA_WRITE_MIDDLE) ||
-		    opcode == OP(RDMA_WRITE_LAST) ||
-		    opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
-			break;
-		goto inv;
-
-	default:
-		if (opcode == OP(SEND_FIRST) ||
-		    opcode == OP(SEND_ONLY) ||
-		    opcode == OP(SEND_ONLY_WITH_IMMEDIATE) ||
-		    opcode == OP(RDMA_WRITE_FIRST) ||
-		    opcode == OP(RDMA_WRITE_ONLY) ||
-		    opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE))
-			break;
-		goto inv;
-	}
-
-	/* OK, process the packet. */
-	switch (opcode) {
-	case OP(SEND_FIRST):
-	case OP(SEND_ONLY):
-	case OP(SEND_ONLY_WITH_IMMEDIATE):
-	send_first:
-		if (qp->r_flags & IPATH_R_REUSE_SGE) {
-			qp->r_flags &= ~IPATH_R_REUSE_SGE;
-			qp->r_sge = qp->s_rdma_read_sge;
-		} else if (!ipath_get_rwqe(qp, 0)) {
-			dev->n_pkt_drops++;
-			goto done;
-		}
-		/* Save the WQE so we can reuse it in case of an error. */
-		qp->s_rdma_read_sge = qp->r_sge;
-		qp->r_rcv_len = 0;
-		if (opcode == OP(SEND_ONLY))
-			goto send_last;
-		else if (opcode == OP(SEND_ONLY_WITH_IMMEDIATE))
-			goto send_last_imm;
-		/* FALLTHROUGH */
-	case OP(SEND_MIDDLE):
-		/* Check for invalid length PMTU or posted rwqe len. */
-		if (unlikely(tlen != (hdrsize + pmtu + 4))) {
-			qp->r_flags |= IPATH_R_REUSE_SGE;
-			dev->n_pkt_drops++;
-			goto done;
-		}
-		qp->r_rcv_len += pmtu;
-		if (unlikely(qp->r_rcv_len > qp->r_len)) {
-			qp->r_flags |= IPATH_R_REUSE_SGE;
-			dev->n_pkt_drops++;
-			goto done;
-		}
-		ipath_copy_sge(&qp->r_sge, data, pmtu);
-		break;
-
-	case OP(SEND_LAST_WITH_IMMEDIATE):
-	send_last_imm:
-		if (header_in_data) {
-			wc.ex.imm_data = *(__be32 *) data;
-			data += sizeof(__be32);
-		} else {
-			/* Immediate data comes after BTH */
-			wc.ex.imm_data = ohdr->u.imm_data;
-		}
-		hdrsize += 4;
-		wc.wc_flags = IB_WC_WITH_IMM;
-		/* FALLTHROUGH */
-	case OP(SEND_LAST):
-	send_last:
-		/* Get the number of bytes the message was padded by. */
-		pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
-		/* Check for invalid length. */
-		/* XXX LAST len should be >= 1 */
-		if (unlikely(tlen < (hdrsize + pad + 4))) {
-			qp->r_flags |= IPATH_R_REUSE_SGE;
-			dev->n_pkt_drops++;
-			goto done;
-		}
-		/* Don't count the CRC. */
-		tlen -= (hdrsize + pad + 4);
-		wc.byte_len = tlen + qp->r_rcv_len;
-		if (unlikely(wc.byte_len > qp->r_len)) {
-			qp->r_flags |= IPATH_R_REUSE_SGE;
-			dev->n_pkt_drops++;
-			goto done;
-		}
-		wc.opcode = IB_WC_RECV;
-	last_imm:
-		ipath_copy_sge(&qp->r_sge, data, tlen);
-		wc.wr_id = qp->r_wr_id;
-		wc.status = IB_WC_SUCCESS;
-		wc.qp = &qp->ibqp;
-		wc.src_qp = qp->remote_qpn;
-		wc.slid = qp->remote_ah_attr.dlid;
-		wc.sl = qp->remote_ah_attr.sl;
-		/* Signal completion event if the solicited bit is set. */
-		ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
-			       (ohdr->bth[0] &
-				cpu_to_be32(1 << 23)) != 0);
-		break;
-
-	case OP(RDMA_WRITE_FIRST):
-	case OP(RDMA_WRITE_ONLY):
-	case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE): /* consume RWQE */
-	rdma_first:
-		/* RETH comes after BTH */
-		if (!header_in_data)
-			reth = &ohdr->u.rc.reth;
-		else {
-			reth = (struct ib_reth *)data;
-			data += sizeof(*reth);
-		}
-		hdrsize += sizeof(*reth);
-		qp->r_len = be32_to_cpu(reth->length);
-		qp->r_rcv_len = 0;
-		if (qp->r_len != 0) {
-			u32 rkey = be32_to_cpu(reth->rkey);
-			u64 vaddr = be64_to_cpu(reth->vaddr);
-			int ok;
-
-			/* Check rkey */
-			ok = ipath_rkey_ok(qp, &qp->r_sge, qp->r_len,
-					   vaddr, rkey,
-					   IB_ACCESS_REMOTE_WRITE);
-			if (unlikely(!ok)) {
-				dev->n_pkt_drops++;
-				goto done;
-			}
-		} else {
-			qp->r_sge.sg_list = NULL;
-			qp->r_sge.sge.mr = NULL;
-			qp->r_sge.sge.vaddr = NULL;
-			qp->r_sge.sge.length = 0;
-			qp->r_sge.sge.sge_length = 0;
-		}
-		if (unlikely(!(qp->qp_access_flags &
-			       IB_ACCESS_REMOTE_WRITE))) {
-			dev->n_pkt_drops++;
-			goto done;
-		}
-		if (opcode == OP(RDMA_WRITE_ONLY))
-			goto rdma_last;
-		else if (opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE))
-			goto rdma_last_imm;
-		/* FALLTHROUGH */
-	case OP(RDMA_WRITE_MIDDLE):
-		/* Check for invalid length PMTU or posted rwqe len. */
-		if (unlikely(tlen != (hdrsize + pmtu + 4))) {
-			dev->n_pkt_drops++;
-			goto done;
-		}
-		qp->r_rcv_len += pmtu;
-		if (unlikely(qp->r_rcv_len > qp->r_len)) {
-			dev->n_pkt_drops++;
-			goto done;
-		}
-		ipath_copy_sge(&qp->r_sge, data, pmtu);
-		break;
-
-	case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
-	rdma_last_imm:
-		if (header_in_data) {
-			wc.ex.imm_data = *(__be32 *) data;
-			data += sizeof(__be32);
-		} else {
-			/* Immediate data comes after BTH */
-			wc.ex.imm_data = ohdr->u.imm_data;
-		}
-		hdrsize += 4;
-		wc.wc_flags = IB_WC_WITH_IMM;
-
-		/* Get the number of bytes the message was padded by. */
-		pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
-		/* Check for invalid length. */
-		/* XXX LAST len should be >= 1 */
-		if (unlikely(tlen < (hdrsize + pad + 4))) {
-			dev->n_pkt_drops++;
-			goto done;
-		}
-		/* Don't count the CRC. */
-		tlen -= (hdrsize + pad + 4);
-		if (unlikely(tlen + qp->r_rcv_len != qp->r_len)) {
-			dev->n_pkt_drops++;
-			goto done;
-		}
-		if (qp->r_flags & IPATH_R_REUSE_SGE)
-			qp->r_flags &= ~IPATH_R_REUSE_SGE;
-		else if (!ipath_get_rwqe(qp, 1)) {
-			dev->n_pkt_drops++;
-			goto done;
-		}
-		wc.byte_len = qp->r_len;
-		wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
-		goto last_imm;
-
-	case OP(RDMA_WRITE_LAST):
-	rdma_last:
-		/* Get the number of bytes the message was padded by. */
-		pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
-		/* Check for invalid length. */
-		/* XXX LAST len should be >= 1 */
-		if (unlikely(tlen < (hdrsize + pad + 4))) {
-			dev->n_pkt_drops++;
-			goto done;
-		}
-		/* Don't count the CRC. */
-		tlen -= (hdrsize + pad + 4);
-		if (unlikely(tlen + qp->r_rcv_len != qp->r_len)) {
-			dev->n_pkt_drops++;
-			goto done;
-		}
-		ipath_copy_sge(&qp->r_sge, data, tlen);
-		break;
-
-	default:
-		/* Drop packet for unknown opcodes. */
-		dev->n_pkt_drops++;
-		goto done;
-	}
-	qp->r_psn++;
-	qp->r_state = opcode;
-done:
-	return;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_ud.c b/drivers/staging/rdma/ipath/ipath_ud.c
deleted file mode 100644
index 385d941..0000000
--- a/drivers/staging/rdma/ipath/ipath_ud.c
+++ /dev/null
@@ -1,579 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <rdma/ib_smi.h>
-
-#include "ipath_verbs.h"
-#include "ipath_kernel.h"
-
-/**
- * ipath_ud_loopback - handle send on loopback QPs
- * @sqp: the sending QP
- * @swqe: the send work request
- *
- * This is called from ipath_make_ud_req() to forward a WQE addressed
- * to the same HCA.
- * Note that the receive interrupt handler may be calling ipath_ud_rcv()
- * while this is being called.
- */
-static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe)
-{
-	struct ipath_ibdev *dev = to_idev(sqp->ibqp.device);
-	struct ipath_qp *qp;
-	struct ib_ah_attr *ah_attr;
-	unsigned long flags;
-	struct ipath_rq *rq;
-	struct ipath_srq *srq;
-	struct ipath_sge_state rsge;
-	struct ipath_sge *sge;
-	struct ipath_rwq *wq;
-	struct ipath_rwqe *wqe;
-	void (*handler)(struct ib_event *, void *);
-	struct ib_wc wc;
-	u32 tail;
-	u32 rlen;
-	u32 length;
-
-	qp = ipath_lookup_qpn(&dev->qp_table, swqe->ud_wr.remote_qpn);
-	if (!qp || !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) {
-		dev->n_pkt_drops++;
-		goto done;
-	}
-
-	/*
-	 * Check that the qkey matches (except for QP0, see 9.6.1.4.1).
-	 * Qkeys with the high order bit set mean use the
-	 * qkey from the QP context instead of the WR (see 10.2.5).
-	 */
-	if (unlikely(qp->ibqp.qp_num &&
-		     ((int) swqe->ud_wr.remote_qkey < 0 ?
-		      sqp->qkey : swqe->ud_wr.remote_qkey) != qp->qkey)) {
-		/* XXX OK to lose a count once in a while. */
-		dev->qkey_violations++;
-		dev->n_pkt_drops++;
-		goto drop;
-	}
-
-	/*
-	 * A GRH is expected to precede the data even if not
-	 * present on the wire.
-	 */
-	length = swqe->length;
-	memset(&wc, 0, sizeof wc);
-	wc.byte_len = length + sizeof(struct ib_grh);
-
-	if (swqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
-		wc.wc_flags = IB_WC_WITH_IMM;
-		wc.ex.imm_data = swqe->wr.ex.imm_data;
-	}
-
-	/*
-	 * This would be a lot simpler if we could call ipath_get_rwqe()
-	 * but that uses state that the receive interrupt handler uses
-	 * so we would need to lock out receive interrupts while doing
-	 * local loopback.
-	 */
-	if (qp->ibqp.srq) {
-		srq = to_isrq(qp->ibqp.srq);
-		handler = srq->ibsrq.event_handler;
-		rq = &srq->rq;
-	} else {
-		srq = NULL;
-		handler = NULL;
-		rq = &qp->r_rq;
-	}
-
-	/*
-	 * Get the next work request entry to find where to put the data.
-	 * Note that it is safe to drop the lock after changing rq->tail
-	 * since ipath_post_receive() won't fill the empty slot.
-	 */
-	spin_lock_irqsave(&rq->lock, flags);
-	wq = rq->wq;
-	tail = wq->tail;
-	/* Validate tail before using it since it is user writable. */
-	if (tail >= rq->size)
-		tail = 0;
-	if (unlikely(tail == wq->head)) {
-		spin_unlock_irqrestore(&rq->lock, flags);
-		dev->n_pkt_drops++;
-		goto drop;
-	}
-	wqe = get_rwqe_ptr(rq, tail);
-	rsge.sg_list = qp->r_ud_sg_list;
-	if (!ipath_init_sge(qp, wqe, &rlen, &rsge)) {
-		spin_unlock_irqrestore(&rq->lock, flags);
-		dev->n_pkt_drops++;
-		goto drop;
-	}
-	/* Silently drop packets which are too big. */
-	if (wc.byte_len > rlen) {
-		spin_unlock_irqrestore(&rq->lock, flags);
-		dev->n_pkt_drops++;
-		goto drop;
-	}
-	if (++tail >= rq->size)
-		tail = 0;
-	wq->tail = tail;
-	wc.wr_id = wqe->wr_id;
-	if (handler) {
-		u32 n;
-
-		/*
-		 * validate head pointer value and compute
-		 * the number of remaining WQEs.
-		 */
-		n = wq->head;
-		if (n >= rq->size)
-			n = 0;
-		if (n < tail)
-			n += rq->size - tail;
-		else
-			n -= tail;
-		if (n < srq->limit) {
-			struct ib_event ev;
-
-			srq->limit = 0;
-			spin_unlock_irqrestore(&rq->lock, flags);
-			ev.device = qp->ibqp.device;
-			ev.element.srq = qp->ibqp.srq;
-			ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
-			handler(&ev, srq->ibsrq.srq_context);
-		} else
-			spin_unlock_irqrestore(&rq->lock, flags);
-	} else
-		spin_unlock_irqrestore(&rq->lock, flags);
-
-	ah_attr = &to_iah(swqe->ud_wr.ah)->attr;
-	if (ah_attr->ah_flags & IB_AH_GRH) {
-		ipath_copy_sge(&rsge, &ah_attr->grh, sizeof(struct ib_grh));
-		wc.wc_flags |= IB_WC_GRH;
-	} else
-		ipath_skip_sge(&rsge, sizeof(struct ib_grh));
-	sge = swqe->sg_list;
-	while (length) {
-		u32 len = sge->length;
-
-		if (len > length)
-			len = length;
-		if (len > sge->sge_length)
-			len = sge->sge_length;
-		BUG_ON(len == 0);
-		ipath_copy_sge(&rsge, sge->vaddr, len);
-		sge->vaddr += len;
-		sge->length -= len;
-		sge->sge_length -= len;
-		if (sge->sge_length == 0) {
-			if (--swqe->wr.num_sge)
-				sge++;
-		} else if (sge->length == 0 && sge->mr != NULL) {
-			if (++sge->n >= IPATH_SEGSZ) {
-				if (++sge->m >= sge->mr->mapsz)
-					break;
-				sge->n = 0;
-			}
-			sge->vaddr =
-				sge->mr->map[sge->m]->segs[sge->n].vaddr;
-			sge->length =
-				sge->mr->map[sge->m]->segs[sge->n].length;
-		}
-		length -= len;
-	}
-	wc.status = IB_WC_SUCCESS;
-	wc.opcode = IB_WC_RECV;
-	wc.qp = &qp->ibqp;
-	wc.src_qp = sqp->ibqp.qp_num;
-	/* XXX do we know which pkey matched? Only needed for GSI. */
-	wc.pkey_index = 0;
-	wc.slid = dev->dd->ipath_lid |
-		(ah_attr->src_path_bits &
-		 ((1 << dev->dd->ipath_lmc) - 1));
-	wc.sl = ah_attr->sl;
-	wc.dlid_path_bits =
-		ah_attr->dlid & ((1 << dev->dd->ipath_lmc) - 1);
-	wc.port_num = 1;
-	/* Signal completion event if the solicited bit is set. */
-	ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
-		       swqe->ud_wr.wr.send_flags & IB_SEND_SOLICITED);
-drop:
-	if (atomic_dec_and_test(&qp->refcount))
-		wake_up(&qp->wait);
-done:;
-}
-
-/**
- * ipath_make_ud_req - construct a UD request packet
- * @qp: the QP
- *
- * Return 1 if constructed; otherwise, return 0.
- */
-int ipath_make_ud_req(struct ipath_qp *qp)
-{
-	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
-	struct ipath_other_headers *ohdr;
-	struct ib_ah_attr *ah_attr;
-	struct ipath_swqe *wqe;
-	unsigned long flags;
-	u32 nwords;
-	u32 extra_bytes;
-	u32 bth0;
-	u16 lrh0;
-	u16 lid;
-	int ret = 0;
-	int next_cur;
-
-	spin_lock_irqsave(&qp->s_lock, flags);
-
-	if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_NEXT_SEND_OK)) {
-		if (!(ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND))
-			goto bail;
-		/* We are in the error state, flush the work request. */
-		if (qp->s_last == qp->s_head)
-			goto bail;
-		/* If DMAs are in progress, we can't flush immediately. */
-		if (atomic_read(&qp->s_dma_busy)) {
-			qp->s_flags |= IPATH_S_WAIT_DMA;
-			goto bail;
-		}
-		wqe = get_swqe_ptr(qp, qp->s_last);
-		ipath_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
-		goto done;
-	}
-
-	if (qp->s_cur == qp->s_head)
-		goto bail;
-
-	wqe = get_swqe_ptr(qp, qp->s_cur);
-	next_cur = qp->s_cur + 1;
-	if (next_cur >= qp->s_size)
-		next_cur = 0;
-
-	/* Construct the header. */
-	ah_attr = &to_iah(wqe->ud_wr.ah)->attr;
-	if (ah_attr->dlid >= IPATH_MULTICAST_LID_BASE) {
-		if (ah_attr->dlid != IPATH_PERMISSIVE_LID)
-			dev->n_multicast_xmit++;
-		else
-			dev->n_unicast_xmit++;
-	} else {
-		dev->n_unicast_xmit++;
-		lid = ah_attr->dlid & ~((1 << dev->dd->ipath_lmc) - 1);
-		if (unlikely(lid == dev->dd->ipath_lid)) {
-			/*
-			 * If DMAs are in progress, we can't generate
-			 * a completion for the loopback packet since
-			 * it would be out of order.
-			 * XXX Instead of waiting, we could queue a
-			 * zero length descriptor so we get a callback.
-			 */
-			if (atomic_read(&qp->s_dma_busy)) {
-				qp->s_flags |= IPATH_S_WAIT_DMA;
-				goto bail;
-			}
-			qp->s_cur = next_cur;
-			spin_unlock_irqrestore(&qp->s_lock, flags);
-			ipath_ud_loopback(qp, wqe);
-			spin_lock_irqsave(&qp->s_lock, flags);
-			ipath_send_complete(qp, wqe, IB_WC_SUCCESS);
-			goto done;
-		}
-	}
-
-	qp->s_cur = next_cur;
-	extra_bytes = -wqe->length & 3;
-	nwords = (wqe->length + extra_bytes) >> 2;
-
-	/* header size in 32-bit words LRH+BTH+DETH = (8+12+8)/4. */
-	qp->s_hdrwords = 7;
-	qp->s_cur_size = wqe->length;
-	qp->s_cur_sge = &qp->s_sge;
-	qp->s_dmult = ah_attr->static_rate;
-	qp->s_wqe = wqe;
-	qp->s_sge.sge = wqe->sg_list[0];
-	qp->s_sge.sg_list = wqe->sg_list + 1;
-	qp->s_sge.num_sge = wqe->ud_wr.wr.num_sge;
-
-	if (ah_attr->ah_flags & IB_AH_GRH) {
-		/* Header size in 32-bit words. */
-		qp->s_hdrwords += ipath_make_grh(dev, &qp->s_hdr.u.l.grh,
-						 &ah_attr->grh,
-						 qp->s_hdrwords, nwords);
-		lrh0 = IPATH_LRH_GRH;
-		ohdr = &qp->s_hdr.u.l.oth;
-		/*
-		 * Don't worry about sending to locally attached multicast
-		 * QPs.  It is unspecified by the spec. what happens.
-		 */
-	} else {
-		/* Header size in 32-bit words. */
-		lrh0 = IPATH_LRH_BTH;
-		ohdr = &qp->s_hdr.u.oth;
-	}
-	if (wqe->ud_wr.wr.opcode == IB_WR_SEND_WITH_IMM) {
-		qp->s_hdrwords++;
-		ohdr->u.ud.imm_data = wqe->ud_wr.wr.ex.imm_data;
-		bth0 = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE << 24;
-	} else
-		bth0 = IB_OPCODE_UD_SEND_ONLY << 24;
-	lrh0 |= ah_attr->sl << 4;
-	if (qp->ibqp.qp_type == IB_QPT_SMI)
-		lrh0 |= 0xF000;	/* Set VL (see ch. 13.5.3.1) */
-	qp->s_hdr.lrh[0] = cpu_to_be16(lrh0);
-	qp->s_hdr.lrh[1] = cpu_to_be16(ah_attr->dlid);	/* DEST LID */
-	qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords +
-					   SIZE_OF_CRC);
-	lid = dev->dd->ipath_lid;
-	if (lid) {
-		lid |= ah_attr->src_path_bits &
-			((1 << dev->dd->ipath_lmc) - 1);
-		qp->s_hdr.lrh[3] = cpu_to_be16(lid);
-	} else
-		qp->s_hdr.lrh[3] = IB_LID_PERMISSIVE;
-	if (wqe->ud_wr.wr.send_flags & IB_SEND_SOLICITED)
-		bth0 |= 1 << 23;
-	bth0 |= extra_bytes << 20;
-	bth0 |= qp->ibqp.qp_type == IB_QPT_SMI ? IPATH_DEFAULT_P_KEY :
-		ipath_get_pkey(dev->dd, qp->s_pkey_index);
-	ohdr->bth[0] = cpu_to_be32(bth0);
-	/*
-	 * Use the multicast QP if the destination LID is a multicast LID.
-	 */
-	ohdr->bth[1] = ah_attr->dlid >= IPATH_MULTICAST_LID_BASE &&
-		ah_attr->dlid != IPATH_PERMISSIVE_LID ?
-		cpu_to_be32(IPATH_MULTICAST_QPN) :
-		cpu_to_be32(wqe->ud_wr.remote_qpn);
-	ohdr->bth[2] = cpu_to_be32(qp->s_next_psn++ & IPATH_PSN_MASK);
-	/*
-	 * Qkeys with the high order bit set mean use the
-	 * qkey from the QP context instead of the WR (see 10.2.5).
-	 */
-	ohdr->u.ud.deth[0] = cpu_to_be32((int)wqe->ud_wr.remote_qkey < 0 ?
-					 qp->qkey : wqe->ud_wr.remote_qkey);
-	ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num);
-
-done:
-	ret = 1;
-	goto unlock;
-
-bail:
-	qp->s_flags &= ~IPATH_S_BUSY;
-unlock:
-	spin_unlock_irqrestore(&qp->s_lock, flags);
-	return ret;
-}
-
-/**
- * ipath_ud_rcv - receive an incoming UD packet
- * @dev: the device the packet came in on
- * @hdr: the packet header
- * @has_grh: true if the packet has a GRH
- * @data: the packet data
- * @tlen: the packet length
- * @qp: the QP the packet came on
- *
- * This is called from ipath_qp_rcv() to process an incoming UD packet
- * for the given QP.
- * Called at interrupt level.
- */
-void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
-		  int has_grh, void *data, u32 tlen, struct ipath_qp *qp)
-{
-	struct ipath_other_headers *ohdr;
-	int opcode;
-	u32 hdrsize;
-	u32 pad;
-	struct ib_wc wc;
-	u32 qkey;
-	u32 src_qp;
-	u16 dlid;
-	int header_in_data;
-
-	/* Check for GRH */
-	if (!has_grh) {
-		ohdr = &hdr->u.oth;
-		hdrsize = 8 + 12 + 8;	/* LRH + BTH + DETH */
-		qkey = be32_to_cpu(ohdr->u.ud.deth[0]);
-		src_qp = be32_to_cpu(ohdr->u.ud.deth[1]);
-		header_in_data = 0;
-	} else {
-		ohdr = &hdr->u.l.oth;
-		hdrsize = 8 + 40 + 12 + 8; /* LRH + GRH + BTH + DETH */
-		/*
-		 * The header with GRH is 68 bytes and the core driver sets
-		 * the eager header buffer size to 56 bytes so the last 12
-		 * bytes of the IB header is in the data buffer.
-		 */
-		header_in_data = dev->dd->ipath_rcvhdrentsize == 16;
-		if (header_in_data) {
-			qkey = be32_to_cpu(((__be32 *) data)[1]);
-			src_qp = be32_to_cpu(((__be32 *) data)[2]);
-			data += 12;
-		} else {
-			qkey = be32_to_cpu(ohdr->u.ud.deth[0]);
-			src_qp = be32_to_cpu(ohdr->u.ud.deth[1]);
-		}
-	}
-	src_qp &= IPATH_QPN_MASK;
-
-	/*
-	 * Check that the permissive LID is only used on QP0
-	 * and the QKEY matches (see 9.6.1.4.1 and 9.6.1.5.1).
-	 */
-	if (qp->ibqp.qp_num) {
-		if (unlikely(hdr->lrh[1] == IB_LID_PERMISSIVE ||
-			     hdr->lrh[3] == IB_LID_PERMISSIVE)) {
-			dev->n_pkt_drops++;
-			goto bail;
-		}
-		if (unlikely(qkey != qp->qkey)) {
-			/* XXX OK to lose a count once in a while. */
-			dev->qkey_violations++;
-			dev->n_pkt_drops++;
-			goto bail;
-		}
-	} else if (hdr->lrh[1] == IB_LID_PERMISSIVE ||
-		   hdr->lrh[3] == IB_LID_PERMISSIVE) {
-		struct ib_smp *smp = (struct ib_smp *) data;
-
-		if (smp->mgmt_class != IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
-			dev->n_pkt_drops++;
-			goto bail;
-		}
-	}
-
-	/*
-	 * The opcode is in the low byte when its in network order
-	 * (top byte when in host order).
-	 */
-	opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
-	if (qp->ibqp.qp_num > 1 &&
-	    opcode == IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE) {
-		if (header_in_data) {
-			wc.ex.imm_data = *(__be32 *) data;
-			data += sizeof(__be32);
-		} else
-			wc.ex.imm_data = ohdr->u.ud.imm_data;
-		wc.wc_flags = IB_WC_WITH_IMM;
-		hdrsize += sizeof(u32);
-	} else if (opcode == IB_OPCODE_UD_SEND_ONLY) {
-		wc.ex.imm_data = 0;
-		wc.wc_flags = 0;
-	} else {
-		dev->n_pkt_drops++;
-		goto bail;
-	}
-
-	/* Get the number of bytes the message was padded by. */
-	pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
-	if (unlikely(tlen < (hdrsize + pad + 4))) {
-		/* Drop incomplete packets. */
-		dev->n_pkt_drops++;
-		goto bail;
-	}
-	tlen -= hdrsize + pad + 4;
-
-	/* Drop invalid MAD packets (see 13.5.3.1). */
-	if (unlikely((qp->ibqp.qp_num == 0 &&
-		      (tlen != 256 ||
-		       (be16_to_cpu(hdr->lrh[0]) >> 12) != 15)) ||
-		     (qp->ibqp.qp_num == 1 &&
-		      (tlen != 256 ||
-		       (be16_to_cpu(hdr->lrh[0]) >> 12) == 15)))) {
-		dev->n_pkt_drops++;
-		goto bail;
-	}
-
-	/*
-	 * A GRH is expected to precede the data even if not
-	 * present on the wire.
-	 */
-	wc.byte_len = tlen + sizeof(struct ib_grh);
-
-	/*
-	 * Get the next work request entry to find where to put the data.
-	 */
-	if (qp->r_flags & IPATH_R_REUSE_SGE)
-		qp->r_flags &= ~IPATH_R_REUSE_SGE;
-	else if (!ipath_get_rwqe(qp, 0)) {
-		/*
-		 * Count VL15 packets dropped due to no receive buffer.
-		 * Otherwise, count them as buffer overruns since usually,
-		 * the HW will be able to receive packets even if there are
-		 * no QPs with posted receive buffers.
-		 */
-		if (qp->ibqp.qp_num == 0)
-			dev->n_vl15_dropped++;
-		else
-			dev->rcv_errors++;
-		goto bail;
-	}
-	/* Silently drop packets which are too big. */
-	if (wc.byte_len > qp->r_len) {
-		qp->r_flags |= IPATH_R_REUSE_SGE;
-		dev->n_pkt_drops++;
-		goto bail;
-	}
-	if (has_grh) {
-		ipath_copy_sge(&qp->r_sge, &hdr->u.l.grh,
-			       sizeof(struct ib_grh));
-		wc.wc_flags |= IB_WC_GRH;
-	} else
-		ipath_skip_sge(&qp->r_sge, sizeof(struct ib_grh));
-	ipath_copy_sge(&qp->r_sge, data,
-		       wc.byte_len - sizeof(struct ib_grh));
-	if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags))
-		goto bail;
-	wc.wr_id = qp->r_wr_id;
-	wc.status = IB_WC_SUCCESS;
-	wc.opcode = IB_WC_RECV;
-	wc.vendor_err = 0;
-	wc.qp = &qp->ibqp;
-	wc.src_qp = src_qp;
-	/* XXX do we know which pkey matched? Only needed for GSI. */
-	wc.pkey_index = 0;
-	wc.slid = be16_to_cpu(hdr->lrh[3]);
-	wc.sl = (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF;
-	dlid = be16_to_cpu(hdr->lrh[1]);
-	/*
-	 * Save the LMC lower bits if the destination LID is a unicast LID.
-	 */
-	wc.dlid_path_bits = dlid >= IPATH_MULTICAST_LID_BASE ? 0 :
-		dlid & ((1 << dev->dd->ipath_lmc) - 1);
-	wc.port_num = 1;
-	/* Signal completion event if the solicited bit is set. */
-	ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
-		       (ohdr->bth[0] &
-			cpu_to_be32(1 << 23)) != 0);
-
-bail:;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_user_pages.c b/drivers/staging/rdma/ipath/ipath_user_pages.c
deleted file mode 100644
index d29b4da..0000000
--- a/drivers/staging/rdma/ipath/ipath_user_pages.c
+++ /dev/null
@@ -1,228 +0,0 @@
-/*
- * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/mm.h>
-#include <linux/device.h>
-#include <linux/slab.h>
-
-#include "ipath_kernel.h"
-
-static void __ipath_release_user_pages(struct page **p, size_t num_pages,
-				   int dirty)
-{
-	size_t i;
-
-	for (i = 0; i < num_pages; i++) {
-		ipath_cdbg(MM, "%lu/%lu put_page %p\n", (unsigned long) i,
-			   (unsigned long) num_pages, p[i]);
-		if (dirty)
-			set_page_dirty_lock(p[i]);
-		put_page(p[i]);
-	}
-}
-
-/* call with current->mm->mmap_sem held */
-static int __ipath_get_user_pages(unsigned long start_page, size_t num_pages,
-				  struct page **p)
-{
-	unsigned long lock_limit;
-	size_t got;
-	int ret;
-
-	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
-
-	if (num_pages > lock_limit) {
-		ret = -ENOMEM;
-		goto bail;
-	}
-
-	ipath_cdbg(VERBOSE, "pin %lx pages from vaddr %lx\n",
-		   (unsigned long) num_pages, start_page);
-
-	for (got = 0; got < num_pages; got += ret) {
-		ret = get_user_pages(current, current->mm,
-				     start_page + got * PAGE_SIZE,
-				     num_pages - got, 1, 1,
-				     p + got, NULL);
-		if (ret < 0)
-			goto bail_release;
-	}
-
-	current->mm->pinned_vm += num_pages;
-
-	ret = 0;
-	goto bail;
-
-bail_release:
-	__ipath_release_user_pages(p, got, 0);
-bail:
-	return ret;
-}
-
-/**
- * ipath_map_page - a safety wrapper around pci_map_page()
- *
- * A dma_addr of all 0's is interpreted by the chip as "disabled".
- * Unfortunately, it can also be a valid dma_addr returned on some
- * architectures.
- *
- * The powerpc iommu assigns dma_addrs in ascending order, so we don't
- * have to bother with retries or mapping a dummy page to insure we
- * don't just get the same mapping again.
- *
- * I'm sure we won't be so lucky with other iommu's, so FIXME.
- */
-dma_addr_t ipath_map_page(struct pci_dev *hwdev, struct page *page,
-	unsigned long offset, size_t size, int direction)
-{
-	dma_addr_t phys;
-
-	phys = pci_map_page(hwdev, page, offset, size, direction);
-
-	if (phys == 0) {
-		pci_unmap_page(hwdev, phys, size, direction);
-		phys = pci_map_page(hwdev, page, offset, size, direction);
-		/*
-		 * FIXME: If we get 0 again, we should keep this page,
-		 * map another, then free the 0 page.
-		 */
-	}
-
-	return phys;
-}
-
-/**
- * ipath_map_single - a safety wrapper around pci_map_single()
- *
- * Same idea as ipath_map_page().
- */
-dma_addr_t ipath_map_single(struct pci_dev *hwdev, void *ptr, size_t size,
-	int direction)
-{
-	dma_addr_t phys;
-
-	phys = pci_map_single(hwdev, ptr, size, direction);
-
-	if (phys == 0) {
-		pci_unmap_single(hwdev, phys, size, direction);
-		phys = pci_map_single(hwdev, ptr, size, direction);
-		/*
-		 * FIXME: If we get 0 again, we should keep this page,
-		 * map another, then free the 0 page.
-		 */
-	}
-
-	return phys;
-}
-
-/**
- * ipath_get_user_pages - lock user pages into memory
- * @start_page: the start page
- * @num_pages: the number of pages
- * @p: the output page structures
- *
- * This function takes a given start page (page aligned user virtual
- * address) and pins it and the following specified number of pages.  For
- * now, num_pages is always 1, but that will probably change at some point
- * (because caller is doing expected sends on a single virtually contiguous
- * buffer, so we can do all pages at once).
- */
-int ipath_get_user_pages(unsigned long start_page, size_t num_pages,
-			 struct page **p)
-{
-	int ret;
-
-	down_write(&current->mm->mmap_sem);
-
-	ret = __ipath_get_user_pages(start_page, num_pages, p);
-
-	up_write(&current->mm->mmap_sem);
-
-	return ret;
-}
-
-void ipath_release_user_pages(struct page **p, size_t num_pages)
-{
-	down_write(&current->mm->mmap_sem);
-
-	__ipath_release_user_pages(p, num_pages, 1);
-
-	current->mm->pinned_vm -= num_pages;
-
-	up_write(&current->mm->mmap_sem);
-}
-
-struct ipath_user_pages_work {
-	struct work_struct work;
-	struct mm_struct *mm;
-	unsigned long num_pages;
-};
-
-static void user_pages_account(struct work_struct *_work)
-{
-	struct ipath_user_pages_work *work =
-		container_of(_work, struct ipath_user_pages_work, work);
-
-	down_write(&work->mm->mmap_sem);
-	work->mm->pinned_vm -= work->num_pages;
-	up_write(&work->mm->mmap_sem);
-	mmput(work->mm);
-	kfree(work);
-}
-
-void ipath_release_user_pages_on_close(struct page **p, size_t num_pages)
-{
-	struct ipath_user_pages_work *work;
-	struct mm_struct *mm;
-
-	__ipath_release_user_pages(p, num_pages, 1);
-
-	mm = get_task_mm(current);
-	if (!mm)
-		return;
-
-	work = kmalloc(sizeof(*work), GFP_KERNEL);
-	if (!work)
-		goto bail_mm;
-
-	INIT_WORK(&work->work, user_pages_account);
-	work->mm = mm;
-	work->num_pages = num_pages;
-
-	queue_work(ib_wq, &work->work);
-	return;
-
-bail_mm:
-	mmput(mm);
-	return;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_user_sdma.c b/drivers/staging/rdma/ipath/ipath_user_sdma.c
deleted file mode 100644
index 8c12e3c..0000000
--- a/drivers/staging/rdma/ipath/ipath_user_sdma.c
+++ /dev/null
@@ -1,874 +0,0 @@
-/*
- * Copyright (c) 2007, 2008 QLogic Corporation. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include <linux/mm.h>
-#include <linux/types.h>
-#include <linux/device.h>
-#include <linux/dmapool.h>
-#include <linux/slab.h>
-#include <linux/list.h>
-#include <linux/highmem.h>
-#include <linux/io.h>
-#include <linux/uio.h>
-#include <linux/rbtree.h>
-#include <linux/spinlock.h>
-#include <linux/delay.h>
-
-#include "ipath_kernel.h"
-#include "ipath_user_sdma.h"
-
-/* minimum size of header */
-#define IPATH_USER_SDMA_MIN_HEADER_LENGTH	64
-/* expected size of headers (for dma_pool) */
-#define IPATH_USER_SDMA_EXP_HEADER_LENGTH	64
-/* length mask in PBC (lower 11 bits) */
-#define IPATH_PBC_LENGTH_MASK			((1 << 11) - 1)
-
-struct ipath_user_sdma_pkt {
-	u8 naddr;		/* dimension of addr (1..3) ... */
-	u32 counter;		/* sdma pkts queued counter for this entry */
-	u64 added;		/* global descq number of entries */
-
-	struct {
-		u32 offset;			/* offset for kvaddr, addr */
-		u32 length;			/* length in page */
-		u8  put_page;			/* should we put_page? */
-		u8  dma_mapped;			/* is page dma_mapped? */
-		struct page *page;		/* may be NULL (coherent mem) */
-		void *kvaddr;			/* FIXME: only for pio hack */
-		dma_addr_t addr;
-	} addr[4];   /* max pages, any more and we coalesce */
-	struct list_head list;	/* list element */
-};
-
-struct ipath_user_sdma_queue {
-	/*
-	 * pkts sent to dma engine are queued on this
-	 * list head.  the type of the elements of this
-	 * list are struct ipath_user_sdma_pkt...
-	 */
-	struct list_head sent;
-
-	/* headers with expected length are allocated from here... */
-	char header_cache_name[64];
-	struct dma_pool *header_cache;
-
-	/* packets are allocated from the slab cache... */
-	char pkt_slab_name[64];
-	struct kmem_cache *pkt_slab;
-
-	/* as packets go on the queued queue, they are counted... */
-	u32 counter;
-	u32 sent_counter;
-
-	/* dma page table */
-	struct rb_root dma_pages_root;
-
-	/* protect everything above... */
-	struct mutex lock;
-};
-
-struct ipath_user_sdma_queue *
-ipath_user_sdma_queue_create(struct device *dev, int unit, int port, int sport)
-{
-	struct ipath_user_sdma_queue *pq =
-		kmalloc(sizeof(struct ipath_user_sdma_queue), GFP_KERNEL);
-
-	if (!pq)
-		goto done;
-
-	pq->counter = 0;
-	pq->sent_counter = 0;
-	INIT_LIST_HEAD(&pq->sent);
-
-	mutex_init(&pq->lock);
-
-	snprintf(pq->pkt_slab_name, sizeof(pq->pkt_slab_name),
-		 "ipath-user-sdma-pkts-%u-%02u.%02u", unit, port, sport);
-	pq->pkt_slab = kmem_cache_create(pq->pkt_slab_name,
-					 sizeof(struct ipath_user_sdma_pkt),
-					 0, 0, NULL);
-
-	if (!pq->pkt_slab)
-		goto err_kfree;
-
-	snprintf(pq->header_cache_name, sizeof(pq->header_cache_name),
-		 "ipath-user-sdma-headers-%u-%02u.%02u", unit, port, sport);
-	pq->header_cache = dma_pool_create(pq->header_cache_name,
-					   dev,
-					   IPATH_USER_SDMA_EXP_HEADER_LENGTH,
-					   4, 0);
-	if (!pq->header_cache)
-		goto err_slab;
-
-	pq->dma_pages_root = RB_ROOT;
-
-	goto done;
-
-err_slab:
-	kmem_cache_destroy(pq->pkt_slab);
-err_kfree:
-	kfree(pq);
-	pq = NULL;
-
-done:
-	return pq;
-}
-
-static void ipath_user_sdma_init_frag(struct ipath_user_sdma_pkt *pkt,
-				      int i, size_t offset, size_t len,
-				      int put_page, int dma_mapped,
-				      struct page *page,
-				      void *kvaddr, dma_addr_t dma_addr)
-{
-	pkt->addr[i].offset = offset;
-	pkt->addr[i].length = len;
-	pkt->addr[i].put_page = put_page;
-	pkt->addr[i].dma_mapped = dma_mapped;
-	pkt->addr[i].page = page;
-	pkt->addr[i].kvaddr = kvaddr;
-	pkt->addr[i].addr = dma_addr;
-}
-
-static void ipath_user_sdma_init_header(struct ipath_user_sdma_pkt *pkt,
-					u32 counter, size_t offset,
-					size_t len, int dma_mapped,
-					struct page *page,
-					void *kvaddr, dma_addr_t dma_addr)
-{
-	pkt->naddr = 1;
-	pkt->counter = counter;
-	ipath_user_sdma_init_frag(pkt, 0, offset, len, 0, dma_mapped, page,
-				  kvaddr, dma_addr);
-}
-
-/* we've too many pages in the iovec, coalesce to a single page */
-static int ipath_user_sdma_coalesce(const struct ipath_devdata *dd,
-				    struct ipath_user_sdma_pkt *pkt,
-				    const struct iovec *iov,
-				    unsigned long niov) {
-	int ret = 0;
-	struct page *page = alloc_page(GFP_KERNEL);
-	void *mpage_save;
-	char *mpage;
-	int i;
-	int len = 0;
-	dma_addr_t dma_addr;
-
-	if (!page) {
-		ret = -ENOMEM;
-		goto done;
-	}
-
-	mpage = kmap(page);
-	mpage_save = mpage;
-	for (i = 0; i < niov; i++) {
-		int cfur;
-
-		cfur = copy_from_user(mpage,
-				      iov[i].iov_base, iov[i].iov_len);
-		if (cfur) {
-			ret = -EFAULT;
-			goto free_unmap;
-		}
-
-		mpage += iov[i].iov_len;
-		len += iov[i].iov_len;
-	}
-
-	dma_addr = dma_map_page(&dd->pcidev->dev, page, 0, len,
-				DMA_TO_DEVICE);
-	if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) {
-		ret = -ENOMEM;
-		goto free_unmap;
-	}
-
-	ipath_user_sdma_init_frag(pkt, 1, 0, len, 0, 1, page, mpage_save,
-				  dma_addr);
-	pkt->naddr = 2;
-
-	goto done;
-
-free_unmap:
-	kunmap(page);
-	__free_page(page);
-done:
-	return ret;
-}
-
-/* how many pages in this iovec element? */
-static int ipath_user_sdma_num_pages(const struct iovec *iov)
-{
-	const unsigned long addr  = (unsigned long) iov->iov_base;
-	const unsigned long  len  = iov->iov_len;
-	const unsigned long spage = addr & PAGE_MASK;
-	const unsigned long epage = (addr + len - 1) & PAGE_MASK;
-
-	return 1 + ((epage - spage) >> PAGE_SHIFT);
-}
-
-/* truncate length to page boundary */
-static int ipath_user_sdma_page_length(unsigned long addr, unsigned long len)
-{
-	const unsigned long offset = offset_in_page(addr);
-
-	return ((offset + len) > PAGE_SIZE) ? (PAGE_SIZE - offset) : len;
-}
-
-static void ipath_user_sdma_free_pkt_frag(struct device *dev,
-					  struct ipath_user_sdma_queue *pq,
-					  struct ipath_user_sdma_pkt *pkt,
-					  int frag)
-{
-	const int i = frag;
-
-	if (pkt->addr[i].page) {
-		if (pkt->addr[i].dma_mapped)
-			dma_unmap_page(dev,
-				       pkt->addr[i].addr,
-				       pkt->addr[i].length,
-				       DMA_TO_DEVICE);
-
-		if (pkt->addr[i].kvaddr)
-			kunmap(pkt->addr[i].page);
-
-		if (pkt->addr[i].put_page)
-			put_page(pkt->addr[i].page);
-		else
-			__free_page(pkt->addr[i].page);
-	} else if (pkt->addr[i].kvaddr)
-		/* free coherent mem from cache... */
-		dma_pool_free(pq->header_cache,
-			      pkt->addr[i].kvaddr, pkt->addr[i].addr);
-}
-
-/* return number of pages pinned... */
-static int ipath_user_sdma_pin_pages(const struct ipath_devdata *dd,
-				     struct ipath_user_sdma_pkt *pkt,
-				     unsigned long addr, int tlen, int npages)
-{
-	struct page *pages[2];
-	int j;
-	int ret;
-
-	ret = get_user_pages_fast(addr, npages, 0, pages);
-	if (ret != npages) {
-		int i;
-
-		for (i = 0; i < ret; i++)
-			put_page(pages[i]);
-
-		ret = -ENOMEM;
-		goto done;
-	}
-
-	for (j = 0; j < npages; j++) {
-		/* map the pages... */
-		const int flen =
-			ipath_user_sdma_page_length(addr, tlen);
-		dma_addr_t dma_addr =
-			dma_map_page(&dd->pcidev->dev,
-				     pages[j], 0, flen, DMA_TO_DEVICE);
-		unsigned long fofs = offset_in_page(addr);
-
-		if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) {
-			ret = -ENOMEM;
-			goto done;
-		}
-
-		ipath_user_sdma_init_frag(pkt, pkt->naddr, fofs, flen, 1, 1,
-					  pages[j], kmap(pages[j]),
-					  dma_addr);
-
-		pkt->naddr++;
-		addr += flen;
-		tlen -= flen;
-	}
-
-done:
-	return ret;
-}
-
-static int ipath_user_sdma_pin_pkt(const struct ipath_devdata *dd,
-				   struct ipath_user_sdma_queue *pq,
-				   struct ipath_user_sdma_pkt *pkt,
-				   const struct iovec *iov,
-				   unsigned long niov)
-{
-	int ret = 0;
-	unsigned long idx;
-
-	for (idx = 0; idx < niov; idx++) {
-		const int npages = ipath_user_sdma_num_pages(iov + idx);
-		const unsigned long addr = (unsigned long) iov[idx].iov_base;
-
-		ret = ipath_user_sdma_pin_pages(dd, pkt,
-						addr, iov[idx].iov_len,
-						npages);
-		if (ret < 0)
-			goto free_pkt;
-	}
-
-	goto done;
-
-free_pkt:
-	for (idx = 0; idx < pkt->naddr; idx++)
-		ipath_user_sdma_free_pkt_frag(&dd->pcidev->dev, pq, pkt, idx);
-
-done:
-	return ret;
-}
-
-static int ipath_user_sdma_init_payload(const struct ipath_devdata *dd,
-					struct ipath_user_sdma_queue *pq,
-					struct ipath_user_sdma_pkt *pkt,
-					const struct iovec *iov,
-					unsigned long niov, int npages)
-{
-	int ret = 0;
-
-	if (npages >= ARRAY_SIZE(pkt->addr))
-		ret = ipath_user_sdma_coalesce(dd, pkt, iov, niov);
-	else
-		ret = ipath_user_sdma_pin_pkt(dd, pq, pkt, iov, niov);
-
-	return ret;
-}
-
-/* free a packet list -- return counter value of last packet */
-static void ipath_user_sdma_free_pkt_list(struct device *dev,
-					  struct ipath_user_sdma_queue *pq,
-					  struct list_head *list)
-{
-	struct ipath_user_sdma_pkt *pkt, *pkt_next;
-
-	list_for_each_entry_safe(pkt, pkt_next, list, list) {
-		int i;
-
-		for (i = 0; i < pkt->naddr; i++)
-			ipath_user_sdma_free_pkt_frag(dev, pq, pkt, i);
-
-		kmem_cache_free(pq->pkt_slab, pkt);
-	}
-}
-
-/*
- * copy headers, coalesce etc -- pq->lock must be held
- *
- * we queue all the packets to list, returning the
- * number of bytes total.  list must be empty initially,
- * as, if there is an error we clean it...
- */
-static int ipath_user_sdma_queue_pkts(const struct ipath_devdata *dd,
-				      struct ipath_user_sdma_queue *pq,
-				      struct list_head *list,
-				      const struct iovec *iov,
-				      unsigned long niov,
-				      int maxpkts)
-{
-	unsigned long idx = 0;
-	int ret = 0;
-	int npkts = 0;
-	struct page *page = NULL;
-	__le32 *pbc;
-	dma_addr_t dma_addr;
-	struct ipath_user_sdma_pkt *pkt = NULL;
-	size_t len;
-	size_t nw;
-	u32 counter = pq->counter;
-	int dma_mapped = 0;
-
-	while (idx < niov && npkts < maxpkts) {
-		const unsigned long addr = (unsigned long) iov[idx].iov_base;
-		const unsigned long idx_save = idx;
-		unsigned pktnw;
-		unsigned pktnwc;
-		int nfrags = 0;
-		int npages = 0;
-		int cfur;
-
-		dma_mapped = 0;
-		len = iov[idx].iov_len;
-		nw = len >> 2;
-		page = NULL;
-
-		pkt = kmem_cache_alloc(pq->pkt_slab, GFP_KERNEL);
-		if (!pkt) {
-			ret = -ENOMEM;
-			goto free_list;
-		}
-
-		if (len < IPATH_USER_SDMA_MIN_HEADER_LENGTH ||
-		    len > PAGE_SIZE || len & 3 || addr & 3) {
-			ret = -EINVAL;
-			goto free_pkt;
-		}
-
-		if (len == IPATH_USER_SDMA_EXP_HEADER_LENGTH)
-			pbc = dma_pool_alloc(pq->header_cache, GFP_KERNEL,
-					     &dma_addr);
-		else
-			pbc = NULL;
-
-		if (!pbc) {
-			page = alloc_page(GFP_KERNEL);
-			if (!page) {
-				ret = -ENOMEM;
-				goto free_pkt;
-			}
-			pbc = kmap(page);
-		}
-
-		cfur = copy_from_user(pbc, iov[idx].iov_base, len);
-		if (cfur) {
-			ret = -EFAULT;
-			goto free_pbc;
-		}
-
-		/*
-		 * this assignment is a bit strange.  it's because the
-		 * the pbc counts the number of 32 bit words in the full
-		 * packet _except_ the first word of the pbc itself...
-		 */
-		pktnwc = nw - 1;
-
-		/*
-		 * pktnw computation yields the number of 32 bit words
-		 * that the caller has indicated in the PBC.  note that
-		 * this is one less than the total number of words that
-		 * goes to the send DMA engine as the first 32 bit word
-		 * of the PBC itself is not counted.  Armed with this count,
-		 * we can verify that the packet is consistent with the
-		 * iovec lengths.
-		 */
-		pktnw = le32_to_cpu(*pbc) & IPATH_PBC_LENGTH_MASK;
-		if (pktnw < pktnwc || pktnw > pktnwc + (PAGE_SIZE >> 2)) {
-			ret = -EINVAL;
-			goto free_pbc;
-		}
-
-
-		idx++;
-		while (pktnwc < pktnw && idx < niov) {
-			const size_t slen = iov[idx].iov_len;
-			const unsigned long faddr =
-				(unsigned long) iov[idx].iov_base;
-
-			if (slen & 3 || faddr & 3 || !slen ||
-			    slen > PAGE_SIZE) {
-				ret = -EINVAL;
-				goto free_pbc;
-			}
-
-			npages++;
-			if ((faddr & PAGE_MASK) !=
-			    ((faddr + slen - 1) & PAGE_MASK))
-				npages++;
-
-			pktnwc += slen >> 2;
-			idx++;
-			nfrags++;
-		}
-
-		if (pktnwc != pktnw) {
-			ret = -EINVAL;
-			goto free_pbc;
-		}
-
-		if (page) {
-			dma_addr = dma_map_page(&dd->pcidev->dev,
-						page, 0, len, DMA_TO_DEVICE);
-			if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) {
-				ret = -ENOMEM;
-				goto free_pbc;
-			}
-
-			dma_mapped = 1;
-		}
-
-		ipath_user_sdma_init_header(pkt, counter, 0, len, dma_mapped,
-					    page, pbc, dma_addr);
-
-		if (nfrags) {
-			ret = ipath_user_sdma_init_payload(dd, pq, pkt,
-							   iov + idx_save + 1,
-							   nfrags, npages);
-			if (ret < 0)
-				goto free_pbc_dma;
-		}
-
-		counter++;
-		npkts++;
-
-		list_add_tail(&pkt->list, list);
-	}
-
-	ret = idx;
-	goto done;
-
-free_pbc_dma:
-	if (dma_mapped)
-		dma_unmap_page(&dd->pcidev->dev, dma_addr, len, DMA_TO_DEVICE);
-free_pbc:
-	if (page) {
-		kunmap(page);
-		__free_page(page);
-	} else
-		dma_pool_free(pq->header_cache, pbc, dma_addr);
-free_pkt:
-	kmem_cache_free(pq->pkt_slab, pkt);
-free_list:
-	ipath_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, list);
-done:
-	return ret;
-}
-
-static void ipath_user_sdma_set_complete_counter(struct ipath_user_sdma_queue *pq,
-						 u32 c)
-{
-	pq->sent_counter = c;
-}
-
-/* try to clean out queue -- needs pq->lock */
-static int ipath_user_sdma_queue_clean(const struct ipath_devdata *dd,
-				       struct ipath_user_sdma_queue *pq)
-{
-	struct list_head free_list;
-	struct ipath_user_sdma_pkt *pkt;
-	struct ipath_user_sdma_pkt *pkt_prev;
-	int ret = 0;
-
-	INIT_LIST_HEAD(&free_list);
-
-	list_for_each_entry_safe(pkt, pkt_prev, &pq->sent, list) {
-		s64 descd = dd->ipath_sdma_descq_removed - pkt->added;
-
-		if (descd < 0)
-			break;
-
-		list_move_tail(&pkt->list, &free_list);
-
-		/* one more packet cleaned */
-		ret++;
-	}
-
-	if (!list_empty(&free_list)) {
-		u32 counter;
-
-		pkt = list_entry(free_list.prev,
-				 struct ipath_user_sdma_pkt, list);
-		counter = pkt->counter;
-
-		ipath_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, &free_list);
-		ipath_user_sdma_set_complete_counter(pq, counter);
-	}
-
-	return ret;
-}
-
-void ipath_user_sdma_queue_destroy(struct ipath_user_sdma_queue *pq)
-{
-	if (!pq)
-		return;
-
-	kmem_cache_destroy(pq->pkt_slab);
-	dma_pool_destroy(pq->header_cache);
-	kfree(pq);
-}
-
-/* clean descriptor queue, returns > 0 if some elements cleaned */
-static int ipath_user_sdma_hwqueue_clean(struct ipath_devdata *dd)
-{
-	int ret;
-	unsigned long flags;
-
-	spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
-	ret = ipath_sdma_make_progress(dd);
-	spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
-
-	return ret;
-}
-
-/* we're in close, drain packets so that we can cleanup successfully... */
-void ipath_user_sdma_queue_drain(struct ipath_devdata *dd,
-				 struct ipath_user_sdma_queue *pq)
-{
-	int i;
-
-	if (!pq)
-		return;
-
-	for (i = 0; i < 100; i++) {
-		mutex_lock(&pq->lock);
-		if (list_empty(&pq->sent)) {
-			mutex_unlock(&pq->lock);
-			break;
-		}
-		ipath_user_sdma_hwqueue_clean(dd);
-		ipath_user_sdma_queue_clean(dd, pq);
-		mutex_unlock(&pq->lock);
-		msleep(10);
-	}
-
-	if (!list_empty(&pq->sent)) {
-		struct list_head free_list;
-
-		printk(KERN_INFO "drain: lists not empty: forcing!\n");
-		INIT_LIST_HEAD(&free_list);
-		mutex_lock(&pq->lock);
-		list_splice_init(&pq->sent, &free_list);
-		ipath_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, &free_list);
-		mutex_unlock(&pq->lock);
-	}
-}
-
-static inline __le64 ipath_sdma_make_desc0(struct ipath_devdata *dd,
-					   u64 addr, u64 dwlen, u64 dwoffset)
-{
-	return cpu_to_le64(/* SDmaPhyAddr[31:0] */
-			   ((addr & 0xfffffffcULL) << 32) |
-			   /* SDmaGeneration[1:0] */
-			   ((dd->ipath_sdma_generation & 3ULL) << 30) |
-			   /* SDmaDwordCount[10:0] */
-			   ((dwlen & 0x7ffULL) << 16) |
-			   /* SDmaBufOffset[12:2] */
-			   (dwoffset & 0x7ffULL));
-}
-
-static inline __le64 ipath_sdma_make_first_desc0(__le64 descq)
-{
-	return descq | cpu_to_le64(1ULL << 12);
-}
-
-static inline __le64 ipath_sdma_make_last_desc0(__le64 descq)
-{
-					      /* last */  /* dma head */
-	return descq | cpu_to_le64(1ULL << 11 | 1ULL << 13);
-}
-
-static inline __le64 ipath_sdma_make_desc1(u64 addr)
-{
-	/* SDmaPhyAddr[47:32] */
-	return cpu_to_le64(addr >> 32);
-}
-
-static void ipath_user_sdma_send_frag(struct ipath_devdata *dd,
-				      struct ipath_user_sdma_pkt *pkt, int idx,
-				      unsigned ofs, u16 tail)
-{
-	const u64 addr = (u64) pkt->addr[idx].addr +
-		(u64) pkt->addr[idx].offset;
-	const u64 dwlen = (u64) pkt->addr[idx].length / 4;
-	__le64 *descqp;
-	__le64 descq0;
-
-	descqp = &dd->ipath_sdma_descq[tail].qw[0];
-
-	descq0 = ipath_sdma_make_desc0(dd, addr, dwlen, ofs);
-	if (idx == 0)
-		descq0 = ipath_sdma_make_first_desc0(descq0);
-	if (idx == pkt->naddr - 1)
-		descq0 = ipath_sdma_make_last_desc0(descq0);
-
-	descqp[0] = descq0;
-	descqp[1] = ipath_sdma_make_desc1(addr);
-}
-
-/* pq->lock must be held, get packets on the wire... */
-static int ipath_user_sdma_push_pkts(struct ipath_devdata *dd,
-				     struct ipath_user_sdma_queue *pq,
-				     struct list_head *pktlist)
-{
-	int ret = 0;
-	unsigned long flags;
-	u16 tail;
-
-	if (list_empty(pktlist))
-		return 0;
-
-	if (unlikely(!(dd->ipath_flags & IPATH_LINKACTIVE)))
-		return -ECOMM;
-
-	spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
-
-	if (unlikely(dd->ipath_sdma_status & IPATH_SDMA_ABORT_MASK)) {
-		ret = -ECOMM;
-		goto unlock;
-	}
-
-	tail = dd->ipath_sdma_descq_tail;
-	while (!list_empty(pktlist)) {
-		struct ipath_user_sdma_pkt *pkt =
-			list_entry(pktlist->next, struct ipath_user_sdma_pkt,
-				   list);
-		int i;
-		unsigned ofs = 0;
-		u16 dtail = tail;
-
-		if (pkt->naddr > ipath_sdma_descq_freecnt(dd))
-			goto unlock_check_tail;
-
-		for (i = 0; i < pkt->naddr; i++) {
-			ipath_user_sdma_send_frag(dd, pkt, i, ofs, tail);
-			ofs += pkt->addr[i].length >> 2;
-
-			if (++tail == dd->ipath_sdma_descq_cnt) {
-				tail = 0;
-				++dd->ipath_sdma_generation;
-			}
-		}
-
-		if ((ofs<<2) > dd->ipath_ibmaxlen) {
-			ipath_dbg("packet size %X > ibmax %X, fail\n",
-				ofs<<2, dd->ipath_ibmaxlen);
-			ret = -EMSGSIZE;
-			goto unlock;
-		}
-
-		/*
-		 * if the packet is >= 2KB mtu equivalent, we have to use
-		 * the large buffers, and have to mark each descriptor as
-		 * part of a large buffer packet.
-		 */
-		if (ofs >= IPATH_SMALLBUF_DWORDS) {
-			for (i = 0; i < pkt->naddr; i++) {
-				dd->ipath_sdma_descq[dtail].qw[0] |=
-					cpu_to_le64(1ULL << 14);
-				if (++dtail == dd->ipath_sdma_descq_cnt)
-					dtail = 0;
-			}
-		}
-
-		dd->ipath_sdma_descq_added += pkt->naddr;
-		pkt->added = dd->ipath_sdma_descq_added;
-		list_move_tail(&pkt->list, &pq->sent);
-		ret++;
-	}
-
-unlock_check_tail:
-	/* advance the tail on the chip if necessary */
-	if (dd->ipath_sdma_descq_tail != tail) {
-		wmb();
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmatail, tail);
-		dd->ipath_sdma_descq_tail = tail;
-	}
-
-unlock:
-	spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
-
-	return ret;
-}
-
-int ipath_user_sdma_writev(struct ipath_devdata *dd,
-			   struct ipath_user_sdma_queue *pq,
-			   const struct iovec *iov,
-			   unsigned long dim)
-{
-	int ret = 0;
-	struct list_head list;
-	int npkts = 0;
-
-	INIT_LIST_HEAD(&list);
-
-	mutex_lock(&pq->lock);
-
-	if (dd->ipath_sdma_descq_added != dd->ipath_sdma_descq_removed) {
-		ipath_user_sdma_hwqueue_clean(dd);
-		ipath_user_sdma_queue_clean(dd, pq);
-	}
-
-	while (dim) {
-		const int mxp = 8;
-
-		ret = ipath_user_sdma_queue_pkts(dd, pq, &list, iov, dim, mxp);
-		if (ret <= 0)
-			goto done_unlock;
-		else {
-			dim -= ret;
-			iov += ret;
-		}
-
-		/* force packets onto the sdma hw queue... */
-		if (!list_empty(&list)) {
-			/*
-			 * lazily clean hw queue.  the 4 is a guess of about
-			 * how many sdma descriptors a packet will take (it
-			 * doesn't have to be perfect).
-			 */
-			if (ipath_sdma_descq_freecnt(dd) < ret * 4) {
-				ipath_user_sdma_hwqueue_clean(dd);
-				ipath_user_sdma_queue_clean(dd, pq);
-			}
-
-			ret = ipath_user_sdma_push_pkts(dd, pq, &list);
-			if (ret < 0)
-				goto done_unlock;
-			else {
-				npkts += ret;
-				pq->counter += ret;
-
-				if (!list_empty(&list))
-					goto done_unlock;
-			}
-		}
-	}
-
-done_unlock:
-	if (!list_empty(&list))
-		ipath_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, &list);
-	mutex_unlock(&pq->lock);
-
-	return (ret < 0) ? ret : npkts;
-}
-
-int ipath_user_sdma_make_progress(struct ipath_devdata *dd,
-				  struct ipath_user_sdma_queue *pq)
-{
-	int ret = 0;
-
-	mutex_lock(&pq->lock);
-	ipath_user_sdma_hwqueue_clean(dd);
-	ret = ipath_user_sdma_queue_clean(dd, pq);
-	mutex_unlock(&pq->lock);
-
-	return ret;
-}
-
-u32 ipath_user_sdma_complete_counter(const struct ipath_user_sdma_queue *pq)
-{
-	return pq->sent_counter;
-}
-
-u32 ipath_user_sdma_inflight_counter(struct ipath_user_sdma_queue *pq)
-{
-	return pq->counter;
-}
-
diff --git a/drivers/staging/rdma/ipath/ipath_user_sdma.h b/drivers/staging/rdma/ipath/ipath_user_sdma.h
deleted file mode 100644
index fc76316..0000000
--- a/drivers/staging/rdma/ipath/ipath_user_sdma.h
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright (c) 2007, 2008 QLogic Corporation. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include <linux/device.h>
-
-struct ipath_user_sdma_queue;
-
-struct ipath_user_sdma_queue *
-ipath_user_sdma_queue_create(struct device *dev, int unit, int port, int sport);
-void ipath_user_sdma_queue_destroy(struct ipath_user_sdma_queue *pq);
-
-int ipath_user_sdma_writev(struct ipath_devdata *dd,
-			   struct ipath_user_sdma_queue *pq,
-			   const struct iovec *iov,
-			   unsigned long dim);
-
-int ipath_user_sdma_make_progress(struct ipath_devdata *dd,
-				  struct ipath_user_sdma_queue *pq);
-
-void ipath_user_sdma_queue_drain(struct ipath_devdata *dd,
-				 struct ipath_user_sdma_queue *pq);
-
-u32 ipath_user_sdma_complete_counter(const struct ipath_user_sdma_queue *pq);
-u32 ipath_user_sdma_inflight_counter(struct ipath_user_sdma_queue *pq);
diff --git a/drivers/staging/rdma/ipath/ipath_verbs.c b/drivers/staging/rdma/ipath/ipath_verbs.c
deleted file mode 100644
index 53f9dca..0000000
--- a/drivers/staging/rdma/ipath/ipath_verbs.c
+++ /dev/null
@@ -1,2376 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <rdma/ib_mad.h>
-#include <rdma/ib_user_verbs.h>
-#include <linux/io.h>
-#include <linux/slab.h>
-#include <linux/module.h>
-#include <linux/utsname.h>
-#include <linux/rculist.h>
-
-#include "ipath_kernel.h"
-#include "ipath_verbs.h"
-#include "ipath_common.h"
-
-static unsigned int ib_ipath_qp_table_size = 251;
-module_param_named(qp_table_size, ib_ipath_qp_table_size, uint, S_IRUGO);
-MODULE_PARM_DESC(qp_table_size, "QP table size");
-
-unsigned int ib_ipath_lkey_table_size = 12;
-module_param_named(lkey_table_size, ib_ipath_lkey_table_size, uint,
-		   S_IRUGO);
-MODULE_PARM_DESC(lkey_table_size,
-		 "LKEY table size in bits (2^n, 1 <= n <= 23)");
-
-static unsigned int ib_ipath_max_pds = 0xFFFF;
-module_param_named(max_pds, ib_ipath_max_pds, uint, S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(max_pds,
-		 "Maximum number of protection domains to support");
-
-static unsigned int ib_ipath_max_ahs = 0xFFFF;
-module_param_named(max_ahs, ib_ipath_max_ahs, uint, S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(max_ahs, "Maximum number of address handles to support");
-
-unsigned int ib_ipath_max_cqes = 0x2FFFF;
-module_param_named(max_cqes, ib_ipath_max_cqes, uint, S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(max_cqes,
-		 "Maximum number of completion queue entries to support");
-
-unsigned int ib_ipath_max_cqs = 0x1FFFF;
-module_param_named(max_cqs, ib_ipath_max_cqs, uint, S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(max_cqs, "Maximum number of completion queues to support");
-
-unsigned int ib_ipath_max_qp_wrs = 0x3FFF;
-module_param_named(max_qp_wrs, ib_ipath_max_qp_wrs, uint,
-		   S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(max_qp_wrs, "Maximum number of QP WRs to support");
-
-unsigned int ib_ipath_max_qps = 16384;
-module_param_named(max_qps, ib_ipath_max_qps, uint, S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(max_qps, "Maximum number of QPs to support");
-
-unsigned int ib_ipath_max_sges = 0x60;
-module_param_named(max_sges, ib_ipath_max_sges, uint, S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(max_sges, "Maximum number of SGEs to support");
-
-unsigned int ib_ipath_max_mcast_grps = 16384;
-module_param_named(max_mcast_grps, ib_ipath_max_mcast_grps, uint,
-		   S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(max_mcast_grps,
-		 "Maximum number of multicast groups to support");
-
-unsigned int ib_ipath_max_mcast_qp_attached = 16;
-module_param_named(max_mcast_qp_attached, ib_ipath_max_mcast_qp_attached,
-		   uint, S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(max_mcast_qp_attached,
-		 "Maximum number of attached QPs to support");
-
-unsigned int ib_ipath_max_srqs = 1024;
-module_param_named(max_srqs, ib_ipath_max_srqs, uint, S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(max_srqs, "Maximum number of SRQs to support");
-
-unsigned int ib_ipath_max_srq_sges = 128;
-module_param_named(max_srq_sges, ib_ipath_max_srq_sges,
-		   uint, S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(max_srq_sges, "Maximum number of SRQ SGEs to support");
-
-unsigned int ib_ipath_max_srq_wrs = 0x1FFFF;
-module_param_named(max_srq_wrs, ib_ipath_max_srq_wrs,
-		   uint, S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(max_srq_wrs, "Maximum number of SRQ WRs support");
-
-static unsigned int ib_ipath_disable_sma;
-module_param_named(disable_sma, ib_ipath_disable_sma, uint, S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(disable_sma, "Disable the SMA");
-
-/*
- * Note that it is OK to post send work requests in the SQE and ERR
- * states; ipath_do_send() will process them and generate error
- * completions as per IB 1.2 C10-96.
- */
-const int ib_ipath_state_ops[IB_QPS_ERR + 1] = {
-	[IB_QPS_RESET] = 0,
-	[IB_QPS_INIT] = IPATH_POST_RECV_OK,
-	[IB_QPS_RTR] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK,
-	[IB_QPS_RTS] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK |
-	    IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK |
-	    IPATH_PROCESS_NEXT_SEND_OK,
-	[IB_QPS_SQD] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK |
-	    IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK,
-	[IB_QPS_SQE] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK |
-	    IPATH_POST_SEND_OK | IPATH_FLUSH_SEND,
-	[IB_QPS_ERR] = IPATH_POST_RECV_OK | IPATH_FLUSH_RECV |
-	    IPATH_POST_SEND_OK | IPATH_FLUSH_SEND,
-};
-
-struct ipath_ucontext {
-	struct ib_ucontext ibucontext;
-};
-
-static inline struct ipath_ucontext *to_iucontext(struct ib_ucontext
-						  *ibucontext)
-{
-	return container_of(ibucontext, struct ipath_ucontext, ibucontext);
-}
-
-/*
- * Translate ib_wr_opcode into ib_wc_opcode.
- */
-const enum ib_wc_opcode ib_ipath_wc_opcode[] = {
-	[IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE,
-	[IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE,
-	[IB_WR_SEND] = IB_WC_SEND,
-	[IB_WR_SEND_WITH_IMM] = IB_WC_SEND,
-	[IB_WR_RDMA_READ] = IB_WC_RDMA_READ,
-	[IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP,
-	[IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD
-};
-
-/*
- * System image GUID.
- */
-static __be64 sys_image_guid;
-
-/**
- * ipath_copy_sge - copy data to SGE memory
- * @ss: the SGE state
- * @data: the data to copy
- * @length: the length of the data
- */
-void ipath_copy_sge(struct ipath_sge_state *ss, void *data, u32 length)
-{
-	struct ipath_sge *sge = &ss->sge;
-
-	while (length) {
-		u32 len = sge->length;
-
-		if (len > length)
-			len = length;
-		if (len > sge->sge_length)
-			len = sge->sge_length;
-		BUG_ON(len == 0);
-		memcpy(sge->vaddr, data, len);
-		sge->vaddr += len;
-		sge->length -= len;
-		sge->sge_length -= len;
-		if (sge->sge_length == 0) {
-			if (--ss->num_sge)
-				*sge = *ss->sg_list++;
-		} else if (sge->length == 0 && sge->mr != NULL) {
-			if (++sge->n >= IPATH_SEGSZ) {
-				if (++sge->m >= sge->mr->mapsz)
-					break;
-				sge->n = 0;
-			}
-			sge->vaddr =
-				sge->mr->map[sge->m]->segs[sge->n].vaddr;
-			sge->length =
-				sge->mr->map[sge->m]->segs[sge->n].length;
-		}
-		data += len;
-		length -= len;
-	}
-}
-
-/**
- * ipath_skip_sge - skip over SGE memory - XXX almost dup of prev func
- * @ss: the SGE state
- * @length: the number of bytes to skip
- */
-void ipath_skip_sge(struct ipath_sge_state *ss, u32 length)
-{
-	struct ipath_sge *sge = &ss->sge;
-
-	while (length) {
-		u32 len = sge->length;
-
-		if (len > length)
-			len = length;
-		if (len > sge->sge_length)
-			len = sge->sge_length;
-		BUG_ON(len == 0);
-		sge->vaddr += len;
-		sge->length -= len;
-		sge->sge_length -= len;
-		if (sge->sge_length == 0) {
-			if (--ss->num_sge)
-				*sge = *ss->sg_list++;
-		} else if (sge->length == 0 && sge->mr != NULL) {
-			if (++sge->n >= IPATH_SEGSZ) {
-				if (++sge->m >= sge->mr->mapsz)
-					break;
-				sge->n = 0;
-			}
-			sge->vaddr =
-				sge->mr->map[sge->m]->segs[sge->n].vaddr;
-			sge->length =
-				sge->mr->map[sge->m]->segs[sge->n].length;
-		}
-		length -= len;
-	}
-}
-
-/*
- * Count the number of DMA descriptors needed to send length bytes of data.
- * Don't modify the ipath_sge_state to get the count.
- * Return zero if any of the segments is not aligned.
- */
-static u32 ipath_count_sge(struct ipath_sge_state *ss, u32 length)
-{
-	struct ipath_sge *sg_list = ss->sg_list;
-	struct ipath_sge sge = ss->sge;
-	u8 num_sge = ss->num_sge;
-	u32 ndesc = 1;	/* count the header */
-
-	while (length) {
-		u32 len = sge.length;
-
-		if (len > length)
-			len = length;
-		if (len > sge.sge_length)
-			len = sge.sge_length;
-		BUG_ON(len == 0);
-		if (((long) sge.vaddr & (sizeof(u32) - 1)) ||
-		    (len != length && (len & (sizeof(u32) - 1)))) {
-			ndesc = 0;
-			break;
-		}
-		ndesc++;
-		sge.vaddr += len;
-		sge.length -= len;
-		sge.sge_length -= len;
-		if (sge.sge_length == 0) {
-			if (--num_sge)
-				sge = *sg_list++;
-		} else if (sge.length == 0 && sge.mr != NULL) {
-			if (++sge.n >= IPATH_SEGSZ) {
-				if (++sge.m >= sge.mr->mapsz)
-					break;
-				sge.n = 0;
-			}
-			sge.vaddr =
-				sge.mr->map[sge.m]->segs[sge.n].vaddr;
-			sge.length =
-				sge.mr->map[sge.m]->segs[sge.n].length;
-		}
-		length -= len;
-	}
-	return ndesc;
-}
-
-/*
- * Copy from the SGEs to the data buffer.
- */
-static void ipath_copy_from_sge(void *data, struct ipath_sge_state *ss,
-				u32 length)
-{
-	struct ipath_sge *sge = &ss->sge;
-
-	while (length) {
-		u32 len = sge->length;
-
-		if (len > length)
-			len = length;
-		if (len > sge->sge_length)
-			len = sge->sge_length;
-		BUG_ON(len == 0);
-		memcpy(data, sge->vaddr, len);
-		sge->vaddr += len;
-		sge->length -= len;
-		sge->sge_length -= len;
-		if (sge->sge_length == 0) {
-			if (--ss->num_sge)
-				*sge = *ss->sg_list++;
-		} else if (sge->length == 0 && sge->mr != NULL) {
-			if (++sge->n >= IPATH_SEGSZ) {
-				if (++sge->m >= sge->mr->mapsz)
-					break;
-				sge->n = 0;
-			}
-			sge->vaddr =
-				sge->mr->map[sge->m]->segs[sge->n].vaddr;
-			sge->length =
-				sge->mr->map[sge->m]->segs[sge->n].length;
-		}
-		data += len;
-		length -= len;
-	}
-}
-
-/**
- * ipath_post_one_send - post one RC, UC, or UD send work request
- * @qp: the QP to post on
- * @wr: the work request to send
- */
-static int ipath_post_one_send(struct ipath_qp *qp, struct ib_send_wr *wr)
-{
-	struct ipath_swqe *wqe;
-	u32 next;
-	int i;
-	int j;
-	int acc;
-	int ret;
-	unsigned long flags;
-	struct ipath_devdata *dd = to_idev(qp->ibqp.device)->dd;
-
-	spin_lock_irqsave(&qp->s_lock, flags);
-
-	if (qp->ibqp.qp_type != IB_QPT_SMI &&
-	    !(dd->ipath_flags & IPATH_LINKACTIVE)) {
-		ret = -ENETDOWN;
-		goto bail;
-	}
-
-	/* Check that state is OK to post send. */
-	if (unlikely(!(ib_ipath_state_ops[qp->state] & IPATH_POST_SEND_OK)))
-		goto bail_inval;
-
-	/* IB spec says that num_sge == 0 is OK. */
-	if (wr->num_sge > qp->s_max_sge)
-		goto bail_inval;
-
-	/*
-	 * Don't allow RDMA reads or atomic operations on UC or
-	 * undefined operations.
-	 * Make sure buffer is large enough to hold the result for atomics.
-	 */
-	if (qp->ibqp.qp_type == IB_QPT_UC) {
-		if ((unsigned) wr->opcode >= IB_WR_RDMA_READ)
-			goto bail_inval;
-	} else if (qp->ibqp.qp_type == IB_QPT_UD) {
-		/* Check UD opcode */
-		if (wr->opcode != IB_WR_SEND &&
-		    wr->opcode != IB_WR_SEND_WITH_IMM)
-			goto bail_inval;
-		/* Check UD destination address PD */
-		if (qp->ibqp.pd != ud_wr(wr)->ah->pd)
-			goto bail_inval;
-	} else if ((unsigned) wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD)
-		goto bail_inval;
-	else if (wr->opcode >= IB_WR_ATOMIC_CMP_AND_SWP &&
-		   (wr->num_sge == 0 ||
-		    wr->sg_list[0].length < sizeof(u64) ||
-		    wr->sg_list[0].addr & (sizeof(u64) - 1)))
-		goto bail_inval;
-	else if (wr->opcode >= IB_WR_RDMA_READ && !qp->s_max_rd_atomic)
-		goto bail_inval;
-
-	next = qp->s_head + 1;
-	if (next >= qp->s_size)
-		next = 0;
-	if (next == qp->s_last) {
-		ret = -ENOMEM;
-		goto bail;
-	}
-
-	wqe = get_swqe_ptr(qp, qp->s_head);
-
-	if (qp->ibqp.qp_type != IB_QPT_UC &&
-	    qp->ibqp.qp_type != IB_QPT_RC)
-		memcpy(&wqe->ud_wr, ud_wr(wr), sizeof(wqe->ud_wr));
-	else if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
-		 wr->opcode == IB_WR_RDMA_WRITE ||
-		 wr->opcode == IB_WR_RDMA_READ)
-		memcpy(&wqe->rdma_wr, rdma_wr(wr), sizeof(wqe->rdma_wr));
-	else if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
-		 wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
-		memcpy(&wqe->atomic_wr, atomic_wr(wr), sizeof(wqe->atomic_wr));
-	else
-		memcpy(&wqe->wr, wr, sizeof(wqe->wr));
-
-	wqe->length = 0;
-	if (wr->num_sge) {
-		acc = wr->opcode >= IB_WR_RDMA_READ ?
-			IB_ACCESS_LOCAL_WRITE : 0;
-		for (i = 0, j = 0; i < wr->num_sge; i++) {
-			u32 length = wr->sg_list[i].length;
-			int ok;
-
-			if (length == 0)
-				continue;
-			ok = ipath_lkey_ok(qp, &wqe->sg_list[j],
-					   &wr->sg_list[i], acc);
-			if (!ok)
-				goto bail_inval;
-			wqe->length += length;
-			j++;
-		}
-		wqe->wr.num_sge = j;
-	}
-	if (qp->ibqp.qp_type == IB_QPT_UC ||
-	    qp->ibqp.qp_type == IB_QPT_RC) {
-		if (wqe->length > 0x80000000U)
-			goto bail_inval;
-	} else if (wqe->length > to_idev(qp->ibqp.device)->dd->ipath_ibmtu)
-		goto bail_inval;
-	wqe->ssn = qp->s_ssn++;
-	qp->s_head = next;
-
-	ret = 0;
-	goto bail;
-
-bail_inval:
-	ret = -EINVAL;
-bail:
-	spin_unlock_irqrestore(&qp->s_lock, flags);
-	return ret;
-}
-
-/**
- * ipath_post_send - post a send on a QP
- * @ibqp: the QP to post the send on
- * @wr: the list of work requests to post
- * @bad_wr: the first bad WR is put here
- *
- * This may be called from interrupt context.
- */
-static int ipath_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
-			   struct ib_send_wr **bad_wr)
-{
-	struct ipath_qp *qp = to_iqp(ibqp);
-	int err = 0;
-
-	for (; wr; wr = wr->next) {
-		err = ipath_post_one_send(qp, wr);
-		if (err) {
-			*bad_wr = wr;
-			goto bail;
-		}
-	}
-
-	/* Try to do the send work in the caller's context. */
-	ipath_do_send((unsigned long) qp);
-
-bail:
-	return err;
-}
-
-/**
- * ipath_post_receive - post a receive on a QP
- * @ibqp: the QP to post the receive on
- * @wr: the WR to post
- * @bad_wr: the first bad WR is put here
- *
- * This may be called from interrupt context.
- */
-static int ipath_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
-			      struct ib_recv_wr **bad_wr)
-{
-	struct ipath_qp *qp = to_iqp(ibqp);
-	struct ipath_rwq *wq = qp->r_rq.wq;
-	unsigned long flags;
-	int ret;
-
-	/* Check that state is OK to post receive. */
-	if (!(ib_ipath_state_ops[qp->state] & IPATH_POST_RECV_OK) || !wq) {
-		*bad_wr = wr;
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	for (; wr; wr = wr->next) {
-		struct ipath_rwqe *wqe;
-		u32 next;
-		int i;
-
-		if ((unsigned) wr->num_sge > qp->r_rq.max_sge) {
-			*bad_wr = wr;
-			ret = -EINVAL;
-			goto bail;
-		}
-
-		spin_lock_irqsave(&qp->r_rq.lock, flags);
-		next = wq->head + 1;
-		if (next >= qp->r_rq.size)
-			next = 0;
-		if (next == wq->tail) {
-			spin_unlock_irqrestore(&qp->r_rq.lock, flags);
-			*bad_wr = wr;
-			ret = -ENOMEM;
-			goto bail;
-		}
-
-		wqe = get_rwqe_ptr(&qp->r_rq, wq->head);
-		wqe->wr_id = wr->wr_id;
-		wqe->num_sge = wr->num_sge;
-		for (i = 0; i < wr->num_sge; i++)
-			wqe->sg_list[i] = wr->sg_list[i];
-		/* Make sure queue entry is written before the head index. */
-		smp_wmb();
-		wq->head = next;
-		spin_unlock_irqrestore(&qp->r_rq.lock, flags);
-	}
-	ret = 0;
-
-bail:
-	return ret;
-}
-
-/**
- * ipath_qp_rcv - processing an incoming packet on a QP
- * @dev: the device the packet came on
- * @hdr: the packet header
- * @has_grh: true if the packet has a GRH
- * @data: the packet data
- * @tlen: the packet length
- * @qp: the QP the packet came on
- *
- * This is called from ipath_ib_rcv() to process an incoming packet
- * for the given QP.
- * Called at interrupt level.
- */
-static void ipath_qp_rcv(struct ipath_ibdev *dev,
-			 struct ipath_ib_header *hdr, int has_grh,
-			 void *data, u32 tlen, struct ipath_qp *qp)
-{
-	/* Check for valid receive state. */
-	if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) {
-		dev->n_pkt_drops++;
-		return;
-	}
-
-	switch (qp->ibqp.qp_type) {
-	case IB_QPT_SMI:
-	case IB_QPT_GSI:
-		if (ib_ipath_disable_sma)
-			break;
-		/* FALLTHROUGH */
-	case IB_QPT_UD:
-		ipath_ud_rcv(dev, hdr, has_grh, data, tlen, qp);
-		break;
-
-	case IB_QPT_RC:
-		ipath_rc_rcv(dev, hdr, has_grh, data, tlen, qp);
-		break;
-
-	case IB_QPT_UC:
-		ipath_uc_rcv(dev, hdr, has_grh, data, tlen, qp);
-		break;
-
-	default:
-		break;
-	}
-}
-
-/**
- * ipath_ib_rcv - process an incoming packet
- * @arg: the device pointer
- * @rhdr: the header of the packet
- * @data: the packet data
- * @tlen: the packet length
- *
- * This is called from ipath_kreceive() to process an incoming packet at
- * interrupt level. Tlen is the length of the header + data + CRC in bytes.
- */
-void ipath_ib_rcv(struct ipath_ibdev *dev, void *rhdr, void *data,
-		  u32 tlen)
-{
-	struct ipath_ib_header *hdr = rhdr;
-	struct ipath_other_headers *ohdr;
-	struct ipath_qp *qp;
-	u32 qp_num;
-	int lnh;
-	u8 opcode;
-	u16 lid;
-
-	if (unlikely(dev == NULL))
-		goto bail;
-
-	if (unlikely(tlen < 24)) {	/* LRH+BTH+CRC */
-		dev->rcv_errors++;
-		goto bail;
-	}
-
-	/* Check for a valid destination LID (see ch. 7.11.1). */
-	lid = be16_to_cpu(hdr->lrh[1]);
-	if (lid < IPATH_MULTICAST_LID_BASE) {
-		lid &= ~((1 << dev->dd->ipath_lmc) - 1);
-		if (unlikely(lid != dev->dd->ipath_lid)) {
-			dev->rcv_errors++;
-			goto bail;
-		}
-	}
-
-	/* Check for GRH */
-	lnh = be16_to_cpu(hdr->lrh[0]) & 3;
-	if (lnh == IPATH_LRH_BTH)
-		ohdr = &hdr->u.oth;
-	else if (lnh == IPATH_LRH_GRH)
-		ohdr = &hdr->u.l.oth;
-	else {
-		dev->rcv_errors++;
-		goto bail;
-	}
-
-	opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0x7f;
-	dev->opstats[opcode].n_bytes += tlen;
-	dev->opstats[opcode].n_packets++;
-
-	/* Get the destination QP number. */
-	qp_num = be32_to_cpu(ohdr->bth[1]) & IPATH_QPN_MASK;
-	if (qp_num == IPATH_MULTICAST_QPN) {
-		struct ipath_mcast *mcast;
-		struct ipath_mcast_qp *p;
-
-		if (lnh != IPATH_LRH_GRH) {
-			dev->n_pkt_drops++;
-			goto bail;
-		}
-		mcast = ipath_mcast_find(&hdr->u.l.grh.dgid);
-		if (mcast == NULL) {
-			dev->n_pkt_drops++;
-			goto bail;
-		}
-		dev->n_multicast_rcv++;
-		list_for_each_entry_rcu(p, &mcast->qp_list, list)
-			ipath_qp_rcv(dev, hdr, 1, data, tlen, p->qp);
-		/*
-		 * Notify ipath_multicast_detach() if it is waiting for us
-		 * to finish.
-		 */
-		if (atomic_dec_return(&mcast->refcount) <= 1)
-			wake_up(&mcast->wait);
-	} else {
-		qp = ipath_lookup_qpn(&dev->qp_table, qp_num);
-		if (qp) {
-			dev->n_unicast_rcv++;
-			ipath_qp_rcv(dev, hdr, lnh == IPATH_LRH_GRH, data,
-				     tlen, qp);
-			/*
-			 * Notify ipath_destroy_qp() if it is waiting
-			 * for us to finish.
-			 */
-			if (atomic_dec_and_test(&qp->refcount))
-				wake_up(&qp->wait);
-		} else
-			dev->n_pkt_drops++;
-	}
-
-bail:;
-}
-
-/**
- * ipath_ib_timer - verbs timer
- * @arg: the device pointer
- *
- * This is called from ipath_do_rcv_timer() at interrupt level to check for
- * QPs which need retransmits and to collect performance numbers.
- */
-static void ipath_ib_timer(struct ipath_ibdev *dev)
-{
-	struct ipath_qp *resend = NULL;
-	struct ipath_qp *rnr = NULL;
-	struct list_head *last;
-	struct ipath_qp *qp;
-	unsigned long flags;
-
-	if (dev == NULL)
-		return;
-
-	spin_lock_irqsave(&dev->pending_lock, flags);
-	/* Start filling the next pending queue. */
-	if (++dev->pending_index >= ARRAY_SIZE(dev->pending))
-		dev->pending_index = 0;
-	/* Save any requests still in the new queue, they have timed out. */
-	last = &dev->pending[dev->pending_index];
-	while (!list_empty(last)) {
-		qp = list_entry(last->next, struct ipath_qp, timerwait);
-		list_del_init(&qp->timerwait);
-		qp->timer_next = resend;
-		resend = qp;
-		atomic_inc(&qp->refcount);
-	}
-	last = &dev->rnrwait;
-	if (!list_empty(last)) {
-		qp = list_entry(last->next, struct ipath_qp, timerwait);
-		if (--qp->s_rnr_timeout == 0) {
-			do {
-				list_del_init(&qp->timerwait);
-				qp->timer_next = rnr;
-				rnr = qp;
-				atomic_inc(&qp->refcount);
-				if (list_empty(last))
-					break;
-				qp = list_entry(last->next, struct ipath_qp,
-						timerwait);
-			} while (qp->s_rnr_timeout == 0);
-		}
-	}
-	/*
-	 * We should only be in the started state if pma_sample_start != 0
-	 */
-	if (dev->pma_sample_status == IB_PMA_SAMPLE_STATUS_STARTED &&
-	    --dev->pma_sample_start == 0) {
-		dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_RUNNING;
-		ipath_snapshot_counters(dev->dd, &dev->ipath_sword,
-					&dev->ipath_rword,
-					&dev->ipath_spkts,
-					&dev->ipath_rpkts,
-					&dev->ipath_xmit_wait);
-	}
-	if (dev->pma_sample_status == IB_PMA_SAMPLE_STATUS_RUNNING) {
-		if (dev->pma_sample_interval == 0) {
-			u64 ta, tb, tc, td, te;
-
-			dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_DONE;
-			ipath_snapshot_counters(dev->dd, &ta, &tb,
-						&tc, &td, &te);
-
-			dev->ipath_sword = ta - dev->ipath_sword;
-			dev->ipath_rword = tb - dev->ipath_rword;
-			dev->ipath_spkts = tc - dev->ipath_spkts;
-			dev->ipath_rpkts = td - dev->ipath_rpkts;
-			dev->ipath_xmit_wait = te - dev->ipath_xmit_wait;
-		} else {
-			dev->pma_sample_interval--;
-		}
-	}
-	spin_unlock_irqrestore(&dev->pending_lock, flags);
-
-	/* XXX What if timer fires again while this is running? */
-	while (resend != NULL) {
-		qp = resend;
-		resend = qp->timer_next;
-
-		spin_lock_irqsave(&qp->s_lock, flags);
-		if (qp->s_last != qp->s_tail &&
-		    ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) {
-			dev->n_timeouts++;
-			ipath_restart_rc(qp, qp->s_last_psn + 1);
-		}
-		spin_unlock_irqrestore(&qp->s_lock, flags);
-
-		/* Notify ipath_destroy_qp() if it is waiting. */
-		if (atomic_dec_and_test(&qp->refcount))
-			wake_up(&qp->wait);
-	}
-	while (rnr != NULL) {
-		qp = rnr;
-		rnr = qp->timer_next;
-
-		spin_lock_irqsave(&qp->s_lock, flags);
-		if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)
-			ipath_schedule_send(qp);
-		spin_unlock_irqrestore(&qp->s_lock, flags);
-
-		/* Notify ipath_destroy_qp() if it is waiting. */
-		if (atomic_dec_and_test(&qp->refcount))
-			wake_up(&qp->wait);
-	}
-}
-
-static void update_sge(struct ipath_sge_state *ss, u32 length)
-{
-	struct ipath_sge *sge = &ss->sge;
-
-	sge->vaddr += length;
-	sge->length -= length;
-	sge->sge_length -= length;
-	if (sge->sge_length == 0) {
-		if (--ss->num_sge)
-			*sge = *ss->sg_list++;
-	} else if (sge->length == 0 && sge->mr != NULL) {
-		if (++sge->n >= IPATH_SEGSZ) {
-			if (++sge->m >= sge->mr->mapsz)
-				return;
-			sge->n = 0;
-		}
-		sge->vaddr = sge->mr->map[sge->m]->segs[sge->n].vaddr;
-		sge->length = sge->mr->map[sge->m]->segs[sge->n].length;
-	}
-}
-
-#ifdef __LITTLE_ENDIAN
-static inline u32 get_upper_bits(u32 data, u32 shift)
-{
-	return data >> shift;
-}
-
-static inline u32 set_upper_bits(u32 data, u32 shift)
-{
-	return data << shift;
-}
-
-static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
-{
-	data <<= ((sizeof(u32) - n) * BITS_PER_BYTE);
-	data >>= ((sizeof(u32) - n - off) * BITS_PER_BYTE);
-	return data;
-}
-#else
-static inline u32 get_upper_bits(u32 data, u32 shift)
-{
-	return data << shift;
-}
-
-static inline u32 set_upper_bits(u32 data, u32 shift)
-{
-	return data >> shift;
-}
-
-static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
-{
-	data >>= ((sizeof(u32) - n) * BITS_PER_BYTE);
-	data <<= ((sizeof(u32) - n - off) * BITS_PER_BYTE);
-	return data;
-}
-#endif
-
-static void copy_io(u32 __iomem *piobuf, struct ipath_sge_state *ss,
-		    u32 length, unsigned flush_wc)
-{
-	u32 extra = 0;
-	u32 data = 0;
-	u32 last;
-
-	while (1) {
-		u32 len = ss->sge.length;
-		u32 off;
-
-		if (len > length)
-			len = length;
-		if (len > ss->sge.sge_length)
-			len = ss->sge.sge_length;
-		BUG_ON(len == 0);
-		/* If the source address is not aligned, try to align it. */
-		off = (unsigned long)ss->sge.vaddr & (sizeof(u32) - 1);
-		if (off) {
-			u32 *addr = (u32 *)((unsigned long)ss->sge.vaddr &
-					    ~(sizeof(u32) - 1));
-			u32 v = get_upper_bits(*addr, off * BITS_PER_BYTE);
-			u32 y;
-
-			y = sizeof(u32) - off;
-			if (len > y)
-				len = y;
-			if (len + extra >= sizeof(u32)) {
-				data |= set_upper_bits(v, extra *
-						       BITS_PER_BYTE);
-				len = sizeof(u32) - extra;
-				if (len == length) {
-					last = data;
-					break;
-				}
-				__raw_writel(data, piobuf);
-				piobuf++;
-				extra = 0;
-				data = 0;
-			} else {
-				/* Clear unused upper bytes */
-				data |= clear_upper_bytes(v, len, extra);
-				if (len == length) {
-					last = data;
-					break;
-				}
-				extra += len;
-			}
-		} else if (extra) {
-			/* Source address is aligned. */
-			u32 *addr = (u32 *) ss->sge.vaddr;
-			int shift = extra * BITS_PER_BYTE;
-			int ushift = 32 - shift;
-			u32 l = len;
-
-			while (l >= sizeof(u32)) {
-				u32 v = *addr;
-
-				data |= set_upper_bits(v, shift);
-				__raw_writel(data, piobuf);
-				data = get_upper_bits(v, ushift);
-				piobuf++;
-				addr++;
-				l -= sizeof(u32);
-			}
-			/*
-			 * We still have 'extra' number of bytes leftover.
-			 */
-			if (l) {
-				u32 v = *addr;
-
-				if (l + extra >= sizeof(u32)) {
-					data |= set_upper_bits(v, shift);
-					len -= l + extra - sizeof(u32);
-					if (len == length) {
-						last = data;
-						break;
-					}
-					__raw_writel(data, piobuf);
-					piobuf++;
-					extra = 0;
-					data = 0;
-				} else {
-					/* Clear unused upper bytes */
-					data |= clear_upper_bytes(v, l,
-								  extra);
-					if (len == length) {
-						last = data;
-						break;
-					}
-					extra += l;
-				}
-			} else if (len == length) {
-				last = data;
-				break;
-			}
-		} else if (len == length) {
-			u32 w;
-
-			/*
-			 * Need to round up for the last dword in the
-			 * packet.
-			 */
-			w = (len + 3) >> 2;
-			__iowrite32_copy(piobuf, ss->sge.vaddr, w - 1);
-			piobuf += w - 1;
-			last = ((u32 *) ss->sge.vaddr)[w - 1];
-			break;
-		} else {
-			u32 w = len >> 2;
-
-			__iowrite32_copy(piobuf, ss->sge.vaddr, w);
-			piobuf += w;
-
-			extra = len & (sizeof(u32) - 1);
-			if (extra) {
-				u32 v = ((u32 *) ss->sge.vaddr)[w];
-
-				/* Clear unused upper bytes */
-				data = clear_upper_bytes(v, extra, 0);
-			}
-		}
-		update_sge(ss, len);
-		length -= len;
-	}
-	/* Update address before sending packet. */
-	update_sge(ss, length);
-	if (flush_wc) {
-		/* must flush early everything before trigger word */
-		ipath_flush_wc();
-		__raw_writel(last, piobuf);
-		/* be sure trigger word is written */
-		ipath_flush_wc();
-	} else
-		__raw_writel(last, piobuf);
-}
-
-/*
- * Convert IB rate to delay multiplier.
- */
-unsigned ipath_ib_rate_to_mult(enum ib_rate rate)
-{
-	switch (rate) {
-	case IB_RATE_2_5_GBPS: return 8;
-	case IB_RATE_5_GBPS:   return 4;
-	case IB_RATE_10_GBPS:  return 2;
-	case IB_RATE_20_GBPS:  return 1;
-	default:	       return 0;
-	}
-}
-
-/*
- * Convert delay multiplier to IB rate
- */
-static enum ib_rate ipath_mult_to_ib_rate(unsigned mult)
-{
-	switch (mult) {
-	case 8:  return IB_RATE_2_5_GBPS;
-	case 4:  return IB_RATE_5_GBPS;
-	case 2:  return IB_RATE_10_GBPS;
-	case 1:  return IB_RATE_20_GBPS;
-	default: return IB_RATE_PORT_CURRENT;
-	}
-}
-
-static inline struct ipath_verbs_txreq *get_txreq(struct ipath_ibdev *dev)
-{
-	struct ipath_verbs_txreq *tx = NULL;
-	unsigned long flags;
-
-	spin_lock_irqsave(&dev->pending_lock, flags);
-	if (!list_empty(&dev->txreq_free)) {
-		struct list_head *l = dev->txreq_free.next;
-
-		list_del(l);
-		tx = list_entry(l, struct ipath_verbs_txreq, txreq.list);
-	}
-	spin_unlock_irqrestore(&dev->pending_lock, flags);
-	return tx;
-}
-
-static inline void put_txreq(struct ipath_ibdev *dev,
-			     struct ipath_verbs_txreq *tx)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&dev->pending_lock, flags);
-	list_add(&tx->txreq.list, &dev->txreq_free);
-	spin_unlock_irqrestore(&dev->pending_lock, flags);
-}
-
-static void sdma_complete(void *cookie, int status)
-{
-	struct ipath_verbs_txreq *tx = cookie;
-	struct ipath_qp *qp = tx->qp;
-	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
-	unsigned long flags;
-	enum ib_wc_status ibs = status == IPATH_SDMA_TXREQ_S_OK ?
-		IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR;
-
-	if (atomic_dec_and_test(&qp->s_dma_busy)) {
-		spin_lock_irqsave(&qp->s_lock, flags);
-		if (tx->wqe)
-			ipath_send_complete(qp, tx->wqe, ibs);
-		if ((ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND &&
-		     qp->s_last != qp->s_head) ||
-		    (qp->s_flags & IPATH_S_WAIT_DMA))
-			ipath_schedule_send(qp);
-		spin_unlock_irqrestore(&qp->s_lock, flags);
-		wake_up(&qp->wait_dma);
-	} else if (tx->wqe) {
-		spin_lock_irqsave(&qp->s_lock, flags);
-		ipath_send_complete(qp, tx->wqe, ibs);
-		spin_unlock_irqrestore(&qp->s_lock, flags);
-	}
-
-	if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEBUF)
-		kfree(tx->txreq.map_addr);
-	put_txreq(dev, tx);
-
-	if (atomic_dec_and_test(&qp->refcount))
-		wake_up(&qp->wait);
-}
-
-static void decrement_dma_busy(struct ipath_qp *qp)
-{
-	unsigned long flags;
-
-	if (atomic_dec_and_test(&qp->s_dma_busy)) {
-		spin_lock_irqsave(&qp->s_lock, flags);
-		if ((ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND &&
-		     qp->s_last != qp->s_head) ||
-		    (qp->s_flags & IPATH_S_WAIT_DMA))
-			ipath_schedule_send(qp);
-		spin_unlock_irqrestore(&qp->s_lock, flags);
-		wake_up(&qp->wait_dma);
-	}
-}
-
-/*
- * Compute the number of clock cycles of delay before sending the next packet.
- * The multipliers reflect the number of clocks for the fastest rate so
- * one tick at 4xDDR is 8 ticks at 1xSDR.
- * If the destination port will take longer to receive a packet than
- * the outgoing link can send it, we need to delay sending the next packet
- * by the difference in time it takes the receiver to receive and the sender
- * to send this packet.
- * Note that this delay is always correct for UC and RC but not always
- * optimal for UD. For UD, the destination HCA can be different for each
- * packet, in which case, we could send packets to a different destination
- * while "waiting" for the delay. The overhead for doing this without
- * HW support is more than just paying the cost of delaying some packets
- * unnecessarily.
- */
-static inline unsigned ipath_pkt_delay(u32 plen, u8 snd_mult, u8 rcv_mult)
-{
-	return (rcv_mult > snd_mult) ?
-		(plen * (rcv_mult - snd_mult) + 1) >> 1 : 0;
-}
-
-static int ipath_verbs_send_dma(struct ipath_qp *qp,
-				struct ipath_ib_header *hdr, u32 hdrwords,
-				struct ipath_sge_state *ss, u32 len,
-				u32 plen, u32 dwords)
-{
-	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
-	struct ipath_devdata *dd = dev->dd;
-	struct ipath_verbs_txreq *tx;
-	u32 *piobuf;
-	u32 control;
-	u32 ndesc;
-	int ret;
-
-	tx = qp->s_tx;
-	if (tx) {
-		qp->s_tx = NULL;
-		/* resend previously constructed packet */
-		atomic_inc(&qp->s_dma_busy);
-		ret = ipath_sdma_verbs_send(dd, tx->ss, tx->len, tx);
-		if (ret) {
-			qp->s_tx = tx;
-			decrement_dma_busy(qp);
-		}
-		goto bail;
-	}
-
-	tx = get_txreq(dev);
-	if (!tx) {
-		ret = -EBUSY;
-		goto bail;
-	}
-
-	/*
-	 * Get the saved delay count we computed for the previous packet
-	 * and save the delay count for this packet to be used next time
-	 * we get here.
-	 */
-	control = qp->s_pkt_delay;
-	qp->s_pkt_delay = ipath_pkt_delay(plen, dd->delay_mult, qp->s_dmult);
-
-	tx->qp = qp;
-	atomic_inc(&qp->refcount);
-	tx->wqe = qp->s_wqe;
-	tx->txreq.callback = sdma_complete;
-	tx->txreq.callback_cookie = tx;
-	tx->txreq.flags = IPATH_SDMA_TXREQ_F_HEADTOHOST |
-		IPATH_SDMA_TXREQ_F_INTREQ | IPATH_SDMA_TXREQ_F_FREEDESC;
-	if (plen + 1 >= IPATH_SMALLBUF_DWORDS)
-		tx->txreq.flags |= IPATH_SDMA_TXREQ_F_USELARGEBUF;
-
-	/* VL15 packets bypass credit check */
-	if ((be16_to_cpu(hdr->lrh[0]) >> 12) == 15) {
-		control |= 1ULL << 31;
-		tx->txreq.flags |= IPATH_SDMA_TXREQ_F_VL15;
-	}
-
-	if (len) {
-		/*
-		 * Don't try to DMA if it takes more descriptors than
-		 * the queue holds.
-		 */
-		ndesc = ipath_count_sge(ss, len);
-		if (ndesc >= dd->ipath_sdma_descq_cnt)
-			ndesc = 0;
-	} else
-		ndesc = 1;
-	if (ndesc) {
-		tx->hdr.pbc[0] = cpu_to_le32(plen);
-		tx->hdr.pbc[1] = cpu_to_le32(control);
-		memcpy(&tx->hdr.hdr, hdr, hdrwords << 2);
-		tx->txreq.sg_count = ndesc;
-		tx->map_len = (hdrwords + 2) << 2;
-		tx->txreq.map_addr = &tx->hdr;
-		atomic_inc(&qp->s_dma_busy);
-		ret = ipath_sdma_verbs_send(dd, ss, dwords, tx);
-		if (ret) {
-			/* save ss and length in dwords */
-			tx->ss = ss;
-			tx->len = dwords;
-			qp->s_tx = tx;
-			decrement_dma_busy(qp);
-		}
-		goto bail;
-	}
-
-	/* Allocate a buffer and copy the header and payload to it. */
-	tx->map_len = (plen + 1) << 2;
-	piobuf = kmalloc(tx->map_len, GFP_ATOMIC);
-	if (unlikely(piobuf == NULL)) {
-		ret = -EBUSY;
-		goto err_tx;
-	}
-	tx->txreq.map_addr = piobuf;
-	tx->txreq.flags |= IPATH_SDMA_TXREQ_F_FREEBUF;
-	tx->txreq.sg_count = 1;
-
-	*piobuf++ = (__force u32) cpu_to_le32(plen);
-	*piobuf++ = (__force u32) cpu_to_le32(control);
-	memcpy(piobuf, hdr, hdrwords << 2);
-	ipath_copy_from_sge(piobuf + hdrwords, ss, len);
-
-	atomic_inc(&qp->s_dma_busy);
-	ret = ipath_sdma_verbs_send(dd, NULL, 0, tx);
-	/*
-	 * If we couldn't queue the DMA request, save the info
-	 * and try again later rather than destroying the
-	 * buffer and undoing the side effects of the copy.
-	 */
-	if (ret) {
-		tx->ss = NULL;
-		tx->len = 0;
-		qp->s_tx = tx;
-		decrement_dma_busy(qp);
-	}
-	dev->n_unaligned++;
-	goto bail;
-
-err_tx:
-	if (atomic_dec_and_test(&qp->refcount))
-		wake_up(&qp->wait);
-	put_txreq(dev, tx);
-bail:
-	return ret;
-}
-
-static int ipath_verbs_send_pio(struct ipath_qp *qp,
-				struct ipath_ib_header *ibhdr, u32 hdrwords,
-				struct ipath_sge_state *ss, u32 len,
-				u32 plen, u32 dwords)
-{
-	struct ipath_devdata *dd = to_idev(qp->ibqp.device)->dd;
-	u32 *hdr = (u32 *) ibhdr;
-	u32 __iomem *piobuf;
-	unsigned flush_wc;
-	u32 control;
-	int ret;
-	unsigned long flags;
-
-	piobuf = ipath_getpiobuf(dd, plen, NULL);
-	if (unlikely(piobuf == NULL)) {
-		ret = -EBUSY;
-		goto bail;
-	}
-
-	/*
-	 * Get the saved delay count we computed for the previous packet
-	 * and save the delay count for this packet to be used next time
-	 * we get here.
-	 */
-	control = qp->s_pkt_delay;
-	qp->s_pkt_delay = ipath_pkt_delay(plen, dd->delay_mult, qp->s_dmult);
-
-	/* VL15 packets bypass credit check */
-	if ((be16_to_cpu(ibhdr->lrh[0]) >> 12) == 15)
-		control |= 1ULL << 31;
-
-	/*
-	 * Write the length to the control qword plus any needed flags.
-	 * We have to flush after the PBC for correctness on some cpus
-	 * or WC buffer can be written out of order.
-	 */
-	writeq(((u64) control << 32) | plen, piobuf);
-	piobuf += 2;
-
-	flush_wc = dd->ipath_flags & IPATH_PIO_FLUSH_WC;
-	if (len == 0) {
-		/*
-		 * If there is just the header portion, must flush before
-		 * writing last word of header for correctness, and after
-		 * the last header word (trigger word).
-		 */
-		if (flush_wc) {
-			ipath_flush_wc();
-			__iowrite32_copy(piobuf, hdr, hdrwords - 1);
-			ipath_flush_wc();
-			__raw_writel(hdr[hdrwords - 1], piobuf + hdrwords - 1);
-			ipath_flush_wc();
-		} else
-			__iowrite32_copy(piobuf, hdr, hdrwords);
-		goto done;
-	}
-
-	if (flush_wc)
-		ipath_flush_wc();
-	__iowrite32_copy(piobuf, hdr, hdrwords);
-	piobuf += hdrwords;
-
-	/* The common case is aligned and contained in one segment. */
-	if (likely(ss->num_sge == 1 && len <= ss->sge.length &&
-		   !((unsigned long)ss->sge.vaddr & (sizeof(u32) - 1)))) {
-		u32 *addr = (u32 *) ss->sge.vaddr;
-
-		/* Update address before sending packet. */
-		update_sge(ss, len);
-		if (flush_wc) {
-			__iowrite32_copy(piobuf, addr, dwords - 1);
-			/* must flush early everything before trigger word */
-			ipath_flush_wc();
-			__raw_writel(addr[dwords - 1], piobuf + dwords - 1);
-			/* be sure trigger word is written */
-			ipath_flush_wc();
-		} else
-			__iowrite32_copy(piobuf, addr, dwords);
-		goto done;
-	}
-	copy_io(piobuf, ss, len, flush_wc);
-done:
-	if (qp->s_wqe) {
-		spin_lock_irqsave(&qp->s_lock, flags);
-		ipath_send_complete(qp, qp->s_wqe, IB_WC_SUCCESS);
-		spin_unlock_irqrestore(&qp->s_lock, flags);
-	}
-	ret = 0;
-bail:
-	return ret;
-}
-
-/**
- * ipath_verbs_send - send a packet
- * @qp: the QP to send on
- * @hdr: the packet header
- * @hdrwords: the number of 32-bit words in the header
- * @ss: the SGE to send
- * @len: the length of the packet in bytes
- */
-int ipath_verbs_send(struct ipath_qp *qp, struct ipath_ib_header *hdr,
-		     u32 hdrwords, struct ipath_sge_state *ss, u32 len)
-{
-	struct ipath_devdata *dd = to_idev(qp->ibqp.device)->dd;
-	u32 plen;
-	int ret;
-	u32 dwords = (len + 3) >> 2;
-
-	/*
-	 * Calculate the send buffer trigger address.
-	 * The +1 counts for the pbc control dword following the pbc length.
-	 */
-	plen = hdrwords + dwords + 1;
-
-	/*
-	 * VL15 packets (IB_QPT_SMI) will always use PIO, so we
-	 * can defer SDMA restart until link goes ACTIVE without
-	 * worrying about just how we got there.
-	 */
-	if (qp->ibqp.qp_type == IB_QPT_SMI ||
-	    !(dd->ipath_flags & IPATH_HAS_SEND_DMA))
-		ret = ipath_verbs_send_pio(qp, hdr, hdrwords, ss, len,
-					   plen, dwords);
-	else
-		ret = ipath_verbs_send_dma(qp, hdr, hdrwords, ss, len,
-					   plen, dwords);
-
-	return ret;
-}
-
-int ipath_snapshot_counters(struct ipath_devdata *dd, u64 *swords,
-			    u64 *rwords, u64 *spkts, u64 *rpkts,
-			    u64 *xmit_wait)
-{
-	int ret;
-
-	if (!(dd->ipath_flags & IPATH_INITTED)) {
-		/* no hardware, freeze, etc. */
-		ret = -EINVAL;
-		goto bail;
-	}
-	*swords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt);
-	*rwords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt);
-	*spkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt);
-	*rpkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt);
-	*xmit_wait = ipath_snap_cntr(dd, dd->ipath_cregs->cr_sendstallcnt);
-
-	ret = 0;
-
-bail:
-	return ret;
-}
-
-/**
- * ipath_get_counters - get various chip counters
- * @dd: the infinipath device
- * @cntrs: counters are placed here
- *
- * Return the counters needed by recv_pma_get_portcounters().
- */
-int ipath_get_counters(struct ipath_devdata *dd,
-		       struct ipath_verbs_counters *cntrs)
-{
-	struct ipath_cregs const *crp = dd->ipath_cregs;
-	int ret;
-
-	if (!(dd->ipath_flags & IPATH_INITTED)) {
-		/* no hardware, freeze, etc. */
-		ret = -EINVAL;
-		goto bail;
-	}
-	cntrs->symbol_error_counter =
-		ipath_snap_cntr(dd, crp->cr_ibsymbolerrcnt);
-	cntrs->link_error_recovery_counter =
-		ipath_snap_cntr(dd, crp->cr_iblinkerrrecovcnt);
-	/*
-	 * The link downed counter counts when the other side downs the
-	 * connection.  We add in the number of times we downed the link
-	 * due to local link integrity errors to compensate.
-	 */
-	cntrs->link_downed_counter =
-		ipath_snap_cntr(dd, crp->cr_iblinkdowncnt);
-	cntrs->port_rcv_errors =
-		ipath_snap_cntr(dd, crp->cr_rxdroppktcnt) +
-		ipath_snap_cntr(dd, crp->cr_rcvovflcnt) +
-		ipath_snap_cntr(dd, crp->cr_portovflcnt) +
-		ipath_snap_cntr(dd, crp->cr_err_rlencnt) +
-		ipath_snap_cntr(dd, crp->cr_invalidrlencnt) +
-		ipath_snap_cntr(dd, crp->cr_errlinkcnt) +
-		ipath_snap_cntr(dd, crp->cr_erricrccnt) +
-		ipath_snap_cntr(dd, crp->cr_errvcrccnt) +
-		ipath_snap_cntr(dd, crp->cr_errlpcrccnt) +
-		ipath_snap_cntr(dd, crp->cr_badformatcnt) +
-		dd->ipath_rxfc_unsupvl_errs;
-	if (crp->cr_rxotherlocalphyerrcnt)
-		cntrs->port_rcv_errors +=
-			ipath_snap_cntr(dd, crp->cr_rxotherlocalphyerrcnt);
-	if (crp->cr_rxvlerrcnt)
-		cntrs->port_rcv_errors +=
-			ipath_snap_cntr(dd, crp->cr_rxvlerrcnt);
-	cntrs->port_rcv_remphys_errors =
-		ipath_snap_cntr(dd, crp->cr_rcvebpcnt);
-	cntrs->port_xmit_discards = ipath_snap_cntr(dd, crp->cr_unsupvlcnt);
-	cntrs->port_xmit_data = ipath_snap_cntr(dd, crp->cr_wordsendcnt);
-	cntrs->port_rcv_data = ipath_snap_cntr(dd, crp->cr_wordrcvcnt);
-	cntrs->port_xmit_packets = ipath_snap_cntr(dd, crp->cr_pktsendcnt);
-	cntrs->port_rcv_packets = ipath_snap_cntr(dd, crp->cr_pktrcvcnt);
-	cntrs->local_link_integrity_errors =
-		crp->cr_locallinkintegrityerrcnt ?
-		ipath_snap_cntr(dd, crp->cr_locallinkintegrityerrcnt) :
-		((dd->ipath_flags & IPATH_GPIO_ERRINTRS) ?
-		 dd->ipath_lli_errs : dd->ipath_lli_errors);
-	cntrs->excessive_buffer_overrun_errors =
-		crp->cr_excessbufferovflcnt ?
-		ipath_snap_cntr(dd, crp->cr_excessbufferovflcnt) :
-		dd->ipath_overrun_thresh_errs;
-	cntrs->vl15_dropped = crp->cr_vl15droppedpktcnt ?
-		ipath_snap_cntr(dd, crp->cr_vl15droppedpktcnt) : 0;
-
-	ret = 0;
-
-bail:
-	return ret;
-}
-
-/**
- * ipath_ib_piobufavail - callback when a PIO buffer is available
- * @arg: the device pointer
- *
- * This is called from ipath_intr() at interrupt level when a PIO buffer is
- * available after ipath_verbs_send() returned an error that no buffers were
- * available.  Return 1 if we consumed all the PIO buffers and we still have
- * QPs waiting for buffers (for now, just restart the send tasklet and
- * return zero).
- */
-int ipath_ib_piobufavail(struct ipath_ibdev *dev)
-{
-	struct list_head *list;
-	struct ipath_qp *qplist;
-	struct ipath_qp *qp;
-	unsigned long flags;
-
-	if (dev == NULL)
-		goto bail;
-
-	list = &dev->piowait;
-	qplist = NULL;
-
-	spin_lock_irqsave(&dev->pending_lock, flags);
-	while (!list_empty(list)) {
-		qp = list_entry(list->next, struct ipath_qp, piowait);
-		list_del_init(&qp->piowait);
-		qp->pio_next = qplist;
-		qplist = qp;
-		atomic_inc(&qp->refcount);
-	}
-	spin_unlock_irqrestore(&dev->pending_lock, flags);
-
-	while (qplist != NULL) {
-		qp = qplist;
-		qplist = qp->pio_next;
-
-		spin_lock_irqsave(&qp->s_lock, flags);
-		if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)
-			ipath_schedule_send(qp);
-		spin_unlock_irqrestore(&qp->s_lock, flags);
-
-		/* Notify ipath_destroy_qp() if it is waiting. */
-		if (atomic_dec_and_test(&qp->refcount))
-			wake_up(&qp->wait);
-	}
-
-bail:
-	return 0;
-}
-
-static int ipath_query_device(struct ib_device *ibdev, struct ib_device_attr *props,
-			      struct ib_udata *uhw)
-{
-	struct ipath_ibdev *dev = to_idev(ibdev);
-
-	if (uhw->inlen || uhw->outlen)
-		return -EINVAL;
-
-	memset(props, 0, sizeof(*props));
-
-	props->device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR |
-		IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT |
-		IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN |
-		IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE;
-	props->page_size_cap = PAGE_SIZE;
-	props->vendor_id =
-		IPATH_SRC_OUI_1 << 16 | IPATH_SRC_OUI_2 << 8 | IPATH_SRC_OUI_3;
-	props->vendor_part_id = dev->dd->ipath_deviceid;
-	props->hw_ver = dev->dd->ipath_pcirev;
-
-	props->sys_image_guid = dev->sys_image_guid;
-
-	props->max_mr_size = ~0ull;
-	props->max_qp = ib_ipath_max_qps;
-	props->max_qp_wr = ib_ipath_max_qp_wrs;
-	props->max_sge = ib_ipath_max_sges;
-	props->max_sge_rd = ib_ipath_max_sges;
-	props->max_cq = ib_ipath_max_cqs;
-	props->max_ah = ib_ipath_max_ahs;
-	props->max_cqe = ib_ipath_max_cqes;
-	props->max_mr = dev->lk_table.max;
-	props->max_fmr = dev->lk_table.max;
-	props->max_map_per_fmr = 32767;
-	props->max_pd = ib_ipath_max_pds;
-	props->max_qp_rd_atom = IPATH_MAX_RDMA_ATOMIC;
-	props->max_qp_init_rd_atom = 255;
-	/* props->max_res_rd_atom */
-	props->max_srq = ib_ipath_max_srqs;
-	props->max_srq_wr = ib_ipath_max_srq_wrs;
-	props->max_srq_sge = ib_ipath_max_srq_sges;
-	/* props->local_ca_ack_delay */
-	props->atomic_cap = IB_ATOMIC_GLOB;
-	props->max_pkeys = ipath_get_npkeys(dev->dd);
-	props->max_mcast_grp = ib_ipath_max_mcast_grps;
-	props->max_mcast_qp_attach = ib_ipath_max_mcast_qp_attached;
-	props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
-		props->max_mcast_grp;
-
-	return 0;
-}
-
-const u8 ipath_cvt_physportstate[32] = {
-	[INFINIPATH_IBCS_LT_STATE_DISABLED] = IB_PHYSPORTSTATE_DISABLED,
-	[INFINIPATH_IBCS_LT_STATE_LINKUP] = IB_PHYSPORTSTATE_LINKUP,
-	[INFINIPATH_IBCS_LT_STATE_POLLACTIVE] = IB_PHYSPORTSTATE_POLL,
-	[INFINIPATH_IBCS_LT_STATE_POLLQUIET] = IB_PHYSPORTSTATE_POLL,
-	[INFINIPATH_IBCS_LT_STATE_SLEEPDELAY] = IB_PHYSPORTSTATE_SLEEP,
-	[INFINIPATH_IBCS_LT_STATE_SLEEPQUIET] = IB_PHYSPORTSTATE_SLEEP,
-	[INFINIPATH_IBCS_LT_STATE_CFGDEBOUNCE] =
-		IB_PHYSPORTSTATE_CFG_TRAIN,
-	[INFINIPATH_IBCS_LT_STATE_CFGRCVFCFG] =
-		IB_PHYSPORTSTATE_CFG_TRAIN,
-	[INFINIPATH_IBCS_LT_STATE_CFGWAITRMT] =
-		IB_PHYSPORTSTATE_CFG_TRAIN,
-	[INFINIPATH_IBCS_LT_STATE_CFGIDLE] = IB_PHYSPORTSTATE_CFG_TRAIN,
-	[INFINIPATH_IBCS_LT_STATE_RECOVERRETRAIN] =
-		IB_PHYSPORTSTATE_LINK_ERR_RECOVER,
-	[INFINIPATH_IBCS_LT_STATE_RECOVERWAITRMT] =
-		IB_PHYSPORTSTATE_LINK_ERR_RECOVER,
-	[INFINIPATH_IBCS_LT_STATE_RECOVERIDLE] =
-		IB_PHYSPORTSTATE_LINK_ERR_RECOVER,
-	[0x10] = IB_PHYSPORTSTATE_CFG_TRAIN,
-	[0x11] = IB_PHYSPORTSTATE_CFG_TRAIN,
-	[0x12] = IB_PHYSPORTSTATE_CFG_TRAIN,
-	[0x13] = IB_PHYSPORTSTATE_CFG_TRAIN,
-	[0x14] = IB_PHYSPORTSTATE_CFG_TRAIN,
-	[0x15] = IB_PHYSPORTSTATE_CFG_TRAIN,
-	[0x16] = IB_PHYSPORTSTATE_CFG_TRAIN,
-	[0x17] = IB_PHYSPORTSTATE_CFG_TRAIN
-};
-
-u32 ipath_get_cr_errpkey(struct ipath_devdata *dd)
-{
-	return ipath_read_creg32(dd, dd->ipath_cregs->cr_errpkey);
-}
-
-static int ipath_query_port(struct ib_device *ibdev,
-			    u8 port, struct ib_port_attr *props)
-{
-	struct ipath_ibdev *dev = to_idev(ibdev);
-	struct ipath_devdata *dd = dev->dd;
-	enum ib_mtu mtu;
-	u16 lid = dd->ipath_lid;
-	u64 ibcstat;
-
-	memset(props, 0, sizeof(*props));
-	props->lid = lid ? lid : be16_to_cpu(IB_LID_PERMISSIVE);
-	props->lmc = dd->ipath_lmc;
-	props->sm_lid = dev->sm_lid;
-	props->sm_sl = dev->sm_sl;
-	ibcstat = dd->ipath_lastibcstat;
-	/* map LinkState to IB portinfo values.  */
-	props->state = ipath_ib_linkstate(dd, ibcstat) + 1;
-
-	/* See phys_state_show() */
-	props->phys_state = /* MEA: assumes shift == 0 */
-		ipath_cvt_physportstate[dd->ipath_lastibcstat &
-		dd->ibcs_lts_mask];
-	props->port_cap_flags = dev->port_cap_flags;
-	props->gid_tbl_len = 1;
-	props->max_msg_sz = 0x80000000;
-	props->pkey_tbl_len = ipath_get_npkeys(dd);
-	props->bad_pkey_cntr = ipath_get_cr_errpkey(dd) -
-		dev->z_pkey_violations;
-	props->qkey_viol_cntr = dev->qkey_violations;
-	props->active_width = dd->ipath_link_width_active;
-	/* See rate_show() */
-	props->active_speed = dd->ipath_link_speed_active;
-	props->max_vl_num = 1;		/* VLCap = VL0 */
-	props->init_type_reply = 0;
-
-	props->max_mtu = ipath_mtu4096 ? IB_MTU_4096 : IB_MTU_2048;
-	switch (dd->ipath_ibmtu) {
-	case 4096:
-		mtu = IB_MTU_4096;
-		break;
-	case 2048:
-		mtu = IB_MTU_2048;
-		break;
-	case 1024:
-		mtu = IB_MTU_1024;
-		break;
-	case 512:
-		mtu = IB_MTU_512;
-		break;
-	case 256:
-		mtu = IB_MTU_256;
-		break;
-	default:
-		mtu = IB_MTU_2048;
-	}
-	props->active_mtu = mtu;
-	props->subnet_timeout = dev->subnet_timeout;
-
-	return 0;
-}
-
-static int ipath_modify_device(struct ib_device *device,
-			       int device_modify_mask,
-			       struct ib_device_modify *device_modify)
-{
-	int ret;
-
-	if (device_modify_mask & ~(IB_DEVICE_MODIFY_SYS_IMAGE_GUID |
-				   IB_DEVICE_MODIFY_NODE_DESC)) {
-		ret = -EOPNOTSUPP;
-		goto bail;
-	}
-
-	if (device_modify_mask & IB_DEVICE_MODIFY_NODE_DESC)
-		memcpy(device->node_desc, device_modify->node_desc, 64);
-
-	if (device_modify_mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID)
-		to_idev(device)->sys_image_guid =
-			cpu_to_be64(device_modify->sys_image_guid);
-
-	ret = 0;
-
-bail:
-	return ret;
-}
-
-static int ipath_modify_port(struct ib_device *ibdev,
-			     u8 port, int port_modify_mask,
-			     struct ib_port_modify *props)
-{
-	struct ipath_ibdev *dev = to_idev(ibdev);
-
-	dev->port_cap_flags |= props->set_port_cap_mask;
-	dev->port_cap_flags &= ~props->clr_port_cap_mask;
-	if (port_modify_mask & IB_PORT_SHUTDOWN)
-		ipath_set_linkstate(dev->dd, IPATH_IB_LINKDOWN);
-	if (port_modify_mask & IB_PORT_RESET_QKEY_CNTR)
-		dev->qkey_violations = 0;
-	return 0;
-}
-
-static int ipath_query_gid(struct ib_device *ibdev, u8 port,
-			   int index, union ib_gid *gid)
-{
-	struct ipath_ibdev *dev = to_idev(ibdev);
-	int ret;
-
-	if (index >= 1) {
-		ret = -EINVAL;
-		goto bail;
-	}
-	gid->global.subnet_prefix = dev->gid_prefix;
-	gid->global.interface_id = dev->dd->ipath_guid;
-
-	ret = 0;
-
-bail:
-	return ret;
-}
-
-static struct ib_pd *ipath_alloc_pd(struct ib_device *ibdev,
-				    struct ib_ucontext *context,
-				    struct ib_udata *udata)
-{
-	struct ipath_ibdev *dev = to_idev(ibdev);
-	struct ipath_pd *pd;
-	struct ib_pd *ret;
-
-	/*
-	 * This is actually totally arbitrary.	Some correctness tests
-	 * assume there's a maximum number of PDs that can be allocated.
-	 * We don't actually have this limit, but we fail the test if
-	 * we allow allocations of more than we report for this value.
-	 */
-
-	pd = kmalloc(sizeof *pd, GFP_KERNEL);
-	if (!pd) {
-		ret = ERR_PTR(-ENOMEM);
-		goto bail;
-	}
-
-	spin_lock(&dev->n_pds_lock);
-	if (dev->n_pds_allocated == ib_ipath_max_pds) {
-		spin_unlock(&dev->n_pds_lock);
-		kfree(pd);
-		ret = ERR_PTR(-ENOMEM);
-		goto bail;
-	}
-
-	dev->n_pds_allocated++;
-	spin_unlock(&dev->n_pds_lock);
-
-	/* ib_alloc_pd() will initialize pd->ibpd. */
-	pd->user = udata != NULL;
-
-	ret = &pd->ibpd;
-
-bail:
-	return ret;
-}
-
-static int ipath_dealloc_pd(struct ib_pd *ibpd)
-{
-	struct ipath_pd *pd = to_ipd(ibpd);
-	struct ipath_ibdev *dev = to_idev(ibpd->device);
-
-	spin_lock(&dev->n_pds_lock);
-	dev->n_pds_allocated--;
-	spin_unlock(&dev->n_pds_lock);
-
-	kfree(pd);
-
-	return 0;
-}
-
-/**
- * ipath_create_ah - create an address handle
- * @pd: the protection domain
- * @ah_attr: the attributes of the AH
- *
- * This may be called from interrupt context.
- */
-static struct ib_ah *ipath_create_ah(struct ib_pd *pd,
-				     struct ib_ah_attr *ah_attr)
-{
-	struct ipath_ah *ah;
-	struct ib_ah *ret;
-	struct ipath_ibdev *dev = to_idev(pd->device);
-	unsigned long flags;
-
-	/* A multicast address requires a GRH (see ch. 8.4.1). */
-	if (ah_attr->dlid >= IPATH_MULTICAST_LID_BASE &&
-	    ah_attr->dlid != IPATH_PERMISSIVE_LID &&
-	    !(ah_attr->ah_flags & IB_AH_GRH)) {
-		ret = ERR_PTR(-EINVAL);
-		goto bail;
-	}
-
-	if (ah_attr->dlid == 0) {
-		ret = ERR_PTR(-EINVAL);
-		goto bail;
-	}
-
-	if (ah_attr->port_num < 1 ||
-	    ah_attr->port_num > pd->device->phys_port_cnt) {
-		ret = ERR_PTR(-EINVAL);
-		goto bail;
-	}
-
-	ah = kmalloc(sizeof *ah, GFP_ATOMIC);
-	if (!ah) {
-		ret = ERR_PTR(-ENOMEM);
-		goto bail;
-	}
-
-	spin_lock_irqsave(&dev->n_ahs_lock, flags);
-	if (dev->n_ahs_allocated == ib_ipath_max_ahs) {
-		spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
-		kfree(ah);
-		ret = ERR_PTR(-ENOMEM);
-		goto bail;
-	}
-
-	dev->n_ahs_allocated++;
-	spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
-
-	/* ib_create_ah() will initialize ah->ibah. */
-	ah->attr = *ah_attr;
-	ah->attr.static_rate = ipath_ib_rate_to_mult(ah_attr->static_rate);
-
-	ret = &ah->ibah;
-
-bail:
-	return ret;
-}
-
-/**
- * ipath_destroy_ah - destroy an address handle
- * @ibah: the AH to destroy
- *
- * This may be called from interrupt context.
- */
-static int ipath_destroy_ah(struct ib_ah *ibah)
-{
-	struct ipath_ibdev *dev = to_idev(ibah->device);
-	struct ipath_ah *ah = to_iah(ibah);
-	unsigned long flags;
-
-	spin_lock_irqsave(&dev->n_ahs_lock, flags);
-	dev->n_ahs_allocated--;
-	spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
-
-	kfree(ah);
-
-	return 0;
-}
-
-static int ipath_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
-{
-	struct ipath_ah *ah = to_iah(ibah);
-
-	*ah_attr = ah->attr;
-	ah_attr->static_rate = ipath_mult_to_ib_rate(ah->attr.static_rate);
-
-	return 0;
-}
-
-/**
- * ipath_get_npkeys - return the size of the PKEY table for port 0
- * @dd: the infinipath device
- */
-unsigned ipath_get_npkeys(struct ipath_devdata *dd)
-{
-	return ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys);
-}
-
-/**
- * ipath_get_pkey - return the indexed PKEY from the port PKEY table
- * @dd: the infinipath device
- * @index: the PKEY index
- */
-unsigned ipath_get_pkey(struct ipath_devdata *dd, unsigned index)
-{
-	unsigned ret;
-
-	/* always a kernel port, no locking needed */
-	if (index >= ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys))
-		ret = 0;
-	else
-		ret = dd->ipath_pd[0]->port_pkeys[index];
-
-	return ret;
-}
-
-static int ipath_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
-			    u16 *pkey)
-{
-	struct ipath_ibdev *dev = to_idev(ibdev);
-	int ret;
-
-	if (index >= ipath_get_npkeys(dev->dd)) {
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	*pkey = ipath_get_pkey(dev->dd, index);
-	ret = 0;
-
-bail:
-	return ret;
-}
-
-/**
- * ipath_alloc_ucontext - allocate a ucontest
- * @ibdev: the infiniband device
- * @udata: not used by the InfiniPath driver
- */
-
-static struct ib_ucontext *ipath_alloc_ucontext(struct ib_device *ibdev,
-						struct ib_udata *udata)
-{
-	struct ipath_ucontext *context;
-	struct ib_ucontext *ret;
-
-	context = kmalloc(sizeof *context, GFP_KERNEL);
-	if (!context) {
-		ret = ERR_PTR(-ENOMEM);
-		goto bail;
-	}
-
-	ret = &context->ibucontext;
-
-bail:
-	return ret;
-}
-
-static int ipath_dealloc_ucontext(struct ib_ucontext *context)
-{
-	kfree(to_iucontext(context));
-	return 0;
-}
-
-static int ipath_verbs_register_sysfs(struct ib_device *dev);
-
-static void __verbs_timer(unsigned long arg)
-{
-	struct ipath_devdata *dd = (struct ipath_devdata *) arg;
-
-	/* Handle verbs layer timeouts. */
-	ipath_ib_timer(dd->verbs_dev);
-
-	mod_timer(&dd->verbs_timer, jiffies + 1);
-}
-
-static int enable_timer(struct ipath_devdata *dd)
-{
-	/*
-	 * Early chips had a design flaw where the chip and kernel idea
-	 * of the tail register don't always agree, and therefore we won't
-	 * get an interrupt on the next packet received.
-	 * If the board supports per packet receive interrupts, use it.
-	 * Otherwise, the timer function periodically checks for packets
-	 * to cover this case.
-	 * Either way, the timer is needed for verbs layer related
-	 * processing.
-	 */
-	if (dd->ipath_flags & IPATH_GPIO_INTR) {
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_debugportselect,
-				 0x2074076542310ULL);
-		/* Enable GPIO bit 2 interrupt */
-		dd->ipath_gpio_mask |= (u64) (1 << IPATH_GPIO_PORT0_BIT);
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask,
-				 dd->ipath_gpio_mask);
-	}
-
-	setup_timer(&dd->verbs_timer, __verbs_timer, (unsigned long)dd);
-
-	dd->verbs_timer.expires = jiffies + 1;
-	add_timer(&dd->verbs_timer);
-
-	return 0;
-}
-
-static int disable_timer(struct ipath_devdata *dd)
-{
-	/* Disable GPIO bit 2 interrupt */
-	if (dd->ipath_flags & IPATH_GPIO_INTR) {
-                /* Disable GPIO bit 2 interrupt */
-		dd->ipath_gpio_mask &= ~((u64) (1 << IPATH_GPIO_PORT0_BIT));
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask,
-				 dd->ipath_gpio_mask);
-		/*
-		 * We might want to undo changes to debugportselect,
-		 * but how?
-		 */
-	}
-
-	del_timer_sync(&dd->verbs_timer);
-
-	return 0;
-}
-
-static int ipath_port_immutable(struct ib_device *ibdev, u8 port_num,
-			        struct ib_port_immutable *immutable)
-{
-	struct ib_port_attr attr;
-	int err;
-
-	err = ipath_query_port(ibdev, port_num, &attr);
-	if (err)
-		return err;
-
-	immutable->pkey_tbl_len = attr.pkey_tbl_len;
-	immutable->gid_tbl_len = attr.gid_tbl_len;
-	immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
-	immutable->max_mad_size = IB_MGMT_MAD_SIZE;
-
-	return 0;
-}
-
-/**
- * ipath_register_ib_device - register our device with the infiniband core
- * @dd: the device data structure
- * Return the allocated ipath_ibdev pointer or NULL on error.
- */
-int ipath_register_ib_device(struct ipath_devdata *dd)
-{
-	struct ipath_verbs_counters cntrs;
-	struct ipath_ibdev *idev;
-	struct ib_device *dev;
-	struct ipath_verbs_txreq *tx;
-	unsigned i;
-	int ret;
-
-	idev = (struct ipath_ibdev *)ib_alloc_device(sizeof *idev);
-	if (idev == NULL) {
-		ret = -ENOMEM;
-		goto bail;
-	}
-
-	dev = &idev->ibdev;
-
-	if (dd->ipath_sdma_descq_cnt) {
-		tx = kmalloc_array(dd->ipath_sdma_descq_cnt, sizeof *tx,
-				   GFP_KERNEL);
-		if (tx == NULL) {
-			ret = -ENOMEM;
-			goto err_tx;
-		}
-	} else
-		tx = NULL;
-	idev->txreq_bufs = tx;
-
-	/* Only need to initialize non-zero fields. */
-	spin_lock_init(&idev->n_pds_lock);
-	spin_lock_init(&idev->n_ahs_lock);
-	spin_lock_init(&idev->n_cqs_lock);
-	spin_lock_init(&idev->n_qps_lock);
-	spin_lock_init(&idev->n_srqs_lock);
-	spin_lock_init(&idev->n_mcast_grps_lock);
-
-	spin_lock_init(&idev->qp_table.lock);
-	spin_lock_init(&idev->lk_table.lock);
-	idev->sm_lid = be16_to_cpu(IB_LID_PERMISSIVE);
-	/* Set the prefix to the default value (see ch. 4.1.1) */
-	idev->gid_prefix = cpu_to_be64(0xfe80000000000000ULL);
-
-	ret = ipath_init_qp_table(idev, ib_ipath_qp_table_size);
-	if (ret)
-		goto err_qp;
-
-	/*
-	 * The top ib_ipath_lkey_table_size bits are used to index the
-	 * table.  The lower 8 bits can be owned by the user (copied from
-	 * the LKEY).  The remaining bits act as a generation number or tag.
-	 */
-	idev->lk_table.max = 1 << ib_ipath_lkey_table_size;
-	idev->lk_table.table = kcalloc(idev->lk_table.max,
-				       sizeof(*idev->lk_table.table),
-				       GFP_KERNEL);
-	if (idev->lk_table.table == NULL) {
-		ret = -ENOMEM;
-		goto err_lk;
-	}
-	INIT_LIST_HEAD(&idev->pending_mmaps);
-	spin_lock_init(&idev->pending_lock);
-	idev->mmap_offset = PAGE_SIZE;
-	spin_lock_init(&idev->mmap_offset_lock);
-	INIT_LIST_HEAD(&idev->pending[0]);
-	INIT_LIST_HEAD(&idev->pending[1]);
-	INIT_LIST_HEAD(&idev->pending[2]);
-	INIT_LIST_HEAD(&idev->piowait);
-	INIT_LIST_HEAD(&idev->rnrwait);
-	INIT_LIST_HEAD(&idev->txreq_free);
-	idev->pending_index = 0;
-	idev->port_cap_flags =
-		IB_PORT_SYS_IMAGE_GUID_SUP | IB_PORT_CLIENT_REG_SUP;
-	if (dd->ipath_flags & IPATH_HAS_LINK_LATENCY)
-		idev->port_cap_flags |= IB_PORT_LINK_LATENCY_SUP;
-	idev->pma_counter_select[0] = IB_PMA_PORT_XMIT_DATA;
-	idev->pma_counter_select[1] = IB_PMA_PORT_RCV_DATA;
-	idev->pma_counter_select[2] = IB_PMA_PORT_XMIT_PKTS;
-	idev->pma_counter_select[3] = IB_PMA_PORT_RCV_PKTS;
-	idev->pma_counter_select[4] = IB_PMA_PORT_XMIT_WAIT;
-
-	/* Snapshot current HW counters to "clear" them. */
-	ipath_get_counters(dd, &cntrs);
-	idev->z_symbol_error_counter = cntrs.symbol_error_counter;
-	idev->z_link_error_recovery_counter =
-		cntrs.link_error_recovery_counter;
-	idev->z_link_downed_counter = cntrs.link_downed_counter;
-	idev->z_port_rcv_errors = cntrs.port_rcv_errors;
-	idev->z_port_rcv_remphys_errors =
-		cntrs.port_rcv_remphys_errors;
-	idev->z_port_xmit_discards = cntrs.port_xmit_discards;
-	idev->z_port_xmit_data = cntrs.port_xmit_data;
-	idev->z_port_rcv_data = cntrs.port_rcv_data;
-	idev->z_port_xmit_packets = cntrs.port_xmit_packets;
-	idev->z_port_rcv_packets = cntrs.port_rcv_packets;
-	idev->z_local_link_integrity_errors =
-		cntrs.local_link_integrity_errors;
-	idev->z_excessive_buffer_overrun_errors =
-		cntrs.excessive_buffer_overrun_errors;
-	idev->z_vl15_dropped = cntrs.vl15_dropped;
-
-	for (i = 0; i < dd->ipath_sdma_descq_cnt; i++, tx++)
-		list_add(&tx->txreq.list, &idev->txreq_free);
-
-	/*
-	 * The system image GUID is supposed to be the same for all
-	 * IB HCAs in a single system but since there can be other
-	 * device types in the system, we can't be sure this is unique.
-	 */
-	if (!sys_image_guid)
-		sys_image_guid = dd->ipath_guid;
-	idev->sys_image_guid = sys_image_guid;
-	idev->ib_unit = dd->ipath_unit;
-	idev->dd = dd;
-
-	strlcpy(dev->name, "ipath%d", IB_DEVICE_NAME_MAX);
-	dev->owner = THIS_MODULE;
-	dev->node_guid = dd->ipath_guid;
-	dev->uverbs_abi_ver = IPATH_UVERBS_ABI_VERSION;
-	dev->uverbs_cmd_mask =
-		(1ull << IB_USER_VERBS_CMD_GET_CONTEXT)		|
-		(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE)	|
-		(1ull << IB_USER_VERBS_CMD_QUERY_PORT)		|
-		(1ull << IB_USER_VERBS_CMD_ALLOC_PD)		|
-		(1ull << IB_USER_VERBS_CMD_DEALLOC_PD)		|
-		(1ull << IB_USER_VERBS_CMD_CREATE_AH)		|
-		(1ull << IB_USER_VERBS_CMD_DESTROY_AH)		|
-		(1ull << IB_USER_VERBS_CMD_QUERY_AH)		|
-		(1ull << IB_USER_VERBS_CMD_REG_MR)		|
-		(1ull << IB_USER_VERBS_CMD_DEREG_MR)		|
-		(1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
-		(1ull << IB_USER_VERBS_CMD_CREATE_CQ)		|
-		(1ull << IB_USER_VERBS_CMD_RESIZE_CQ)		|
-		(1ull << IB_USER_VERBS_CMD_DESTROY_CQ)		|
-		(1ull << IB_USER_VERBS_CMD_POLL_CQ)		|
-		(1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ)	|
-		(1ull << IB_USER_VERBS_CMD_CREATE_QP)		|
-		(1ull << IB_USER_VERBS_CMD_QUERY_QP)		|
-		(1ull << IB_USER_VERBS_CMD_MODIFY_QP)		|
-		(1ull << IB_USER_VERBS_CMD_DESTROY_QP)		|
-		(1ull << IB_USER_VERBS_CMD_POST_SEND)		|
-		(1ull << IB_USER_VERBS_CMD_POST_RECV)		|
-		(1ull << IB_USER_VERBS_CMD_ATTACH_MCAST)	|
-		(1ull << IB_USER_VERBS_CMD_DETACH_MCAST)	|
-		(1ull << IB_USER_VERBS_CMD_CREATE_SRQ)		|
-		(1ull << IB_USER_VERBS_CMD_MODIFY_SRQ)		|
-		(1ull << IB_USER_VERBS_CMD_QUERY_SRQ)		|
-		(1ull << IB_USER_VERBS_CMD_DESTROY_SRQ)		|
-		(1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV);
-	dev->node_type = RDMA_NODE_IB_CA;
-	dev->phys_port_cnt = 1;
-	dev->num_comp_vectors = 1;
-	dev->dma_device = &dd->pcidev->dev;
-	dev->query_device = ipath_query_device;
-	dev->modify_device = ipath_modify_device;
-	dev->query_port = ipath_query_port;
-	dev->modify_port = ipath_modify_port;
-	dev->query_pkey = ipath_query_pkey;
-	dev->query_gid = ipath_query_gid;
-	dev->alloc_ucontext = ipath_alloc_ucontext;
-	dev->dealloc_ucontext = ipath_dealloc_ucontext;
-	dev->alloc_pd = ipath_alloc_pd;
-	dev->dealloc_pd = ipath_dealloc_pd;
-	dev->create_ah = ipath_create_ah;
-	dev->destroy_ah = ipath_destroy_ah;
-	dev->query_ah = ipath_query_ah;
-	dev->create_srq = ipath_create_srq;
-	dev->modify_srq = ipath_modify_srq;
-	dev->query_srq = ipath_query_srq;
-	dev->destroy_srq = ipath_destroy_srq;
-	dev->create_qp = ipath_create_qp;
-	dev->modify_qp = ipath_modify_qp;
-	dev->query_qp = ipath_query_qp;
-	dev->destroy_qp = ipath_destroy_qp;
-	dev->post_send = ipath_post_send;
-	dev->post_recv = ipath_post_receive;
-	dev->post_srq_recv = ipath_post_srq_receive;
-	dev->create_cq = ipath_create_cq;
-	dev->destroy_cq = ipath_destroy_cq;
-	dev->resize_cq = ipath_resize_cq;
-	dev->poll_cq = ipath_poll_cq;
-	dev->req_notify_cq = ipath_req_notify_cq;
-	dev->get_dma_mr = ipath_get_dma_mr;
-	dev->reg_user_mr = ipath_reg_user_mr;
-	dev->dereg_mr = ipath_dereg_mr;
-	dev->alloc_fmr = ipath_alloc_fmr;
-	dev->map_phys_fmr = ipath_map_phys_fmr;
-	dev->unmap_fmr = ipath_unmap_fmr;
-	dev->dealloc_fmr = ipath_dealloc_fmr;
-	dev->attach_mcast = ipath_multicast_attach;
-	dev->detach_mcast = ipath_multicast_detach;
-	dev->process_mad = ipath_process_mad;
-	dev->mmap = ipath_mmap;
-	dev->dma_ops = &ipath_dma_mapping_ops;
-	dev->get_port_immutable = ipath_port_immutable;
-
-	snprintf(dev->node_desc, sizeof(dev->node_desc),
-		 IPATH_IDSTR " %s", init_utsname()->nodename);
-
-	ret = ib_register_device(dev, NULL);
-	if (ret)
-		goto err_reg;
-
-	ret = ipath_verbs_register_sysfs(dev);
-	if (ret)
-		goto err_class;
-
-	enable_timer(dd);
-
-	goto bail;
-
-err_class:
-	ib_unregister_device(dev);
-err_reg:
-	kfree(idev->lk_table.table);
-err_lk:
-	kfree(idev->qp_table.table);
-err_qp:
-	kfree(idev->txreq_bufs);
-err_tx:
-	ib_dealloc_device(dev);
-	ipath_dev_err(dd, "cannot register verbs: %d!\n", -ret);
-	idev = NULL;
-
-bail:
-	dd->verbs_dev = idev;
-	return ret;
-}
-
-void ipath_unregister_ib_device(struct ipath_ibdev *dev)
-{
-	struct ib_device *ibdev = &dev->ibdev;
-	u32 qps_inuse;
-
-	ib_unregister_device(ibdev);
-
-	disable_timer(dev->dd);
-
-	if (!list_empty(&dev->pending[0]) ||
-	    !list_empty(&dev->pending[1]) ||
-	    !list_empty(&dev->pending[2]))
-		ipath_dev_err(dev->dd, "pending list not empty!\n");
-	if (!list_empty(&dev->piowait))
-		ipath_dev_err(dev->dd, "piowait list not empty!\n");
-	if (!list_empty(&dev->rnrwait))
-		ipath_dev_err(dev->dd, "rnrwait list not empty!\n");
-	if (!ipath_mcast_tree_empty())
-		ipath_dev_err(dev->dd, "multicast table memory leak!\n");
-	/*
-	 * Note that ipath_unregister_ib_device() can be called before all
-	 * the QPs are destroyed!
-	 */
-	qps_inuse = ipath_free_all_qps(&dev->qp_table);
-	if (qps_inuse)
-		ipath_dev_err(dev->dd, "QP memory leak! %u still in use\n",
-			qps_inuse);
-	kfree(dev->qp_table.table);
-	kfree(dev->lk_table.table);
-	kfree(dev->txreq_bufs);
-	ib_dealloc_device(ibdev);
-}
-
-static ssize_t show_rev(struct device *device, struct device_attribute *attr,
-			char *buf)
-{
-	struct ipath_ibdev *dev =
-		container_of(device, struct ipath_ibdev, ibdev.dev);
-
-	return sprintf(buf, "%x\n", dev->dd->ipath_pcirev);
-}
-
-static ssize_t show_hca(struct device *device, struct device_attribute *attr,
-			char *buf)
-{
-	struct ipath_ibdev *dev =
-		container_of(device, struct ipath_ibdev, ibdev.dev);
-	int ret;
-
-	ret = dev->dd->ipath_f_get_boardname(dev->dd, buf, 128);
-	if (ret < 0)
-		goto bail;
-	strcat(buf, "\n");
-	ret = strlen(buf);
-
-bail:
-	return ret;
-}
-
-static ssize_t show_stats(struct device *device, struct device_attribute *attr,
-			  char *buf)
-{
-	struct ipath_ibdev *dev =
-		container_of(device, struct ipath_ibdev, ibdev.dev);
-	int i;
-	int len;
-
-	len = sprintf(buf,
-		      "RC resends  %d\n"
-		      "RC no QACK  %d\n"
-		      "RC ACKs     %d\n"
-		      "RC SEQ NAKs %d\n"
-		      "RC RDMA seq %d\n"
-		      "RC RNR NAKs %d\n"
-		      "RC OTH NAKs %d\n"
-		      "RC timeouts %d\n"
-		      "RC RDMA dup %d\n"
-		      "piobuf wait %d\n"
-		      "unaligned   %d\n"
-		      "PKT drops   %d\n"
-		      "WQE errs    %d\n",
-		      dev->n_rc_resends, dev->n_rc_qacks, dev->n_rc_acks,
-		      dev->n_seq_naks, dev->n_rdma_seq, dev->n_rnr_naks,
-		      dev->n_other_naks, dev->n_timeouts,
-		      dev->n_rdma_dup_busy, dev->n_piowait, dev->n_unaligned,
-		      dev->n_pkt_drops, dev->n_wqe_errs);
-	for (i = 0; i < ARRAY_SIZE(dev->opstats); i++) {
-		const struct ipath_opcode_stats *si = &dev->opstats[i];
-
-		if (!si->n_packets && !si->n_bytes)
-			continue;
-		len += sprintf(buf + len, "%02x %llu/%llu\n", i,
-			       (unsigned long long) si->n_packets,
-			       (unsigned long long) si->n_bytes);
-	}
-	return len;
-}
-
-static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
-static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
-static DEVICE_ATTR(board_id, S_IRUGO, show_hca, NULL);
-static DEVICE_ATTR(stats, S_IRUGO, show_stats, NULL);
-
-static struct device_attribute *ipath_class_attributes[] = {
-	&dev_attr_hw_rev,
-	&dev_attr_hca_type,
-	&dev_attr_board_id,
-	&dev_attr_stats
-};
-
-static int ipath_verbs_register_sysfs(struct ib_device *dev)
-{
-	int i;
-	int ret;
-
-	for (i = 0; i < ARRAY_SIZE(ipath_class_attributes); ++i) {
-		ret = device_create_file(&dev->dev,
-				       ipath_class_attributes[i]);
-		if (ret)
-			goto bail;
-	}
-	return 0;
-bail:
-	for (i = 0; i < ARRAY_SIZE(ipath_class_attributes); ++i)
-		device_remove_file(&dev->dev, ipath_class_attributes[i]);
-	return ret;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_verbs.h b/drivers/staging/rdma/ipath/ipath_verbs.h
deleted file mode 100644
index 6c70a89..0000000
--- a/drivers/staging/rdma/ipath/ipath_verbs.h
+++ /dev/null
@@ -1,941 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef IPATH_VERBS_H
-#define IPATH_VERBS_H
-
-#include <linux/types.h>
-#include <linux/spinlock.h>
-#include <linux/kernel.h>
-#include <linux/interrupt.h>
-#include <linux/kref.h>
-#include <rdma/ib_pack.h>
-#include <rdma/ib_user_verbs.h>
-
-#include "ipath_kernel.h"
-
-#define IPATH_MAX_RDMA_ATOMIC	4
-
-#define QPN_MAX                 (1 << 24)
-#define QPNMAP_ENTRIES          (QPN_MAX / PAGE_SIZE / BITS_PER_BYTE)
-
-/*
- * Increment this value if any changes that break userspace ABI
- * compatibility are made.
- */
-#define IPATH_UVERBS_ABI_VERSION       2
-
-/*
- * Define an ib_cq_notify value that is not valid so we know when CQ
- * notifications are armed.
- */
-#define IB_CQ_NONE	(IB_CQ_NEXT_COMP + 1)
-
-/* AETH NAK opcode values */
-#define IB_RNR_NAK			0x20
-#define IB_NAK_PSN_ERROR		0x60
-#define IB_NAK_INVALID_REQUEST		0x61
-#define IB_NAK_REMOTE_ACCESS_ERROR	0x62
-#define IB_NAK_REMOTE_OPERATIONAL_ERROR 0x63
-#define IB_NAK_INVALID_RD_REQUEST	0x64
-
-/* Flags for checking QP state (see ib_ipath_state_ops[]) */
-#define IPATH_POST_SEND_OK		0x01
-#define IPATH_POST_RECV_OK		0x02
-#define IPATH_PROCESS_RECV_OK		0x04
-#define IPATH_PROCESS_SEND_OK		0x08
-#define IPATH_PROCESS_NEXT_SEND_OK	0x10
-#define IPATH_FLUSH_SEND		0x20
-#define IPATH_FLUSH_RECV		0x40
-#define IPATH_PROCESS_OR_FLUSH_SEND \
-	(IPATH_PROCESS_SEND_OK | IPATH_FLUSH_SEND)
-
-/* IB Performance Manager status values */
-#define IB_PMA_SAMPLE_STATUS_DONE	0x00
-#define IB_PMA_SAMPLE_STATUS_STARTED	0x01
-#define IB_PMA_SAMPLE_STATUS_RUNNING	0x02
-
-/* Mandatory IB performance counter select values. */
-#define IB_PMA_PORT_XMIT_DATA	cpu_to_be16(0x0001)
-#define IB_PMA_PORT_RCV_DATA	cpu_to_be16(0x0002)
-#define IB_PMA_PORT_XMIT_PKTS	cpu_to_be16(0x0003)
-#define IB_PMA_PORT_RCV_PKTS	cpu_to_be16(0x0004)
-#define IB_PMA_PORT_XMIT_WAIT	cpu_to_be16(0x0005)
-
-struct ib_reth {
-	__be64 vaddr;
-	__be32 rkey;
-	__be32 length;
-} __attribute__ ((packed));
-
-struct ib_atomic_eth {
-	__be32 vaddr[2];	/* unaligned so access as 2 32-bit words */
-	__be32 rkey;
-	__be64 swap_data;
-	__be64 compare_data;
-} __attribute__ ((packed));
-
-struct ipath_other_headers {
-	__be32 bth[3];
-	union {
-		struct {
-			__be32 deth[2];
-			__be32 imm_data;
-		} ud;
-		struct {
-			struct ib_reth reth;
-			__be32 imm_data;
-		} rc;
-		struct {
-			__be32 aeth;
-			__be32 atomic_ack_eth[2];
-		} at;
-		__be32 imm_data;
-		__be32 aeth;
-		struct ib_atomic_eth atomic_eth;
-	} u;
-} __attribute__ ((packed));
-
-/*
- * Note that UD packets with a GRH header are 8+40+12+8 = 68 bytes
- * long (72 w/ imm_data).  Only the first 56 bytes of the IB header
- * will be in the eager header buffer.  The remaining 12 or 16 bytes
- * are in the data buffer.
- */
-struct ipath_ib_header {
-	__be16 lrh[4];
-	union {
-		struct {
-			struct ib_grh grh;
-			struct ipath_other_headers oth;
-		} l;
-		struct ipath_other_headers oth;
-	} u;
-} __attribute__ ((packed));
-
-struct ipath_pio_header {
-	__le32 pbc[2];
-	struct ipath_ib_header hdr;
-} __attribute__ ((packed));
-
-/*
- * There is one struct ipath_mcast for each multicast GID.
- * All attached QPs are then stored as a list of
- * struct ipath_mcast_qp.
- */
-struct ipath_mcast_qp {
-	struct list_head list;
-	struct ipath_qp *qp;
-};
-
-struct ipath_mcast {
-	struct rb_node rb_node;
-	union ib_gid mgid;
-	struct list_head qp_list;
-	wait_queue_head_t wait;
-	atomic_t refcount;
-	int n_attached;
-};
-
-/* Protection domain */
-struct ipath_pd {
-	struct ib_pd ibpd;
-	int user;		/* non-zero if created from user space */
-};
-
-/* Address Handle */
-struct ipath_ah {
-	struct ib_ah ibah;
-	struct ib_ah_attr attr;
-};
-
-/*
- * This structure is used by ipath_mmap() to validate an offset
- * when an mmap() request is made.  The vm_area_struct then uses
- * this as its vm_private_data.
- */
-struct ipath_mmap_info {
-	struct list_head pending_mmaps;
-	struct ib_ucontext *context;
-	void *obj;
-	__u64 offset;
-	struct kref ref;
-	unsigned size;
-};
-
-/*
- * This structure is used to contain the head pointer, tail pointer,
- * and completion queue entries as a single memory allocation so
- * it can be mmap'ed into user space.
- */
-struct ipath_cq_wc {
-	u32 head;		/* index of next entry to fill */
-	u32 tail;		/* index of next ib_poll_cq() entry */
-	union {
-		/* these are actually size ibcq.cqe + 1 */
-		struct ib_uverbs_wc uqueue[0];
-		struct ib_wc kqueue[0];
-	};
-};
-
-/*
- * The completion queue structure.
- */
-struct ipath_cq {
-	struct ib_cq ibcq;
-	struct tasklet_struct comptask;
-	spinlock_t lock;
-	u8 notify;
-	u8 triggered;
-	struct ipath_cq_wc *queue;
-	struct ipath_mmap_info *ip;
-};
-
-/*
- * A segment is a linear region of low physical memory.
- * XXX Maybe we should use phys addr here and kmap()/kunmap().
- * Used by the verbs layer.
- */
-struct ipath_seg {
-	void *vaddr;
-	size_t length;
-};
-
-/* The number of ipath_segs that fit in a page. */
-#define IPATH_SEGSZ     (PAGE_SIZE / sizeof (struct ipath_seg))
-
-struct ipath_segarray {
-	struct ipath_seg segs[IPATH_SEGSZ];
-};
-
-struct ipath_mregion {
-	struct ib_pd *pd;	/* shares refcnt of ibmr.pd */
-	u64 user_base;		/* User's address for this region */
-	u64 iova;		/* IB start address of this region */
-	size_t length;
-	u32 lkey;
-	u32 offset;		/* offset (bytes) to start of region */
-	int access_flags;
-	u32 max_segs;		/* number of ipath_segs in all the arrays */
-	u32 mapsz;		/* size of the map array */
-	struct ipath_segarray *map[0];	/* the segments */
-};
-
-/*
- * These keep track of the copy progress within a memory region.
- * Used by the verbs layer.
- */
-struct ipath_sge {
-	struct ipath_mregion *mr;
-	void *vaddr;		/* kernel virtual address of segment */
-	u32 sge_length;		/* length of the SGE */
-	u32 length;		/* remaining length of the segment */
-	u16 m;			/* current index: mr->map[m] */
-	u16 n;			/* current index: mr->map[m]->segs[n] */
-};
-
-/* Memory region */
-struct ipath_mr {
-	struct ib_mr ibmr;
-	struct ib_umem *umem;
-	struct ipath_mregion mr;	/* must be last */
-};
-
-/*
- * Send work request queue entry.
- * The size of the sg_list is determined when the QP is created and stored
- * in qp->s_max_sge.
- */
-struct ipath_swqe {
-	union {
-		struct ib_send_wr wr;   /* don't use wr.sg_list */
-		struct ib_ud_wr ud_wr;
-		struct ib_rdma_wr rdma_wr;
-		struct ib_atomic_wr atomic_wr;
-	};
-
-	u32 psn;		/* first packet sequence number */
-	u32 lpsn;		/* last packet sequence number */
-	u32 ssn;		/* send sequence number */
-	u32 length;		/* total length of data in sg_list */
-	struct ipath_sge sg_list[0];
-};
-
-/*
- * Receive work request queue entry.
- * The size of the sg_list is determined when the QP (or SRQ) is created
- * and stored in qp->r_rq.max_sge (or srq->rq.max_sge).
- */
-struct ipath_rwqe {
-	u64 wr_id;
-	u8 num_sge;
-	struct ib_sge sg_list[0];
-};
-
-/*
- * This structure is used to contain the head pointer, tail pointer,
- * and receive work queue entries as a single memory allocation so
- * it can be mmap'ed into user space.
- * Note that the wq array elements are variable size so you can't
- * just index into the array to get the N'th element;
- * use get_rwqe_ptr() instead.
- */
-struct ipath_rwq {
-	u32 head;		/* new work requests posted to the head */
-	u32 tail;		/* receives pull requests from here. */
-	struct ipath_rwqe wq[0];
-};
-
-struct ipath_rq {
-	struct ipath_rwq *wq;
-	spinlock_t lock;
-	u32 size;		/* size of RWQE array */
-	u8 max_sge;
-};
-
-struct ipath_srq {
-	struct ib_srq ibsrq;
-	struct ipath_rq rq;
-	struct ipath_mmap_info *ip;
-	/* send signal when number of RWQEs < limit */
-	u32 limit;
-};
-
-struct ipath_sge_state {
-	struct ipath_sge *sg_list;      /* next SGE to be used if any */
-	struct ipath_sge sge;   /* progress state for the current SGE */
-	u8 num_sge;
-	u8 static_rate;
-};
-
-/*
- * This structure holds the information that the send tasklet needs
- * to send a RDMA read response or atomic operation.
- */
-struct ipath_ack_entry {
-	u8 opcode;
-	u8 sent;
-	u32 psn;
-	union {
-		struct ipath_sge_state rdma_sge;
-		u64 atomic_data;
-	};
-};
-
-/*
- * Variables prefixed with s_ are for the requester (sender).
- * Variables prefixed with r_ are for the responder (receiver).
- * Variables prefixed with ack_ are for responder replies.
- *
- * Common variables are protected by both r_rq.lock and s_lock in that order
- * which only happens in modify_qp() or changing the QP 'state'.
- */
-struct ipath_qp {
-	struct ib_qp ibqp;
-	struct ipath_qp *next;		/* link list for QPN hash table */
-	struct ipath_qp *timer_next;	/* link list for ipath_ib_timer() */
-	struct ipath_qp *pio_next;	/* link for ipath_ib_piobufavail() */
-	struct list_head piowait;	/* link for wait PIO buf */
-	struct list_head timerwait;	/* link for waiting for timeouts */
-	struct ib_ah_attr remote_ah_attr;
-	struct ipath_ib_header s_hdr;	/* next packet header to send */
-	atomic_t refcount;
-	wait_queue_head_t wait;
-	wait_queue_head_t wait_dma;
-	struct tasklet_struct s_task;
-	struct ipath_mmap_info *ip;
-	struct ipath_sge_state *s_cur_sge;
-	struct ipath_verbs_txreq *s_tx;
-	struct ipath_sge_state s_sge;	/* current send request data */
-	struct ipath_ack_entry s_ack_queue[IPATH_MAX_RDMA_ATOMIC + 1];
-	struct ipath_sge_state s_ack_rdma_sge;
-	struct ipath_sge_state s_rdma_read_sge;
-	struct ipath_sge_state r_sge;	/* current receive data */
-	spinlock_t s_lock;
-	atomic_t s_dma_busy;
-	u16 s_pkt_delay;
-	u16 s_hdrwords;		/* size of s_hdr in 32 bit words */
-	u32 s_cur_size;		/* size of send packet in bytes */
-	u32 s_len;		/* total length of s_sge */
-	u32 s_rdma_read_len;	/* total length of s_rdma_read_sge */
-	u32 s_next_psn;		/* PSN for next request */
-	u32 s_last_psn;		/* last response PSN processed */
-	u32 s_psn;		/* current packet sequence number */
-	u32 s_ack_rdma_psn;	/* PSN for sending RDMA read responses */
-	u32 s_ack_psn;		/* PSN for acking sends and RDMA writes */
-	u32 s_rnr_timeout;	/* number of milliseconds for RNR timeout */
-	u32 r_ack_psn;		/* PSN for next ACK or atomic ACK */
-	u64 r_wr_id;		/* ID for current receive WQE */
-	unsigned long r_aflags;
-	u32 r_len;		/* total length of r_sge */
-	u32 r_rcv_len;		/* receive data len processed */
-	u32 r_psn;		/* expected rcv packet sequence number */
-	u32 r_msn;		/* message sequence number */
-	u8 state;		/* QP state */
-	u8 s_state;		/* opcode of last packet sent */
-	u8 s_ack_state;		/* opcode of packet to ACK */
-	u8 s_nak_state;		/* non-zero if NAK is pending */
-	u8 r_state;		/* opcode of last packet received */
-	u8 r_nak_state;		/* non-zero if NAK is pending */
-	u8 r_min_rnr_timer;	/* retry timeout value for RNR NAKs */
-	u8 r_flags;
-	u8 r_max_rd_atomic;	/* max number of RDMA read/atomic to receive */
-	u8 r_head_ack_queue;	/* index into s_ack_queue[] */
-	u8 qp_access_flags;
-	u8 s_max_sge;		/* size of s_wq->sg_list */
-	u8 s_retry_cnt;		/* number of times to retry */
-	u8 s_rnr_retry_cnt;
-	u8 s_retry;		/* requester retry counter */
-	u8 s_rnr_retry;		/* requester RNR retry counter */
-	u8 s_pkey_index;	/* PKEY index to use */
-	u8 s_max_rd_atomic;	/* max number of RDMA read/atomic to send */
-	u8 s_num_rd_atomic;	/* number of RDMA read/atomic pending */
-	u8 s_tail_ack_queue;	/* index into s_ack_queue[] */
-	u8 s_flags;
-	u8 s_dmult;
-	u8 s_draining;
-	u8 timeout;		/* Timeout for this QP */
-	enum ib_mtu path_mtu;
-	u32 remote_qpn;
-	u32 qkey;		/* QKEY for this QP (for UD or RD) */
-	u32 s_size;		/* send work queue size */
-	u32 s_head;		/* new entries added here */
-	u32 s_tail;		/* next entry to process */
-	u32 s_cur;		/* current work queue entry */
-	u32 s_last;		/* last un-ACK'ed entry */
-	u32 s_ssn;		/* SSN of tail entry */
-	u32 s_lsn;		/* limit sequence number (credit) */
-	struct ipath_swqe *s_wq;	/* send work queue */
-	struct ipath_swqe *s_wqe;
-	struct ipath_sge *r_ud_sg_list;
-	struct ipath_rq r_rq;		/* receive work queue */
-	struct ipath_sge r_sg_list[0];	/* verified SGEs */
-};
-
-/*
- * Atomic bit definitions for r_aflags.
- */
-#define IPATH_R_WRID_VALID	0
-
-/*
- * Bit definitions for r_flags.
- */
-#define IPATH_R_REUSE_SGE	0x01
-#define IPATH_R_RDMAR_SEQ	0x02
-
-/*
- * Bit definitions for s_flags.
- *
- * IPATH_S_FENCE_PENDING - waiting for all prior RDMA read or atomic SWQEs
- *			   before processing the next SWQE
- * IPATH_S_RDMAR_PENDING - waiting for any RDMA read or atomic SWQEs
- *			   before processing the next SWQE
- * IPATH_S_WAITING - waiting for RNR timeout or send buffer available.
- * IPATH_S_WAIT_SSN_CREDIT - waiting for RC credits to process next SWQE
- * IPATH_S_WAIT_DMA - waiting for send DMA queue to drain before generating
- *		      next send completion entry not via send DMA.
- */
-#define IPATH_S_SIGNAL_REQ_WR	0x01
-#define IPATH_S_FENCE_PENDING	0x02
-#define IPATH_S_RDMAR_PENDING	0x04
-#define IPATH_S_ACK_PENDING	0x08
-#define IPATH_S_BUSY		0x10
-#define IPATH_S_WAITING		0x20
-#define IPATH_S_WAIT_SSN_CREDIT	0x40
-#define IPATH_S_WAIT_DMA	0x80
-
-#define IPATH_S_ANY_WAIT (IPATH_S_FENCE_PENDING | IPATH_S_RDMAR_PENDING | \
-	IPATH_S_WAITING | IPATH_S_WAIT_SSN_CREDIT | IPATH_S_WAIT_DMA)
-
-#define IPATH_PSN_CREDIT	512
-
-/*
- * Since struct ipath_swqe is not a fixed size, we can't simply index into
- * struct ipath_qp.s_wq.  This function does the array index computation.
- */
-static inline struct ipath_swqe *get_swqe_ptr(struct ipath_qp *qp,
-					      unsigned n)
-{
-	return (struct ipath_swqe *)((char *)qp->s_wq +
-				     (sizeof(struct ipath_swqe) +
-				      qp->s_max_sge *
-				      sizeof(struct ipath_sge)) * n);
-}
-
-/*
- * Since struct ipath_rwqe is not a fixed size, we can't simply index into
- * struct ipath_rwq.wq.  This function does the array index computation.
- */
-static inline struct ipath_rwqe *get_rwqe_ptr(struct ipath_rq *rq,
-					      unsigned n)
-{
-	return (struct ipath_rwqe *)
-		((char *) rq->wq->wq +
-		 (sizeof(struct ipath_rwqe) +
-		  rq->max_sge * sizeof(struct ib_sge)) * n);
-}
-
-/*
- * QPN-map pages start out as NULL, they get allocated upon
- * first use and are never deallocated. This way,
- * large bitmaps are not allocated unless large numbers of QPs are used.
- */
-struct qpn_map {
-	atomic_t n_free;
-	void *page;
-};
-
-struct ipath_qp_table {
-	spinlock_t lock;
-	u32 last;		/* last QP number allocated */
-	u32 max;		/* size of the hash table */
-	u32 nmaps;		/* size of the map table */
-	struct ipath_qp **table;
-	/* bit map of free numbers */
-	struct qpn_map map[QPNMAP_ENTRIES];
-};
-
-struct ipath_lkey_table {
-	spinlock_t lock;
-	u32 next;		/* next unused index (speeds search) */
-	u32 gen;		/* generation count */
-	u32 max;		/* size of the table */
-	struct ipath_mregion **table;
-};
-
-struct ipath_opcode_stats {
-	u64 n_packets;		/* number of packets */
-	u64 n_bytes;		/* total number of bytes */
-};
-
-struct ipath_ibdev {
-	struct ib_device ibdev;
-	struct ipath_devdata *dd;
-	struct list_head pending_mmaps;
-	spinlock_t mmap_offset_lock;
-	u32 mmap_offset;
-	int ib_unit;		/* This is the device number */
-	u16 sm_lid;		/* in host order */
-	u8 sm_sl;
-	u8 mkeyprot;
-	/* non-zero when timer is set */
-	unsigned long mkey_lease_timeout;
-
-	/* The following fields are really per port. */
-	struct ipath_qp_table qp_table;
-	struct ipath_lkey_table lk_table;
-	struct list_head pending[3];	/* FIFO of QPs waiting for ACKs */
-	struct list_head piowait;	/* list for wait PIO buf */
-	struct list_head txreq_free;
-	void *txreq_bufs;
-	/* list of QPs waiting for RNR timer */
-	struct list_head rnrwait;
-	spinlock_t pending_lock;
-	__be64 sys_image_guid;	/* in network order */
-	__be64 gid_prefix;	/* in network order */
-	__be64 mkey;
-
-	u32 n_pds_allocated;	/* number of PDs allocated for device */
-	spinlock_t n_pds_lock;
-	u32 n_ahs_allocated;	/* number of AHs allocated for device */
-	spinlock_t n_ahs_lock;
-	u32 n_cqs_allocated;	/* number of CQs allocated for device */
-	spinlock_t n_cqs_lock;
-	u32 n_qps_allocated;	/* number of QPs allocated for device */
-	spinlock_t n_qps_lock;
-	u32 n_srqs_allocated;	/* number of SRQs allocated for device */
-	spinlock_t n_srqs_lock;
-	u32 n_mcast_grps_allocated; /* number of mcast groups allocated */
-	spinlock_t n_mcast_grps_lock;
-
-	u64 ipath_sword;	/* total dwords sent (sample result) */
-	u64 ipath_rword;	/* total dwords received (sample result) */
-	u64 ipath_spkts;	/* total packets sent (sample result) */
-	u64 ipath_rpkts;	/* total packets received (sample result) */
-	/* # of ticks no data sent (sample result) */
-	u64 ipath_xmit_wait;
-	u64 rcv_errors;		/* # of packets with SW detected rcv errs */
-	u64 n_unicast_xmit;	/* total unicast packets sent */
-	u64 n_unicast_rcv;	/* total unicast packets received */
-	u64 n_multicast_xmit;	/* total multicast packets sent */
-	u64 n_multicast_rcv;	/* total multicast packets received */
-	u64 z_symbol_error_counter;		/* starting count for PMA */
-	u64 z_link_error_recovery_counter;	/* starting count for PMA */
-	u64 z_link_downed_counter;		/* starting count for PMA */
-	u64 z_port_rcv_errors;			/* starting count for PMA */
-	u64 z_port_rcv_remphys_errors;		/* starting count for PMA */
-	u64 z_port_xmit_discards;		/* starting count for PMA */
-	u64 z_port_xmit_data;			/* starting count for PMA */
-	u64 z_port_rcv_data;			/* starting count for PMA */
-	u64 z_port_xmit_packets;		/* starting count for PMA */
-	u64 z_port_rcv_packets;			/* starting count for PMA */
-	u32 z_pkey_violations;			/* starting count for PMA */
-	u32 z_local_link_integrity_errors;	/* starting count for PMA */
-	u32 z_excessive_buffer_overrun_errors;	/* starting count for PMA */
-	u32 z_vl15_dropped;			/* starting count for PMA */
-	u32 n_rc_resends;
-	u32 n_rc_acks;
-	u32 n_rc_qacks;
-	u32 n_seq_naks;
-	u32 n_rdma_seq;
-	u32 n_rnr_naks;
-	u32 n_other_naks;
-	u32 n_timeouts;
-	u32 n_pkt_drops;
-	u32 n_vl15_dropped;
-	u32 n_wqe_errs;
-	u32 n_rdma_dup_busy;
-	u32 n_piowait;
-	u32 n_unaligned;
-	u32 port_cap_flags;
-	u32 pma_sample_start;
-	u32 pma_sample_interval;
-	__be16 pma_counter_select[5];
-	u16 pma_tag;
-	u16 qkey_violations;
-	u16 mkey_violations;
-	u16 mkey_lease_period;
-	u16 pending_index;	/* which pending queue is active */
-	u8 pma_sample_status;
-	u8 subnet_timeout;
-	u8 vl_high_limit;
-	struct ipath_opcode_stats opstats[128];
-};
-
-struct ipath_verbs_counters {
-	u64 symbol_error_counter;
-	u64 link_error_recovery_counter;
-	u64 link_downed_counter;
-	u64 port_rcv_errors;
-	u64 port_rcv_remphys_errors;
-	u64 port_xmit_discards;
-	u64 port_xmit_data;
-	u64 port_rcv_data;
-	u64 port_xmit_packets;
-	u64 port_rcv_packets;
-	u32 local_link_integrity_errors;
-	u32 excessive_buffer_overrun_errors;
-	u32 vl15_dropped;
-};
-
-struct ipath_verbs_txreq {
-	struct ipath_qp         *qp;
-	struct ipath_swqe       *wqe;
-	u32                      map_len;
-	u32                      len;
-	struct ipath_sge_state  *ss;
-	struct ipath_pio_header  hdr;
-	struct ipath_sdma_txreq  txreq;
-};
-
-static inline struct ipath_mr *to_imr(struct ib_mr *ibmr)
-{
-	return container_of(ibmr, struct ipath_mr, ibmr);
-}
-
-static inline struct ipath_pd *to_ipd(struct ib_pd *ibpd)
-{
-	return container_of(ibpd, struct ipath_pd, ibpd);
-}
-
-static inline struct ipath_ah *to_iah(struct ib_ah *ibah)
-{
-	return container_of(ibah, struct ipath_ah, ibah);
-}
-
-static inline struct ipath_cq *to_icq(struct ib_cq *ibcq)
-{
-	return container_of(ibcq, struct ipath_cq, ibcq);
-}
-
-static inline struct ipath_srq *to_isrq(struct ib_srq *ibsrq)
-{
-	return container_of(ibsrq, struct ipath_srq, ibsrq);
-}
-
-static inline struct ipath_qp *to_iqp(struct ib_qp *ibqp)
-{
-	return container_of(ibqp, struct ipath_qp, ibqp);
-}
-
-static inline struct ipath_ibdev *to_idev(struct ib_device *ibdev)
-{
-	return container_of(ibdev, struct ipath_ibdev, ibdev);
-}
-
-/*
- * This must be called with s_lock held.
- */
-static inline void ipath_schedule_send(struct ipath_qp *qp)
-{
-	if (qp->s_flags & IPATH_S_ANY_WAIT)
-		qp->s_flags &= ~IPATH_S_ANY_WAIT;
-	if (!(qp->s_flags & IPATH_S_BUSY))
-		tasklet_hi_schedule(&qp->s_task);
-}
-
-int ipath_process_mad(struct ib_device *ibdev,
-		      int mad_flags,
-		      u8 port_num,
-		      const struct ib_wc *in_wc,
-		      const struct ib_grh *in_grh,
-		      const struct ib_mad_hdr *in, size_t in_mad_size,
-		      struct ib_mad_hdr *out, size_t *out_mad_size,
-		      u16 *out_mad_pkey_index);
-
-/*
- * Compare the lower 24 bits of the two values.
- * Returns an integer <, ==, or > than zero.
- */
-static inline int ipath_cmp24(u32 a, u32 b)
-{
-	return (((int) a) - ((int) b)) << 8;
-}
-
-struct ipath_mcast *ipath_mcast_find(union ib_gid *mgid);
-
-int ipath_snapshot_counters(struct ipath_devdata *dd, u64 *swords,
-			    u64 *rwords, u64 *spkts, u64 *rpkts,
-			    u64 *xmit_wait);
-
-int ipath_get_counters(struct ipath_devdata *dd,
-		       struct ipath_verbs_counters *cntrs);
-
-int ipath_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
-
-int ipath_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
-
-int ipath_mcast_tree_empty(void);
-
-__be32 ipath_compute_aeth(struct ipath_qp *qp);
-
-struct ipath_qp *ipath_lookup_qpn(struct ipath_qp_table *qpt, u32 qpn);
-
-struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
-			      struct ib_qp_init_attr *init_attr,
-			      struct ib_udata *udata);
-
-int ipath_destroy_qp(struct ib_qp *ibqp);
-
-int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err);
-
-int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
-		    int attr_mask, struct ib_udata *udata);
-
-int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
-		   int attr_mask, struct ib_qp_init_attr *init_attr);
-
-unsigned ipath_free_all_qps(struct ipath_qp_table *qpt);
-
-int ipath_init_qp_table(struct ipath_ibdev *idev, int size);
-
-void ipath_get_credit(struct ipath_qp *qp, u32 aeth);
-
-unsigned ipath_ib_rate_to_mult(enum ib_rate rate);
-
-int ipath_verbs_send(struct ipath_qp *qp, struct ipath_ib_header *hdr,
-		     u32 hdrwords, struct ipath_sge_state *ss, u32 len);
-
-void ipath_copy_sge(struct ipath_sge_state *ss, void *data, u32 length);
-
-void ipath_skip_sge(struct ipath_sge_state *ss, u32 length);
-
-void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
-		  int has_grh, void *data, u32 tlen, struct ipath_qp *qp);
-
-void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
-		  int has_grh, void *data, u32 tlen, struct ipath_qp *qp);
-
-void ipath_restart_rc(struct ipath_qp *qp, u32 psn);
-
-void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err);
-
-int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr);
-
-void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
-		  int has_grh, void *data, u32 tlen, struct ipath_qp *qp);
-
-int ipath_alloc_lkey(struct ipath_lkey_table *rkt,
-		     struct ipath_mregion *mr);
-
-void ipath_free_lkey(struct ipath_lkey_table *rkt, u32 lkey);
-
-int ipath_lkey_ok(struct ipath_qp *qp, struct ipath_sge *isge,
-		  struct ib_sge *sge, int acc);
-
-int ipath_rkey_ok(struct ipath_qp *qp, struct ipath_sge_state *ss,
-		  u32 len, u64 vaddr, u32 rkey, int acc);
-
-int ipath_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
-			   struct ib_recv_wr **bad_wr);
-
-struct ib_srq *ipath_create_srq(struct ib_pd *ibpd,
-				struct ib_srq_init_attr *srq_init_attr,
-				struct ib_udata *udata);
-
-int ipath_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
-		     enum ib_srq_attr_mask attr_mask,
-		     struct ib_udata *udata);
-
-int ipath_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr);
-
-int ipath_destroy_srq(struct ib_srq *ibsrq);
-
-void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int sig);
-
-int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry);
-
-struct ib_cq *ipath_create_cq(struct ib_device *ibdev,
-			      const struct ib_cq_init_attr *attr,
-			      struct ib_ucontext *context,
-			      struct ib_udata *udata);
-
-int ipath_destroy_cq(struct ib_cq *ibcq);
-
-int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags);
-
-int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata);
-
-struct ib_mr *ipath_get_dma_mr(struct ib_pd *pd, int acc);
-
-struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
-				u64 virt_addr, int mr_access_flags,
-				struct ib_udata *udata);
-
-int ipath_dereg_mr(struct ib_mr *ibmr);
-
-struct ib_fmr *ipath_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
-			       struct ib_fmr_attr *fmr_attr);
-
-int ipath_map_phys_fmr(struct ib_fmr *ibfmr, u64 * page_list,
-		       int list_len, u64 iova);
-
-int ipath_unmap_fmr(struct list_head *fmr_list);
-
-int ipath_dealloc_fmr(struct ib_fmr *ibfmr);
-
-void ipath_release_mmap_info(struct kref *ref);
-
-struct ipath_mmap_info *ipath_create_mmap_info(struct ipath_ibdev *dev,
-					       u32 size,
-					       struct ib_ucontext *context,
-					       void *obj);
-
-void ipath_update_mmap_info(struct ipath_ibdev *dev,
-			    struct ipath_mmap_info *ip,
-			    u32 size, void *obj);
-
-int ipath_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
-
-void ipath_insert_rnr_queue(struct ipath_qp *qp);
-
-int ipath_init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe,
-		   u32 *lengthp, struct ipath_sge_state *ss);
-
-int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only);
-
-u32 ipath_make_grh(struct ipath_ibdev *dev, struct ib_grh *hdr,
-		   struct ib_global_route *grh, u32 hwords, u32 nwords);
-
-void ipath_make_ruc_header(struct ipath_ibdev *dev, struct ipath_qp *qp,
-			   struct ipath_other_headers *ohdr,
-			   u32 bth0, u32 bth2);
-
-void ipath_do_send(unsigned long data);
-
-void ipath_send_complete(struct ipath_qp *qp, struct ipath_swqe *wqe,
-			 enum ib_wc_status status);
-
-int ipath_make_rc_req(struct ipath_qp *qp);
-
-int ipath_make_uc_req(struct ipath_qp *qp);
-
-int ipath_make_ud_req(struct ipath_qp *qp);
-
-int ipath_register_ib_device(struct ipath_devdata *);
-
-void ipath_unregister_ib_device(struct ipath_ibdev *);
-
-void ipath_ib_rcv(struct ipath_ibdev *, void *, void *, u32);
-
-int ipath_ib_piobufavail(struct ipath_ibdev *);
-
-unsigned ipath_get_npkeys(struct ipath_devdata *);
-
-u32 ipath_get_cr_errpkey(struct ipath_devdata *);
-
-unsigned ipath_get_pkey(struct ipath_devdata *, unsigned);
-
-extern const enum ib_wc_opcode ib_ipath_wc_opcode[];
-
-/*
- * Below converts HCA-specific LinkTrainingState to IB PhysPortState
- * values.
- */
-extern const u8 ipath_cvt_physportstate[];
-#define IB_PHYSPORTSTATE_SLEEP 1
-#define IB_PHYSPORTSTATE_POLL 2
-#define IB_PHYSPORTSTATE_DISABLED 3
-#define IB_PHYSPORTSTATE_CFG_TRAIN 4
-#define IB_PHYSPORTSTATE_LINKUP 5
-#define IB_PHYSPORTSTATE_LINK_ERR_RECOVER 6
-
-extern const int ib_ipath_state_ops[];
-
-extern unsigned int ib_ipath_lkey_table_size;
-
-extern unsigned int ib_ipath_max_cqes;
-
-extern unsigned int ib_ipath_max_cqs;
-
-extern unsigned int ib_ipath_max_qp_wrs;
-
-extern unsigned int ib_ipath_max_qps;
-
-extern unsigned int ib_ipath_max_sges;
-
-extern unsigned int ib_ipath_max_mcast_grps;
-
-extern unsigned int ib_ipath_max_mcast_qp_attached;
-
-extern unsigned int ib_ipath_max_srqs;
-
-extern unsigned int ib_ipath_max_srq_sges;
-
-extern unsigned int ib_ipath_max_srq_wrs;
-
-extern const u32 ib_ipath_rnr_table[];
-
-extern struct ib_dma_mapping_ops ipath_dma_mapping_ops;
-
-#endif				/* IPATH_VERBS_H */
diff --git a/drivers/staging/rdma/ipath/ipath_verbs_mcast.c b/drivers/staging/rdma/ipath/ipath_verbs_mcast.c
deleted file mode 100644
index 72d476f..0000000
--- a/drivers/staging/rdma/ipath/ipath_verbs_mcast.c
+++ /dev/null
@@ -1,363 +0,0 @@
-/*
- * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
- * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/rculist.h>
-#include <linux/slab.h>
-
-#include "ipath_verbs.h"
-
-/*
- * Global table of GID to attached QPs.
- * The table is global to all ipath devices since a send from one QP/device
- * needs to be locally routed to any locally attached QPs on the same
- * or different device.
- */
-static struct rb_root mcast_tree;
-static DEFINE_SPINLOCK(mcast_lock);
-
-/**
- * ipath_mcast_qp_alloc - alloc a struct to link a QP to mcast GID struct
- * @qp: the QP to link
- */
-static struct ipath_mcast_qp *ipath_mcast_qp_alloc(struct ipath_qp *qp)
-{
-	struct ipath_mcast_qp *mqp;
-
-	mqp = kmalloc(sizeof *mqp, GFP_KERNEL);
-	if (!mqp)
-		goto bail;
-
-	mqp->qp = qp;
-	atomic_inc(&qp->refcount);
-
-bail:
-	return mqp;
-}
-
-static void ipath_mcast_qp_free(struct ipath_mcast_qp *mqp)
-{
-	struct ipath_qp *qp = mqp->qp;
-
-	/* Notify ipath_destroy_qp() if it is waiting. */
-	if (atomic_dec_and_test(&qp->refcount))
-		wake_up(&qp->wait);
-
-	kfree(mqp);
-}
-
-/**
- * ipath_mcast_alloc - allocate the multicast GID structure
- * @mgid: the multicast GID
- *
- * A list of QPs will be attached to this structure.
- */
-static struct ipath_mcast *ipath_mcast_alloc(union ib_gid *mgid)
-{
-	struct ipath_mcast *mcast;
-
-	mcast = kmalloc(sizeof *mcast, GFP_KERNEL);
-	if (!mcast)
-		goto bail;
-
-	mcast->mgid = *mgid;
-	INIT_LIST_HEAD(&mcast->qp_list);
-	init_waitqueue_head(&mcast->wait);
-	atomic_set(&mcast->refcount, 0);
-	mcast->n_attached = 0;
-
-bail:
-	return mcast;
-}
-
-static void ipath_mcast_free(struct ipath_mcast *mcast)
-{
-	struct ipath_mcast_qp *p, *tmp;
-
-	list_for_each_entry_safe(p, tmp, &mcast->qp_list, list)
-		ipath_mcast_qp_free(p);
-
-	kfree(mcast);
-}
-
-/**
- * ipath_mcast_find - search the global table for the given multicast GID
- * @mgid: the multicast GID to search for
- *
- * Returns NULL if not found.
- *
- * The caller is responsible for decrementing the reference count if found.
- */
-struct ipath_mcast *ipath_mcast_find(union ib_gid *mgid)
-{
-	struct rb_node *n;
-	unsigned long flags;
-	struct ipath_mcast *mcast;
-
-	spin_lock_irqsave(&mcast_lock, flags);
-	n = mcast_tree.rb_node;
-	while (n) {
-		int ret;
-
-		mcast = rb_entry(n, struct ipath_mcast, rb_node);
-
-		ret = memcmp(mgid->raw, mcast->mgid.raw,
-			     sizeof(union ib_gid));
-		if (ret < 0)
-			n = n->rb_left;
-		else if (ret > 0)
-			n = n->rb_right;
-		else {
-			atomic_inc(&mcast->refcount);
-			spin_unlock_irqrestore(&mcast_lock, flags);
-			goto bail;
-		}
-	}
-	spin_unlock_irqrestore(&mcast_lock, flags);
-
-	mcast = NULL;
-
-bail:
-	return mcast;
-}
-
-/**
- * ipath_mcast_add - insert mcast GID into table and attach QP struct
- * @mcast: the mcast GID table
- * @mqp: the QP to attach
- *
- * Return zero if both were added.  Return EEXIST if the GID was already in
- * the table but the QP was added.  Return ESRCH if the QP was already
- * attached and neither structure was added.
- */
-static int ipath_mcast_add(struct ipath_ibdev *dev,
-			   struct ipath_mcast *mcast,
-			   struct ipath_mcast_qp *mqp)
-{
-	struct rb_node **n = &mcast_tree.rb_node;
-	struct rb_node *pn = NULL;
-	int ret;
-
-	spin_lock_irq(&mcast_lock);
-
-	while (*n) {
-		struct ipath_mcast *tmcast;
-		struct ipath_mcast_qp *p;
-
-		pn = *n;
-		tmcast = rb_entry(pn, struct ipath_mcast, rb_node);
-
-		ret = memcmp(mcast->mgid.raw, tmcast->mgid.raw,
-			     sizeof(union ib_gid));
-		if (ret < 0) {
-			n = &pn->rb_left;
-			continue;
-		}
-		if (ret > 0) {
-			n = &pn->rb_right;
-			continue;
-		}
-
-		/* Search the QP list to see if this is already there. */
-		list_for_each_entry_rcu(p, &tmcast->qp_list, list) {
-			if (p->qp == mqp->qp) {
-				ret = ESRCH;
-				goto bail;
-			}
-		}
-		if (tmcast->n_attached == ib_ipath_max_mcast_qp_attached) {
-			ret = ENOMEM;
-			goto bail;
-		}
-
-		tmcast->n_attached++;
-
-		list_add_tail_rcu(&mqp->list, &tmcast->qp_list);
-		ret = EEXIST;
-		goto bail;
-	}
-
-	spin_lock(&dev->n_mcast_grps_lock);
-	if (dev->n_mcast_grps_allocated == ib_ipath_max_mcast_grps) {
-		spin_unlock(&dev->n_mcast_grps_lock);
-		ret = ENOMEM;
-		goto bail;
-	}
-
-	dev->n_mcast_grps_allocated++;
-	spin_unlock(&dev->n_mcast_grps_lock);
-
-	mcast->n_attached++;
-
-	list_add_tail_rcu(&mqp->list, &mcast->qp_list);
-
-	atomic_inc(&mcast->refcount);
-	rb_link_node(&mcast->rb_node, pn, n);
-	rb_insert_color(&mcast->rb_node, &mcast_tree);
-
-	ret = 0;
-
-bail:
-	spin_unlock_irq(&mcast_lock);
-
-	return ret;
-}
-
-int ipath_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
-{
-	struct ipath_qp *qp = to_iqp(ibqp);
-	struct ipath_ibdev *dev = to_idev(ibqp->device);
-	struct ipath_mcast *mcast;
-	struct ipath_mcast_qp *mqp;
-	int ret;
-
-	/*
-	 * Allocate data structures since its better to do this outside of
-	 * spin locks and it will most likely be needed.
-	 */
-	mcast = ipath_mcast_alloc(gid);
-	if (mcast == NULL) {
-		ret = -ENOMEM;
-		goto bail;
-	}
-	mqp = ipath_mcast_qp_alloc(qp);
-	if (mqp == NULL) {
-		ipath_mcast_free(mcast);
-		ret = -ENOMEM;
-		goto bail;
-	}
-	switch (ipath_mcast_add(dev, mcast, mqp)) {
-	case ESRCH:
-		/* Neither was used: can't attach the same QP twice. */
-		ipath_mcast_qp_free(mqp);
-		ipath_mcast_free(mcast);
-		ret = -EINVAL;
-		goto bail;
-	case EEXIST:		/* The mcast wasn't used */
-		ipath_mcast_free(mcast);
-		break;
-	case ENOMEM:
-		/* Exceeded the maximum number of mcast groups. */
-		ipath_mcast_qp_free(mqp);
-		ipath_mcast_free(mcast);
-		ret = -ENOMEM;
-		goto bail;
-	default:
-		break;
-	}
-
-	ret = 0;
-
-bail:
-	return ret;
-}
-
-int ipath_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
-{
-	struct ipath_qp *qp = to_iqp(ibqp);
-	struct ipath_ibdev *dev = to_idev(ibqp->device);
-	struct ipath_mcast *mcast = NULL;
-	struct ipath_mcast_qp *p, *tmp;
-	struct rb_node *n;
-	int last = 0;
-	int ret;
-
-	spin_lock_irq(&mcast_lock);
-
-	/* Find the GID in the mcast table. */
-	n = mcast_tree.rb_node;
-	while (1) {
-		if (n == NULL) {
-			spin_unlock_irq(&mcast_lock);
-			ret = -EINVAL;
-			goto bail;
-		}
-
-		mcast = rb_entry(n, struct ipath_mcast, rb_node);
-		ret = memcmp(gid->raw, mcast->mgid.raw,
-			     sizeof(union ib_gid));
-		if (ret < 0)
-			n = n->rb_left;
-		else if (ret > 0)
-			n = n->rb_right;
-		else
-			break;
-	}
-
-	/* Search the QP list. */
-	list_for_each_entry_safe(p, tmp, &mcast->qp_list, list) {
-		if (p->qp != qp)
-			continue;
-		/*
-		 * We found it, so remove it, but don't poison the forward
-		 * link until we are sure there are no list walkers.
-		 */
-		list_del_rcu(&p->list);
-		mcast->n_attached--;
-
-		/* If this was the last attached QP, remove the GID too. */
-		if (list_empty(&mcast->qp_list)) {
-			rb_erase(&mcast->rb_node, &mcast_tree);
-			last = 1;
-		}
-		break;
-	}
-
-	spin_unlock_irq(&mcast_lock);
-
-	if (p) {
-		/*
-		 * Wait for any list walkers to finish before freeing the
-		 * list element.
-		 */
-		wait_event(mcast->wait, atomic_read(&mcast->refcount) <= 1);
-		ipath_mcast_qp_free(p);
-	}
-	if (last) {
-		atomic_dec(&mcast->refcount);
-		wait_event(mcast->wait, !atomic_read(&mcast->refcount));
-		ipath_mcast_free(mcast);
-		spin_lock_irq(&dev->n_mcast_grps_lock);
-		dev->n_mcast_grps_allocated--;
-		spin_unlock_irq(&dev->n_mcast_grps_lock);
-	}
-
-	ret = 0;
-
-bail:
-	return ret;
-}
-
-int ipath_mcast_tree_empty(void)
-{
-	return mcast_tree.rb_node == NULL;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_wc_ppc64.c b/drivers/staging/rdma/ipath/ipath_wc_ppc64.c
deleted file mode 100644
index 1a7e20a..0000000
--- a/drivers/staging/rdma/ipath/ipath_wc_ppc64.c
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-/*
- * This file is conditionally built on PowerPC only.  Otherwise weak symbol
- * versions of the functions exported from here are used.
- */
-
-#include "ipath_kernel.h"
-
-/**
- * ipath_enable_wc - enable write combining for MMIO writes to the device
- * @dd: infinipath device
- *
- * Nothing to do on PowerPC, so just return without error.
- */
-int ipath_enable_wc(struct ipath_devdata *dd)
-{
-	return 0;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_wc_x86_64.c b/drivers/staging/rdma/ipath/ipath_wc_x86_64.c
deleted file mode 100644
index 7b6e4c8..0000000
--- a/drivers/staging/rdma/ipath/ipath_wc_x86_64.c
+++ /dev/null
@@ -1,144 +0,0 @@
-/*
- * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-/*
- * This file is conditionally built on x86_64 only.  Otherwise weak symbol
- * versions of the functions exported from here are used.
- */
-
-#include <linux/pci.h>
-#include <asm/processor.h>
-
-#include "ipath_kernel.h"
-
-/**
- * ipath_enable_wc - enable write combining for MMIO writes to the device
- * @dd: infinipath device
- *
- * This routine is x86_64-specific; it twiddles the CPU's MTRRs to enable
- * write combining.
- */
-int ipath_enable_wc(struct ipath_devdata *dd)
-{
-	int ret = 0;
-	u64 pioaddr, piolen;
-	unsigned bits;
-	const unsigned long addr = pci_resource_start(dd->pcidev, 0);
-	const size_t len = pci_resource_len(dd->pcidev, 0);
-
-	/*
-	 * Set the PIO buffers to be WCCOMB, so we get HT bursts to the
-	 * chip.  Linux (possibly the hardware) requires it to be on a power
-	 * of 2 address matching the length (which has to be a power of 2).
-	 * For rev1, that means the base address, for rev2, it will be just
-	 * the PIO buffers themselves.
-	 * For chips with two sets of buffers, the calculations are
-	 * somewhat more complicated; we need to sum, and the piobufbase
-	 * register has both offsets, 2K in low 32 bits, 4K in high 32 bits.
-	 * The buffers are still packed, so a single range covers both.
-	 */
-	if (dd->ipath_piobcnt2k && dd->ipath_piobcnt4k) { /* 2 sizes */
-		unsigned long pio2kbase, pio4kbase;
-		pio2kbase = dd->ipath_piobufbase & 0xffffffffUL;
-		pio4kbase = (dd->ipath_piobufbase >> 32) & 0xffffffffUL;
-		if (pio2kbase < pio4kbase) { /* all, for now */
-			pioaddr = addr + pio2kbase;
-			piolen = pio4kbase - pio2kbase +
-				dd->ipath_piobcnt4k * dd->ipath_4kalign;
-		} else {
-			pioaddr = addr + pio4kbase;
-			piolen = pio2kbase - pio4kbase +
-				dd->ipath_piobcnt2k * dd->ipath_palign;
-		}
-	} else {  /* single buffer size (2K, currently) */
-		pioaddr = addr + dd->ipath_piobufbase;
-		piolen = dd->ipath_piobcnt2k * dd->ipath_palign +
-			dd->ipath_piobcnt4k * dd->ipath_4kalign;
-	}
-
-	for (bits = 0; !(piolen & (1ULL << bits)); bits++)
-		/* do nothing */ ;
-
-	if (piolen != (1ULL << bits)) {
-		piolen >>= bits;
-		while (piolen >>= 1)
-			bits++;
-		piolen = 1ULL << (bits + 1);
-	}
-	if (pioaddr & (piolen - 1)) {
-		u64 atmp;
-		ipath_dbg("pioaddr %llx not on right boundary for size "
-			  "%llx, fixing\n",
-			  (unsigned long long) pioaddr,
-			  (unsigned long long) piolen);
-		atmp = pioaddr & ~(piolen - 1);
-		if (atmp < addr || (atmp + piolen) > (addr + len)) {
-			ipath_dev_err(dd, "No way to align address/size "
-				      "(%llx/%llx), no WC mtrr\n",
-				      (unsigned long long) atmp,
-				      (unsigned long long) piolen << 1);
-			ret = -ENODEV;
-		} else {
-			ipath_dbg("changing WC base from %llx to %llx, "
-				  "len from %llx to %llx\n",
-				  (unsigned long long) pioaddr,
-				  (unsigned long long) atmp,
-				  (unsigned long long) piolen,
-				  (unsigned long long) piolen << 1);
-			pioaddr = atmp;
-			piolen <<= 1;
-		}
-	}
-
-	if (!ret) {
-		dd->wc_cookie = arch_phys_wc_add(pioaddr, piolen);
-		if (dd->wc_cookie < 0) {
-			ipath_dev_err(dd, "Seting mtrr failed on PIO buffers\n");
-			ret = -ENODEV;
-		} else if (dd->wc_cookie == 0)
-			ipath_cdbg(VERBOSE, "Set mtrr for chip to WC not needed\n");
-		else
-			ipath_cdbg(VERBOSE, "Set mtrr for chip to WC\n");
-	}
-
-	return ret;
-}
-
-/**
- * ipath_disable_wc - disable write combining for MMIO writes to the device
- * @dd: infinipath device
- */
-void ipath_disable_wc(struct ipath_devdata *dd)
-{
-	arch_phys_wc_del(dd->wc_cookie);
-}
diff --git a/drivers/staging/speakup/Kconfig b/drivers/staging/speakup/Kconfig
index efd6f45..7e8037e 100644
--- a/drivers/staging/speakup/Kconfig
+++ b/drivers/staging/speakup/Kconfig
@@ -1,7 +1,7 @@
 menu "Speakup console speech"
 
 config SPEAKUP
-	depends on VT
+	depends on VT && !MN10300
 	tristate "Speakup core"
 	---help---
 		This is the Speakup screen reader.  Think of it as a
diff --git a/drivers/staging/speakup/main.c b/drivers/staging/speakup/main.c
index 63c59bc..30cf973 100644
--- a/drivers/staging/speakup/main.c
+++ b/drivers/staging/speakup/main.c
@@ -264,8 +264,9 @@
 	.notifier_call = vt_notifier_call,
 };
 
-static unsigned char get_attributes(u16 *pos)
+static unsigned char get_attributes(struct vc_data *vc, u16 *pos)
 {
+	pos = screen_pos(vc, pos - (u16 *)vc->vc_origin, 1);
 	return (u_char) (scr_readw(pos) >> 8);
 }
 
@@ -275,7 +276,7 @@
 	spk_y = spk_cy = vc->vc_y;
 	spk_pos = spk_cp = vc->vc_pos;
 	spk_old_attr = spk_attr;
-	spk_attr = get_attributes((u_short *) spk_pos);
+	spk_attr = get_attributes(vc, (u_short *)spk_pos);
 }
 
 static void bleep(u_short val)
@@ -469,8 +470,12 @@
 	u16 ch = ' ';
 
 	if (vc && pos) {
-		u16 w = scr_readw(pos);
-		u16 c = w & 0xff;
+		u16 w;
+		u16 c;
+
+		pos = screen_pos(vc, pos - (u16 *)vc->vc_origin, 1);
+		w = scr_readw(pos);
+		c = w & 0xff;
 
 		if (w & vc->vc_hi_font_mask)
 			c |= 0x100;
@@ -746,7 +751,7 @@
 	u_char tmp2;
 
 	spk_old_attr = spk_attr;
-	spk_attr = get_attributes((u_short *) spk_pos);
+	spk_attr = get_attributes(vc, (u_short *)spk_pos);
 	for (i = 0; i < vc->vc_cols; i++) {
 		buf[i] = (u_char) get_char(vc, (u_short *) tmp, &tmp2);
 		tmp += 2;
@@ -811,7 +816,7 @@
 	u_short saved_punc_mask = spk_punc_mask;
 
 	spk_old_attr = spk_attr;
-	spk_attr = get_attributes((u_short *) from);
+	spk_attr = get_attributes(vc, (u_short *)from);
 	while (from < to) {
 		buf[i++] = (char)get_char(vc, (u_short *) from, &tmp);
 		from += 2;
@@ -886,7 +891,7 @@
 	sentmarks[bn][0] = &sentbuf[bn][0];
 	i = 0;
 	spk_old_attr = spk_attr;
-	spk_attr = get_attributes((u_short *) start);
+	spk_attr = get_attributes(vc, (u_short *)start);
 
 	while (start < end) {
 		sentbuf[bn][i] = (char)get_char(vc, (u_short *) start, &tmp);
@@ -1585,7 +1590,7 @@
 		u16 *ptr;
 
 		for (ptr = start; ptr < end; ptr++) {
-			ch = get_attributes(ptr);
+			ch = get_attributes(vc, ptr);
 			bg = (ch & 0x70) >> 4;
 			speakup_console[vc_num]->ht.bgcount[bg]++;
 		}
diff --git a/drivers/staging/speakup/selection.c b/drivers/staging/speakup/selection.c
index aa5ab6c..41ef099 100644
--- a/drivers/staging/speakup/selection.c
+++ b/drivers/staging/speakup/selection.c
@@ -142,7 +142,9 @@
 	struct tty_ldisc *ld;
 	DECLARE_WAITQUEUE(wait, current);
 
-	ld = tty_ldisc_ref_wait(tty);
+	ld = tty_ldisc_ref(tty);
+	if (!ld)
+		goto tty_unref;
 	tty_buffer_lock_exclusive(&vc->port);
 
 	add_wait_queue(&vc->paste_wait, &wait);
@@ -162,6 +164,7 @@
 
 	tty_buffer_unlock_exclusive(&vc->port);
 	tty_ldisc_deref(ld);
+tty_unref:
 	tty_kref_put(tty);
 }
 
diff --git a/drivers/staging/speakup/serialio.c b/drivers/staging/speakup/serialio.c
index 3b5835b..a5bbb33 100644
--- a/drivers/staging/speakup/serialio.c
+++ b/drivers/staging/speakup/serialio.c
@@ -6,6 +6,11 @@
 #include "spk_priv.h"
 #include "serialio.h"
 
+#include <linux/serial_core.h>
+/* WARNING:  Do not change this to <linux/serial.h> without testing that
+ * SERIAL_PORT_DFNS does get defined to the appropriate value. */
+#include <asm/serial.h>
+
 #ifndef SERIAL_PORT_DFNS
 #define SERIAL_PORT_DFNS
 #endif
@@ -23,9 +28,15 @@
 	int baud = 9600, quot = 0;
 	unsigned int cval = 0;
 	int cflag = CREAD | HUPCL | CLOCAL | B9600 | CS8;
-	const struct old_serial_port *ser = rs_table + index;
+	const struct old_serial_port *ser;
 	int err;
 
+	if (index >= ARRAY_SIZE(rs_table)) {
+		pr_info("no port info for ttyS%d\n", index);
+		return NULL;
+	}
+	ser = rs_table + index;
+
 	/*	Divisor, bytesize and parity */
 	quot = ser->baud_base / baud;
 	cval = cflag & (CSIZE | CSTOPB);
diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c
index 3327c49..713c63d9 100644
--- a/drivers/target/target_core_configfs.c
+++ b/drivers/target/target_core_configfs.c
@@ -898,7 +898,7 @@
 	da->unmap_zeroes_data = flag;
 	pr_debug("dev[%p]: SE Device Thin Provisioning LBPRZ bit: %d\n",
 		 da->da_dev, flag);
-	return 0;
+	return count;
 }
 
 /*
diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c
index cacd97a..da457e2 100644
--- a/drivers/target/target_core_device.c
+++ b/drivers/target/target_core_device.c
@@ -828,6 +828,50 @@
 	return dev;
 }
 
+/*
+ * Check if the underlying struct block_device request_queue supports
+ * the QUEUE_FLAG_DISCARD bit for UNMAP/WRITE_SAME in SCSI + TRIM
+ * in ATA and we need to set TPE=1
+ */
+bool target_configure_unmap_from_queue(struct se_dev_attrib *attrib,
+				       struct request_queue *q, int block_size)
+{
+	if (!blk_queue_discard(q))
+		return false;
+
+	attrib->max_unmap_lba_count = (q->limits.max_discard_sectors << 9) /
+								block_size;
+	/*
+	 * Currently hardcoded to 1 in Linux/SCSI code..
+	 */
+	attrib->max_unmap_block_desc_count = 1;
+	attrib->unmap_granularity = q->limits.discard_granularity / block_size;
+	attrib->unmap_granularity_alignment = q->limits.discard_alignment /
+								block_size;
+	attrib->unmap_zeroes_data = q->limits.discard_zeroes_data;
+	return true;
+}
+EXPORT_SYMBOL(target_configure_unmap_from_queue);
+
+/*
+ * Convert from blocksize advertised to the initiator to the 512 byte
+ * units unconditionally used by the Linux block layer.
+ */
+sector_t target_to_linux_sector(struct se_device *dev, sector_t lb)
+{
+	switch (dev->dev_attrib.block_size) {
+	case 4096:
+		return lb << 3;
+	case 2048:
+		return lb << 2;
+	case 1024:
+		return lb << 1;
+	default:
+		return lb;
+	}
+}
+EXPORT_SYMBOL(target_to_linux_sector);
+
 int target_configure_device(struct se_device *dev)
 {
 	struct se_hba *hba = dev->se_hba;
diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c
index e319570..75f0f08 100644
--- a/drivers/target/target_core_file.c
+++ b/drivers/target/target_core_file.c
@@ -160,25 +160,11 @@
 			" block_device blocks: %llu logical_block_size: %d\n",
 			dev_size, div_u64(dev_size, fd_dev->fd_block_size),
 			fd_dev->fd_block_size);
-		/*
-		 * Check if the underlying struct block_device request_queue supports
-		 * the QUEUE_FLAG_DISCARD bit for UNMAP/WRITE_SAME in SCSI + TRIM
-		 * in ATA and we need to set TPE=1
-		 */
-		if (blk_queue_discard(q)) {
-			dev->dev_attrib.max_unmap_lba_count =
-				q->limits.max_discard_sectors;
-			/*
-			 * Currently hardcoded to 1 in Linux/SCSI code..
-			 */
-			dev->dev_attrib.max_unmap_block_desc_count = 1;
-			dev->dev_attrib.unmap_granularity =
-				q->limits.discard_granularity >> 9;
-			dev->dev_attrib.unmap_granularity_alignment =
-				q->limits.discard_alignment;
+
+		if (target_configure_unmap_from_queue(&dev->dev_attrib, q,
+						      fd_dev->fd_block_size))
 			pr_debug("IFILE: BLOCK Discard support available,"
-					" disabled by default\n");
-		}
+				 " disabled by default\n");
 		/*
 		 * Enable write same emulation for IBLOCK and use 0xFFFF as
 		 * the smaller WRITE_SAME(10) only has a two-byte block count.
@@ -490,9 +476,12 @@
 	if (S_ISBLK(inode->i_mode)) {
 		/* The backend is block device, use discard */
 		struct block_device *bdev = inode->i_bdev;
+		struct se_device *dev = cmd->se_dev;
 
-		ret = blkdev_issue_discard(bdev, lba,
-				nolb, GFP_KERNEL, 0);
+		ret = blkdev_issue_discard(bdev,
+					   target_to_linux_sector(dev, lba),
+					   target_to_linux_sector(dev,  nolb),
+					   GFP_KERNEL, 0);
 		if (ret < 0) {
 			pr_warn("FILEIO: blkdev_issue_discard() failed: %d\n",
 				ret);
diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c
index 5a2899f..abe4eb9 100644
--- a/drivers/target/target_core_iblock.c
+++ b/drivers/target/target_core_iblock.c
@@ -121,29 +121,11 @@
 	dev->dev_attrib.hw_max_sectors = queue_max_hw_sectors(q);
 	dev->dev_attrib.hw_queue_depth = q->nr_requests;
 
-	/*
-	 * Check if the underlying struct block_device request_queue supports
-	 * the QUEUE_FLAG_DISCARD bit for UNMAP/WRITE_SAME in SCSI + TRIM
-	 * in ATA and we need to set TPE=1
-	 */
-	if (blk_queue_discard(q)) {
-		dev->dev_attrib.max_unmap_lba_count =
-				q->limits.max_discard_sectors;
-
-		/*
-		 * Currently hardcoded to 1 in Linux/SCSI code..
-		 */
-		dev->dev_attrib.max_unmap_block_desc_count = 1;
-		dev->dev_attrib.unmap_granularity =
-				q->limits.discard_granularity >> 9;
-		dev->dev_attrib.unmap_granularity_alignment =
-				q->limits.discard_alignment;
-		dev->dev_attrib.unmap_zeroes_data =
-				q->limits.discard_zeroes_data;
-
+	if (target_configure_unmap_from_queue(&dev->dev_attrib, q,
+					      dev->dev_attrib.hw_block_size))
 		pr_debug("IBLOCK: BLOCK Discard support available,"
-				" disabled by default\n");
-	}
+			 " disabled by default\n");
+
 	/*
 	 * Enable write same emulation for IBLOCK and use 0xFFFF as
 	 * the smaller WRITE_SAME(10) only has a two-byte block count.
@@ -415,9 +397,13 @@
 iblock_execute_unmap(struct se_cmd *cmd, sector_t lba, sector_t nolb)
 {
 	struct block_device *bdev = IBLOCK_DEV(cmd->se_dev)->ibd_bd;
+	struct se_device *dev = cmd->se_dev;
 	int ret;
 
-	ret = blkdev_issue_discard(bdev, lba, nolb, GFP_KERNEL, 0);
+	ret = blkdev_issue_discard(bdev,
+				   target_to_linux_sector(dev, lba),
+				   target_to_linux_sector(dev,  nolb),
+				   GFP_KERNEL, 0);
 	if (ret < 0) {
 		pr_err("blkdev_issue_discard() failed: %d\n", ret);
 		return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
@@ -433,8 +419,10 @@
 	struct scatterlist *sg;
 	struct bio *bio;
 	struct bio_list list;
-	sector_t block_lba = cmd->t_task_lba;
-	sector_t sectors = sbc_get_write_same_sectors(cmd);
+	struct se_device *dev = cmd->se_dev;
+	sector_t block_lba = target_to_linux_sector(dev, cmd->t_task_lba);
+	sector_t sectors = target_to_linux_sector(dev,
+					sbc_get_write_same_sectors(cmd));
 
 	if (cmd->prot_op) {
 		pr_err("WRITE_SAME: Protection information with IBLOCK"
@@ -648,12 +636,12 @@
 		  enum dma_data_direction data_direction)
 {
 	struct se_device *dev = cmd->se_dev;
+	sector_t block_lba = target_to_linux_sector(dev, cmd->t_task_lba);
 	struct iblock_req *ibr;
 	struct bio *bio, *bio_start;
 	struct bio_list list;
 	struct scatterlist *sg;
 	u32 sg_num = sgl_nents;
-	sector_t block_lba;
 	unsigned bio_cnt;
 	int rw = 0;
 	int i;
@@ -679,24 +667,6 @@
 		rw = READ;
 	}
 
-	/*
-	 * Convert the blocksize advertised to the initiator to the 512 byte
-	 * units unconditionally used by the Linux block layer.
-	 */
-	if (dev->dev_attrib.block_size == 4096)
-		block_lba = (cmd->t_task_lba << 3);
-	else if (dev->dev_attrib.block_size == 2048)
-		block_lba = (cmd->t_task_lba << 2);
-	else if (dev->dev_attrib.block_size == 1024)
-		block_lba = (cmd->t_task_lba << 1);
-	else if (dev->dev_attrib.block_size == 512)
-		block_lba = cmd->t_task_lba;
-	else {
-		pr_err("Unsupported SCSI -> BLOCK LBA conversion:"
-				" %u\n", dev->dev_attrib.block_size);
-		return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
-	}
-
 	ibr = kzalloc(sizeof(struct iblock_req), GFP_KERNEL);
 	if (!ibr)
 		goto fail;
diff --git a/drivers/target/target_core_internal.h b/drivers/target/target_core_internal.h
index dae0750c..db4412f 100644
--- a/drivers/target/target_core_internal.h
+++ b/drivers/target/target_core_internal.h
@@ -141,7 +141,6 @@
 int	transport_dump_vpd_assoc(struct t10_vpd *, unsigned char *, int);
 int	transport_dump_vpd_ident_type(struct t10_vpd *, unsigned char *, int);
 int	transport_dump_vpd_ident(struct t10_vpd *, unsigned char *, int);
-bool	target_stop_cmd(struct se_cmd *cmd, unsigned long *flags);
 void	transport_clear_lun_ref(struct se_lun *);
 void	transport_send_task_abort(struct se_cmd *);
 sense_reason_t	target_cmd_size_check(struct se_cmd *cmd, unsigned int size);
diff --git a/drivers/target/target_core_tmr.c b/drivers/target/target_core_tmr.c
index fcdcb11..4f229e7 100644
--- a/drivers/target/target_core_tmr.c
+++ b/drivers/target/target_core_tmr.c
@@ -68,23 +68,25 @@
 
 	if (dev) {
 		spin_lock_irqsave(&dev->se_tmr_lock, flags);
-		list_del(&tmr->tmr_list);
+		list_del_init(&tmr->tmr_list);
 		spin_unlock_irqrestore(&dev->se_tmr_lock, flags);
 	}
 
 	kfree(tmr);
 }
 
-static void core_tmr_handle_tas_abort(
-	struct se_node_acl *tmr_nacl,
-	struct se_cmd *cmd,
-	int tas)
+static void core_tmr_handle_tas_abort(struct se_cmd *cmd, int tas)
 {
-	bool remove = true;
+	unsigned long flags;
+	bool remove = true, send_tas;
 	/*
 	 * TASK ABORTED status (TAS) bit support
 	 */
-	if ((tmr_nacl && (tmr_nacl != cmd->se_sess->se_node_acl)) && tas) {
+	spin_lock_irqsave(&cmd->t_state_lock, flags);
+	send_tas = (cmd->transport_state & CMD_T_TAS);
+	spin_unlock_irqrestore(&cmd->t_state_lock, flags);
+
+	if (send_tas) {
 		remove = false;
 		transport_send_task_abort(cmd);
 	}
@@ -107,6 +109,46 @@
 	return 1;
 }
 
+static bool __target_check_io_state(struct se_cmd *se_cmd,
+				    struct se_session *tmr_sess, int tas)
+{
+	struct se_session *sess = se_cmd->se_sess;
+
+	assert_spin_locked(&sess->sess_cmd_lock);
+	WARN_ON_ONCE(!irqs_disabled());
+	/*
+	 * If command already reached CMD_T_COMPLETE state within
+	 * target_complete_cmd() or CMD_T_FABRIC_STOP due to shutdown,
+	 * this se_cmd has been passed to fabric driver and will
+	 * not be aborted.
+	 *
+	 * Otherwise, obtain a local se_cmd->cmd_kref now for TMR
+	 * ABORT_TASK + LUN_RESET for CMD_T_ABORTED processing as
+	 * long as se_cmd->cmd_kref is still active unless zero.
+	 */
+	spin_lock(&se_cmd->t_state_lock);
+	if (se_cmd->transport_state & (CMD_T_COMPLETE | CMD_T_FABRIC_STOP)) {
+		pr_debug("Attempted to abort io tag: %llu already complete or"
+			" fabric stop, skipping\n", se_cmd->tag);
+		spin_unlock(&se_cmd->t_state_lock);
+		return false;
+	}
+	if (sess->sess_tearing_down || se_cmd->cmd_wait_set) {
+		pr_debug("Attempted to abort io tag: %llu already shutdown,"
+			" skipping\n", se_cmd->tag);
+		spin_unlock(&se_cmd->t_state_lock);
+		return false;
+	}
+	se_cmd->transport_state |= CMD_T_ABORTED;
+
+	if ((tmr_sess != se_cmd->se_sess) && tas)
+		se_cmd->transport_state |= CMD_T_TAS;
+
+	spin_unlock(&se_cmd->t_state_lock);
+
+	return kref_get_unless_zero(&se_cmd->cmd_kref);
+}
+
 void core_tmr_abort_task(
 	struct se_device *dev,
 	struct se_tmr_req *tmr,
@@ -130,34 +172,21 @@
 		if (tmr->ref_task_tag != ref_tag)
 			continue;
 
-		if (!kref_get_unless_zero(&se_cmd->cmd_kref))
-			continue;
-
 		printk("ABORT_TASK: Found referenced %s task_tag: %llu\n",
 			se_cmd->se_tfo->get_fabric_name(), ref_tag);
 
-		spin_lock(&se_cmd->t_state_lock);
-		if (se_cmd->transport_state & CMD_T_COMPLETE) {
-			printk("ABORT_TASK: ref_tag: %llu already complete,"
-			       " skipping\n", ref_tag);
-			spin_unlock(&se_cmd->t_state_lock);
+		if (!__target_check_io_state(se_cmd, se_sess, 0)) {
 			spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags);
-
-			target_put_sess_cmd(se_cmd);
-
 			goto out;
 		}
-		se_cmd->transport_state |= CMD_T_ABORTED;
-		spin_unlock(&se_cmd->t_state_lock);
-
 		list_del_init(&se_cmd->se_cmd_list);
 		spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags);
 
 		cancel_work_sync(&se_cmd->work);
 		transport_wait_for_tasks(se_cmd);
 
-		target_put_sess_cmd(se_cmd);
 		transport_cmd_finish_abort(se_cmd, true);
+		target_put_sess_cmd(se_cmd);
 
 		printk("ABORT_TASK: Sending TMR_FUNCTION_COMPLETE for"
 				" ref_tag: %llu\n", ref_tag);
@@ -178,9 +207,11 @@
 	struct list_head *preempt_and_abort_list)
 {
 	LIST_HEAD(drain_tmr_list);
+	struct se_session *sess;
 	struct se_tmr_req *tmr_p, *tmr_pp;
 	struct se_cmd *cmd;
 	unsigned long flags;
+	bool rc;
 	/*
 	 * Release all pending and outgoing TMRs aside from the received
 	 * LUN_RESET tmr..
@@ -206,17 +237,39 @@
 		if (target_check_cdb_and_preempt(preempt_and_abort_list, cmd))
 			continue;
 
+		sess = cmd->se_sess;
+		if (WARN_ON_ONCE(!sess))
+			continue;
+
+		spin_lock(&sess->sess_cmd_lock);
 		spin_lock(&cmd->t_state_lock);
-		if (!(cmd->transport_state & CMD_T_ACTIVE)) {
+		if (!(cmd->transport_state & CMD_T_ACTIVE) ||
+		     (cmd->transport_state & CMD_T_FABRIC_STOP)) {
 			spin_unlock(&cmd->t_state_lock);
+			spin_unlock(&sess->sess_cmd_lock);
 			continue;
 		}
 		if (cmd->t_state == TRANSPORT_ISTATE_PROCESSING) {
 			spin_unlock(&cmd->t_state_lock);
+			spin_unlock(&sess->sess_cmd_lock);
 			continue;
 		}
+		if (sess->sess_tearing_down || cmd->cmd_wait_set) {
+			spin_unlock(&cmd->t_state_lock);
+			spin_unlock(&sess->sess_cmd_lock);
+			continue;
+		}
+		cmd->transport_state |= CMD_T_ABORTED;
 		spin_unlock(&cmd->t_state_lock);
 
+		rc = kref_get_unless_zero(&cmd->cmd_kref);
+		if (!rc) {
+			printk("LUN_RESET TMR: non-zero kref_get_unless_zero\n");
+			spin_unlock(&sess->sess_cmd_lock);
+			continue;
+		}
+		spin_unlock(&sess->sess_cmd_lock);
+
 		list_move_tail(&tmr_p->tmr_list, &drain_tmr_list);
 	}
 	spin_unlock_irqrestore(&dev->se_tmr_lock, flags);
@@ -230,20 +283,26 @@
 			(preempt_and_abort_list) ? "Preempt" : "", tmr_p,
 			tmr_p->function, tmr_p->response, cmd->t_state);
 
+		cancel_work_sync(&cmd->work);
+		transport_wait_for_tasks(cmd);
+
 		transport_cmd_finish_abort(cmd, 1);
+		target_put_sess_cmd(cmd);
 	}
 }
 
 static void core_tmr_drain_state_list(
 	struct se_device *dev,
 	struct se_cmd *prout_cmd,
-	struct se_node_acl *tmr_nacl,
+	struct se_session *tmr_sess,
 	int tas,
 	struct list_head *preempt_and_abort_list)
 {
 	LIST_HEAD(drain_task_list);
+	struct se_session *sess;
 	struct se_cmd *cmd, *next;
 	unsigned long flags;
+	int rc;
 
 	/*
 	 * Complete outstanding commands with TASK_ABORTED SAM status.
@@ -282,6 +341,16 @@
 		if (prout_cmd == cmd)
 			continue;
 
+		sess = cmd->se_sess;
+		if (WARN_ON_ONCE(!sess))
+			continue;
+
+		spin_lock(&sess->sess_cmd_lock);
+		rc = __target_check_io_state(cmd, tmr_sess, tas);
+		spin_unlock(&sess->sess_cmd_lock);
+		if (!rc)
+			continue;
+
 		list_move_tail(&cmd->state_list, &drain_task_list);
 		cmd->state_active = false;
 	}
@@ -289,7 +358,7 @@
 
 	while (!list_empty(&drain_task_list)) {
 		cmd = list_entry(drain_task_list.next, struct se_cmd, state_list);
-		list_del(&cmd->state_list);
+		list_del_init(&cmd->state_list);
 
 		pr_debug("LUN_RESET: %s cmd: %p"
 			" ITT/CmdSN: 0x%08llx/0x%08x, i_state: %d, t_state: %d"
@@ -313,16 +382,11 @@
 		 * loop above, but we do it down here given that
 		 * cancel_work_sync may block.
 		 */
-		if (cmd->t_state == TRANSPORT_COMPLETE)
-			cancel_work_sync(&cmd->work);
+		cancel_work_sync(&cmd->work);
+		transport_wait_for_tasks(cmd);
 
-		spin_lock_irqsave(&cmd->t_state_lock, flags);
-		target_stop_cmd(cmd, &flags);
-
-		cmd->transport_state |= CMD_T_ABORTED;
-		spin_unlock_irqrestore(&cmd->t_state_lock, flags);
-
-		core_tmr_handle_tas_abort(tmr_nacl, cmd, tas);
+		core_tmr_handle_tas_abort(cmd, tas);
+		target_put_sess_cmd(cmd);
 	}
 }
 
@@ -334,6 +398,7 @@
 {
 	struct se_node_acl *tmr_nacl = NULL;
 	struct se_portal_group *tmr_tpg = NULL;
+	struct se_session *tmr_sess = NULL;
 	int tas;
         /*
 	 * TASK_ABORTED status bit, this is configurable via ConfigFS
@@ -352,8 +417,9 @@
 	 * or struct se_device passthrough..
 	 */
 	if (tmr && tmr->task_cmd && tmr->task_cmd->se_sess) {
-		tmr_nacl = tmr->task_cmd->se_sess->se_node_acl;
-		tmr_tpg = tmr->task_cmd->se_sess->se_tpg;
+		tmr_sess = tmr->task_cmd->se_sess;
+		tmr_nacl = tmr_sess->se_node_acl;
+		tmr_tpg = tmr_sess->se_tpg;
 		if (tmr_nacl && tmr_tpg) {
 			pr_debug("LUN_RESET: TMR caller fabric: %s"
 				" initiator port %s\n",
@@ -366,7 +432,7 @@
 		dev->transport->name, tas);
 
 	core_tmr_drain_tmr_list(dev, tmr, preempt_and_abort_list);
-	core_tmr_drain_state_list(dev, prout_cmd, tmr_nacl, tas,
+	core_tmr_drain_state_list(dev, prout_cmd, tmr_sess, tas,
 				preempt_and_abort_list);
 
 	/*
diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index 9f3608e..867bc6d 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -534,9 +534,6 @@
 }
 EXPORT_SYMBOL(transport_deregister_session);
 
-/*
- * Called with cmd->t_state_lock held.
- */
 static void target_remove_from_state_list(struct se_cmd *cmd)
 {
 	struct se_device *dev = cmd->se_dev;
@@ -561,10 +558,6 @@
 {
 	unsigned long flags;
 
-	spin_lock_irqsave(&cmd->t_state_lock, flags);
-	if (write_pending)
-		cmd->t_state = TRANSPORT_WRITE_PENDING;
-
 	if (remove_from_lists) {
 		target_remove_from_state_list(cmd);
 
@@ -574,6 +567,10 @@
 		cmd->se_lun = NULL;
 	}
 
+	spin_lock_irqsave(&cmd->t_state_lock, flags);
+	if (write_pending)
+		cmd->t_state = TRANSPORT_WRITE_PENDING;
+
 	/*
 	 * Determine if frontend context caller is requesting the stopping of
 	 * this command for frontend exceptions.
@@ -627,6 +624,8 @@
 
 void transport_cmd_finish_abort(struct se_cmd *cmd, int remove)
 {
+	bool ack_kref = (cmd->se_cmd_flags & SCF_ACK_KREF);
+
 	if (cmd->se_cmd_flags & SCF_SE_LUN_CMD)
 		transport_lun_remove_cmd(cmd);
 	/*
@@ -638,7 +637,7 @@
 
 	if (transport_cmd_check_stop_to_fabric(cmd))
 		return;
-	if (remove)
+	if (remove && ack_kref)
 		transport_put_cmd(cmd);
 }
 
@@ -694,19 +693,10 @@
 	}
 
 	/*
-	 * See if we are waiting to complete for an exception condition.
-	 */
-	if (cmd->transport_state & CMD_T_REQUEST_STOP) {
-		spin_unlock_irqrestore(&cmd->t_state_lock, flags);
-		complete(&cmd->task_stop_comp);
-		return;
-	}
-
-	/*
 	 * Check for case where an explicit ABORT_TASK has been received
 	 * and transport_wait_for_tasks() will be waiting for completion..
 	 */
-	if (cmd->transport_state & CMD_T_ABORTED &&
+	if (cmd->transport_state & CMD_T_ABORTED ||
 	    cmd->transport_state & CMD_T_STOP) {
 		spin_unlock_irqrestore(&cmd->t_state_lock, flags);
 		complete_all(&cmd->t_transport_stop_comp);
@@ -721,10 +711,10 @@
 	cmd->transport_state |= (CMD_T_COMPLETE | CMD_T_ACTIVE);
 	spin_unlock_irqrestore(&cmd->t_state_lock, flags);
 
-	if (cmd->cpuid == -1)
-		queue_work(target_completion_wq, &cmd->work);
-	else
+	if (cmd->se_cmd_flags & SCF_USE_CPUID)
 		queue_work_on(cmd->cpuid, target_completion_wq, &cmd->work);
+	else
+		queue_work(target_completion_wq, &cmd->work);
 }
 EXPORT_SYMBOL(target_complete_cmd);
 
@@ -1203,7 +1193,6 @@
 	INIT_LIST_HEAD(&cmd->state_list);
 	init_completion(&cmd->t_transport_stop_comp);
 	init_completion(&cmd->cmd_wait_comp);
-	init_completion(&cmd->task_stop_comp);
 	spin_lock_init(&cmd->t_state_lock);
 	kref_init(&cmd->cmd_kref);
 	cmd->transport_state = CMD_T_DEV_ACTIVE;
@@ -1437,6 +1426,12 @@
 	 */
 	transport_init_se_cmd(se_cmd, se_tpg->se_tpg_tfo, se_sess,
 				data_length, data_dir, task_attr, sense);
+
+	if (flags & TARGET_SCF_USE_CPUID)
+		se_cmd->se_cmd_flags |= SCF_USE_CPUID;
+	else
+		se_cmd->cpuid = WORK_CPU_UNBOUND;
+
 	if (flags & TARGET_SCF_UNKNOWN_SIZE)
 		se_cmd->unknown_data_length = 1;
 	/*
@@ -1635,33 +1630,6 @@
 EXPORT_SYMBOL(target_submit_tmr);
 
 /*
- * If the cmd is active, request it to be stopped and sleep until it
- * has completed.
- */
-bool target_stop_cmd(struct se_cmd *cmd, unsigned long *flags)
-	__releases(&cmd->t_state_lock)
-	__acquires(&cmd->t_state_lock)
-{
-	bool was_active = false;
-
-	if (cmd->transport_state & CMD_T_BUSY) {
-		cmd->transport_state |= CMD_T_REQUEST_STOP;
-		spin_unlock_irqrestore(&cmd->t_state_lock, *flags);
-
-		pr_debug("cmd %p waiting to complete\n", cmd);
-		wait_for_completion(&cmd->task_stop_comp);
-		pr_debug("cmd %p stopped successfully\n", cmd);
-
-		spin_lock_irqsave(&cmd->t_state_lock, *flags);
-		cmd->transport_state &= ~CMD_T_REQUEST_STOP;
-		cmd->transport_state &= ~CMD_T_BUSY;
-		was_active = true;
-	}
-
-	return was_active;
-}
-
-/*
  * Handle SAM-esque emulation for generic transport request failures.
  */
 void transport_generic_request_failure(struct se_cmd *cmd,
@@ -1859,19 +1827,21 @@
 	return true;
 }
 
+static int __transport_check_aborted_status(struct se_cmd *, int);
+
 void target_execute_cmd(struct se_cmd *cmd)
 {
 	/*
-	 * If the received CDB has aleady been aborted stop processing it here.
-	 */
-	if (transport_check_aborted_status(cmd, 1))
-		return;
-
-	/*
 	 * Determine if frontend context caller is requesting the stopping of
 	 * this command for frontend exceptions.
+	 *
+	 * If the received CDB has aleady been aborted stop processing it here.
 	 */
 	spin_lock_irq(&cmd->t_state_lock);
+	if (__transport_check_aborted_status(cmd, 1)) {
+		spin_unlock_irq(&cmd->t_state_lock);
+		return;
+	}
 	if (cmd->transport_state & CMD_T_STOP) {
 		pr_debug("%s:%d CMD_T_STOP for ITT: 0x%08llx\n",
 			__func__, __LINE__, cmd->tag);
@@ -2222,28 +2192,6 @@
 }
 
 /**
- * transport_release_cmd - free a command
- * @cmd:       command to free
- *
- * This routine unconditionally frees a command, and reference counting
- * or list removal must be done in the caller.
- */
-static int transport_release_cmd(struct se_cmd *cmd)
-{
-	BUG_ON(!cmd->se_tfo);
-
-	if (cmd->se_cmd_flags & SCF_SCSI_TMR_CDB)
-		core_tmr_release_req(cmd->se_tmr_req);
-	if (cmd->t_task_cdb != cmd->__t_task_cdb)
-		kfree(cmd->t_task_cdb);
-	/*
-	 * If this cmd has been setup with target_get_sess_cmd(), drop
-	 * the kref and call ->release_cmd() in kref callback.
-	 */
-	return target_put_sess_cmd(cmd);
-}
-
-/**
  * transport_put_cmd - release a reference to a command
  * @cmd:       command to release
  *
@@ -2251,8 +2199,12 @@
  */
 static int transport_put_cmd(struct se_cmd *cmd)
 {
-	transport_free_pages(cmd);
-	return transport_release_cmd(cmd);
+	BUG_ON(!cmd->se_tfo);
+	/*
+	 * If this cmd has been setup with target_get_sess_cmd(), drop
+	 * the kref and call ->release_cmd() in kref callback.
+	 */
+	return target_put_sess_cmd(cmd);
 }
 
 void *transport_kmap_data_sg(struct se_cmd *cmd)
@@ -2450,34 +2402,58 @@
 	}
 }
 
-int transport_generic_free_cmd(struct se_cmd *cmd, int wait_for_tasks)
+static bool
+__transport_wait_for_tasks(struct se_cmd *, bool, bool *, bool *,
+			   unsigned long *flags);
+
+static void target_wait_free_cmd(struct se_cmd *cmd, bool *aborted, bool *tas)
 {
 	unsigned long flags;
+
+	spin_lock_irqsave(&cmd->t_state_lock, flags);
+	__transport_wait_for_tasks(cmd, true, aborted, tas, &flags);
+	spin_unlock_irqrestore(&cmd->t_state_lock, flags);
+}
+
+int transport_generic_free_cmd(struct se_cmd *cmd, int wait_for_tasks)
+{
 	int ret = 0;
+	bool aborted = false, tas = false;
 
 	if (!(cmd->se_cmd_flags & SCF_SE_LUN_CMD)) {
 		if (wait_for_tasks && (cmd->se_cmd_flags & SCF_SCSI_TMR_CDB))
-			 transport_wait_for_tasks(cmd);
+			target_wait_free_cmd(cmd, &aborted, &tas);
 
-		ret = transport_release_cmd(cmd);
+		if (!aborted || tas)
+			ret = transport_put_cmd(cmd);
 	} else {
 		if (wait_for_tasks)
-			transport_wait_for_tasks(cmd);
+			target_wait_free_cmd(cmd, &aborted, &tas);
 		/*
 		 * Handle WRITE failure case where transport_generic_new_cmd()
 		 * has already added se_cmd to state_list, but fabric has
 		 * failed command before I/O submission.
 		 */
-		if (cmd->state_active) {
-			spin_lock_irqsave(&cmd->t_state_lock, flags);
+		if (cmd->state_active)
 			target_remove_from_state_list(cmd);
-			spin_unlock_irqrestore(&cmd->t_state_lock, flags);
-		}
 
 		if (cmd->se_lun)
 			transport_lun_remove_cmd(cmd);
 
-		ret = transport_put_cmd(cmd);
+		if (!aborted || tas)
+			ret = transport_put_cmd(cmd);
+	}
+	/*
+	 * If the task has been internally aborted due to TMR ABORT_TASK
+	 * or LUN_RESET, target_core_tmr.c is responsible for performing
+	 * the remaining calls to target_put_sess_cmd(), and not the
+	 * callers of this function.
+	 */
+	if (aborted) {
+		pr_debug("Detected CMD_T_ABORTED for ITT: %llu\n", cmd->tag);
+		wait_for_completion(&cmd->cmd_wait_comp);
+		cmd->se_tfo->release_cmd(cmd);
+		ret = 1;
 	}
 	return ret;
 }
@@ -2517,26 +2493,46 @@
 }
 EXPORT_SYMBOL(target_get_sess_cmd);
 
+static void target_free_cmd_mem(struct se_cmd *cmd)
+{
+	transport_free_pages(cmd);
+
+	if (cmd->se_cmd_flags & SCF_SCSI_TMR_CDB)
+		core_tmr_release_req(cmd->se_tmr_req);
+	if (cmd->t_task_cdb != cmd->__t_task_cdb)
+		kfree(cmd->t_task_cdb);
+}
+
 static void target_release_cmd_kref(struct kref *kref)
 {
 	struct se_cmd *se_cmd = container_of(kref, struct se_cmd, cmd_kref);
 	struct se_session *se_sess = se_cmd->se_sess;
 	unsigned long flags;
+	bool fabric_stop;
 
 	spin_lock_irqsave(&se_sess->sess_cmd_lock, flags);
 	if (list_empty(&se_cmd->se_cmd_list)) {
 		spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags);
+		target_free_cmd_mem(se_cmd);
 		se_cmd->se_tfo->release_cmd(se_cmd);
 		return;
 	}
-	if (se_sess->sess_tearing_down && se_cmd->cmd_wait_set) {
+
+	spin_lock(&se_cmd->t_state_lock);
+	fabric_stop = (se_cmd->transport_state & CMD_T_FABRIC_STOP);
+	spin_unlock(&se_cmd->t_state_lock);
+
+	if (se_cmd->cmd_wait_set || fabric_stop) {
+		list_del_init(&se_cmd->se_cmd_list);
 		spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags);
+		target_free_cmd_mem(se_cmd);
 		complete(&se_cmd->cmd_wait_comp);
 		return;
 	}
-	list_del(&se_cmd->se_cmd_list);
+	list_del_init(&se_cmd->se_cmd_list);
 	spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags);
 
+	target_free_cmd_mem(se_cmd);
 	se_cmd->se_tfo->release_cmd(se_cmd);
 }
 
@@ -2548,6 +2544,7 @@
 	struct se_session *se_sess = se_cmd->se_sess;
 
 	if (!se_sess) {
+		target_free_cmd_mem(se_cmd);
 		se_cmd->se_tfo->release_cmd(se_cmd);
 		return 1;
 	}
@@ -2564,6 +2561,7 @@
 {
 	struct se_cmd *se_cmd;
 	unsigned long flags;
+	int rc;
 
 	spin_lock_irqsave(&se_sess->sess_cmd_lock, flags);
 	if (se_sess->sess_tearing_down) {
@@ -2573,8 +2571,15 @@
 	se_sess->sess_tearing_down = 1;
 	list_splice_init(&se_sess->sess_cmd_list, &se_sess->sess_wait_list);
 
-	list_for_each_entry(se_cmd, &se_sess->sess_wait_list, se_cmd_list)
-		se_cmd->cmd_wait_set = 1;
+	list_for_each_entry(se_cmd, &se_sess->sess_wait_list, se_cmd_list) {
+		rc = kref_get_unless_zero(&se_cmd->cmd_kref);
+		if (rc) {
+			se_cmd->cmd_wait_set = 1;
+			spin_lock(&se_cmd->t_state_lock);
+			se_cmd->transport_state |= CMD_T_FABRIC_STOP;
+			spin_unlock(&se_cmd->t_state_lock);
+		}
+	}
 
 	spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags);
 }
@@ -2587,15 +2592,25 @@
 {
 	struct se_cmd *se_cmd, *tmp_cmd;
 	unsigned long flags;
+	bool tas;
 
 	list_for_each_entry_safe(se_cmd, tmp_cmd,
 				&se_sess->sess_wait_list, se_cmd_list) {
-		list_del(&se_cmd->se_cmd_list);
+		list_del_init(&se_cmd->se_cmd_list);
 
 		pr_debug("Waiting for se_cmd: %p t_state: %d, fabric state:"
 			" %d\n", se_cmd, se_cmd->t_state,
 			se_cmd->se_tfo->get_cmd_state(se_cmd));
 
+		spin_lock_irqsave(&se_cmd->t_state_lock, flags);
+		tas = (se_cmd->transport_state & CMD_T_TAS);
+		spin_unlock_irqrestore(&se_cmd->t_state_lock, flags);
+
+		if (!target_put_sess_cmd(se_cmd)) {
+			if (tas)
+				target_put_sess_cmd(se_cmd);
+		}
+
 		wait_for_completion(&se_cmd->cmd_wait_comp);
 		pr_debug("After cmd_wait_comp: se_cmd: %p t_state: %d"
 			" fabric state: %d\n", se_cmd, se_cmd->t_state,
@@ -2617,6 +2632,58 @@
 	wait_for_completion(&lun->lun_ref_comp);
 }
 
+static bool
+__transport_wait_for_tasks(struct se_cmd *cmd, bool fabric_stop,
+			   bool *aborted, bool *tas, unsigned long *flags)
+	__releases(&cmd->t_state_lock)
+	__acquires(&cmd->t_state_lock)
+{
+
+	assert_spin_locked(&cmd->t_state_lock);
+	WARN_ON_ONCE(!irqs_disabled());
+
+	if (fabric_stop)
+		cmd->transport_state |= CMD_T_FABRIC_STOP;
+
+	if (cmd->transport_state & CMD_T_ABORTED)
+		*aborted = true;
+
+	if (cmd->transport_state & CMD_T_TAS)
+		*tas = true;
+
+	if (!(cmd->se_cmd_flags & SCF_SE_LUN_CMD) &&
+	    !(cmd->se_cmd_flags & SCF_SCSI_TMR_CDB))
+		return false;
+
+	if (!(cmd->se_cmd_flags & SCF_SUPPORTED_SAM_OPCODE) &&
+	    !(cmd->se_cmd_flags & SCF_SCSI_TMR_CDB))
+		return false;
+
+	if (!(cmd->transport_state & CMD_T_ACTIVE))
+		return false;
+
+	if (fabric_stop && *aborted)
+		return false;
+
+	cmd->transport_state |= CMD_T_STOP;
+
+	pr_debug("wait_for_tasks: Stopping %p ITT: 0x%08llx i_state: %d,"
+		 " t_state: %d, CMD_T_STOP\n", cmd, cmd->tag,
+		 cmd->se_tfo->get_cmd_state(cmd), cmd->t_state);
+
+	spin_unlock_irqrestore(&cmd->t_state_lock, *flags);
+
+	wait_for_completion(&cmd->t_transport_stop_comp);
+
+	spin_lock_irqsave(&cmd->t_state_lock, *flags);
+	cmd->transport_state &= ~(CMD_T_ACTIVE | CMD_T_STOP);
+
+	pr_debug("wait_for_tasks: Stopped wait_for_completion(&cmd->"
+		 "t_transport_stop_comp) for ITT: 0x%08llx\n", cmd->tag);
+
+	return true;
+}
+
 /**
  * transport_wait_for_tasks - wait for completion to occur
  * @cmd:	command to wait
@@ -2627,43 +2694,13 @@
 bool transport_wait_for_tasks(struct se_cmd *cmd)
 {
 	unsigned long flags;
+	bool ret, aborted = false, tas = false;
 
 	spin_lock_irqsave(&cmd->t_state_lock, flags);
-	if (!(cmd->se_cmd_flags & SCF_SE_LUN_CMD) &&
-	    !(cmd->se_cmd_flags & SCF_SCSI_TMR_CDB)) {
-		spin_unlock_irqrestore(&cmd->t_state_lock, flags);
-		return false;
-	}
-
-	if (!(cmd->se_cmd_flags & SCF_SUPPORTED_SAM_OPCODE) &&
-	    !(cmd->se_cmd_flags & SCF_SCSI_TMR_CDB)) {
-		spin_unlock_irqrestore(&cmd->t_state_lock, flags);
-		return false;
-	}
-
-	if (!(cmd->transport_state & CMD_T_ACTIVE)) {
-		spin_unlock_irqrestore(&cmd->t_state_lock, flags);
-		return false;
-	}
-
-	cmd->transport_state |= CMD_T_STOP;
-
-	pr_debug("wait_for_tasks: Stopping %p ITT: 0x%08llx i_state: %d, t_state: %d, CMD_T_STOP\n",
-		cmd, cmd->tag, cmd->se_tfo->get_cmd_state(cmd), cmd->t_state);
-
+	ret = __transport_wait_for_tasks(cmd, false, &aborted, &tas, &flags);
 	spin_unlock_irqrestore(&cmd->t_state_lock, flags);
 
-	wait_for_completion(&cmd->t_transport_stop_comp);
-
-	spin_lock_irqsave(&cmd->t_state_lock, flags);
-	cmd->transport_state &= ~(CMD_T_ACTIVE | CMD_T_STOP);
-
-	pr_debug("wait_for_tasks: Stopped wait_for_completion(&cmd->t_transport_stop_comp) for ITT: 0x%08llx\n",
-		cmd->tag);
-
-	spin_unlock_irqrestore(&cmd->t_state_lock, flags);
-
-	return true;
+	return ret;
 }
 EXPORT_SYMBOL(transport_wait_for_tasks);
 
@@ -2845,28 +2882,49 @@
 }
 EXPORT_SYMBOL(transport_send_check_condition_and_sense);
 
-int transport_check_aborted_status(struct se_cmd *cmd, int send_status)
+static int __transport_check_aborted_status(struct se_cmd *cmd, int send_status)
+	__releases(&cmd->t_state_lock)
+	__acquires(&cmd->t_state_lock)
 {
+	assert_spin_locked(&cmd->t_state_lock);
+	WARN_ON_ONCE(!irqs_disabled());
+
 	if (!(cmd->transport_state & CMD_T_ABORTED))
 		return 0;
-
 	/*
 	 * If cmd has been aborted but either no status is to be sent or it has
 	 * already been sent, just return
 	 */
-	if (!send_status || !(cmd->se_cmd_flags & SCF_SEND_DELAYED_TAS))
+	if (!send_status || !(cmd->se_cmd_flags & SCF_SEND_DELAYED_TAS)) {
+		if (send_status)
+			cmd->se_cmd_flags |= SCF_SEND_DELAYED_TAS;
 		return 1;
+	}
 
-	pr_debug("Sending delayed SAM_STAT_TASK_ABORTED status for CDB: 0x%02x ITT: 0x%08llx\n",
-		 cmd->t_task_cdb[0], cmd->tag);
+	pr_debug("Sending delayed SAM_STAT_TASK_ABORTED status for CDB:"
+		" 0x%02x ITT: 0x%08llx\n", cmd->t_task_cdb[0], cmd->tag);
 
 	cmd->se_cmd_flags &= ~SCF_SEND_DELAYED_TAS;
 	cmd->scsi_status = SAM_STAT_TASK_ABORTED;
 	trace_target_cmd_complete(cmd);
+
+	spin_unlock_irq(&cmd->t_state_lock);
 	cmd->se_tfo->queue_status(cmd);
+	spin_lock_irq(&cmd->t_state_lock);
 
 	return 1;
 }
+
+int transport_check_aborted_status(struct se_cmd *cmd, int send_status)
+{
+	int ret;
+
+	spin_lock_irq(&cmd->t_state_lock);
+	ret = __transport_check_aborted_status(cmd, send_status);
+	spin_unlock_irq(&cmd->t_state_lock);
+
+	return ret;
+}
 EXPORT_SYMBOL(transport_check_aborted_status);
 
 void transport_send_task_abort(struct se_cmd *cmd)
@@ -2888,11 +2946,17 @@
 	 */
 	if (cmd->data_direction == DMA_TO_DEVICE) {
 		if (cmd->se_tfo->write_pending_status(cmd) != 0) {
-			cmd->transport_state |= CMD_T_ABORTED;
+			spin_lock_irqsave(&cmd->t_state_lock, flags);
+			if (cmd->se_cmd_flags & SCF_SEND_DELAYED_TAS) {
+				spin_unlock_irqrestore(&cmd->t_state_lock, flags);
+				goto send_abort;
+			}
 			cmd->se_cmd_flags |= SCF_SEND_DELAYED_TAS;
+			spin_unlock_irqrestore(&cmd->t_state_lock, flags);
 			return;
 		}
 	}
+send_abort:
 	cmd->scsi_status = SAM_STAT_TASK_ABORTED;
 
 	transport_lun_remove_cmd(cmd);
@@ -2909,8 +2973,17 @@
 	struct se_cmd *cmd = container_of(work, struct se_cmd, work);
 	struct se_device *dev = cmd->se_dev;
 	struct se_tmr_req *tmr = cmd->se_tmr_req;
+	unsigned long flags;
 	int ret;
 
+	spin_lock_irqsave(&cmd->t_state_lock, flags);
+	if (cmd->transport_state & CMD_T_ABORTED) {
+		tmr->response = TMR_FUNCTION_REJECTED;
+		spin_unlock_irqrestore(&cmd->t_state_lock, flags);
+		goto check_stop;
+	}
+	spin_unlock_irqrestore(&cmd->t_state_lock, flags);
+
 	switch (tmr->function) {
 	case TMR_ABORT_TASK:
 		core_tmr_abort_task(dev, tmr, cmd->se_sess);
@@ -2943,9 +3016,17 @@
 		break;
 	}
 
+	spin_lock_irqsave(&cmd->t_state_lock, flags);
+	if (cmd->transport_state & CMD_T_ABORTED) {
+		spin_unlock_irqrestore(&cmd->t_state_lock, flags);
+		goto check_stop;
+	}
 	cmd->t_state = TRANSPORT_ISTATE_PROCESSING;
+	spin_unlock_irqrestore(&cmd->t_state_lock, flags);
+
 	cmd->se_tfo->queue_tm_rsp(cmd);
 
+check_stop:
 	transport_cmd_check_stop_to_fabric(cmd);
 }
 
diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c
index dd600e5..94f5154 100644
--- a/drivers/target/target_core_user.c
+++ b/drivers/target/target_core_user.c
@@ -903,7 +903,7 @@
 	info->version = __stringify(TCMU_MAILBOX_VERSION);
 
 	info->mem[0].name = "tcm-user command & data buffer";
-	info->mem[0].addr = (phys_addr_t) udev->mb_addr;
+	info->mem[0].addr = (phys_addr_t)(uintptr_t)udev->mb_addr;
 	info->mem[0].size = TCMU_RING_SIZE;
 	info->mem[0].memtype = UIO_MEM_VIRTUAL;
 
diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig
index 8cc4ac6..7c92c09 100644
--- a/drivers/thermal/Kconfig
+++ b/drivers/thermal/Kconfig
@@ -195,7 +195,7 @@
 	  passive trip is crossed.
 
 config SPEAR_THERMAL
-	bool "SPEAr thermal sensor driver"
+	tristate "SPEAr thermal sensor driver"
 	depends on PLAT_SPEAR || COMPILE_TEST
 	depends on OF
 	help
@@ -237,8 +237,8 @@
 	  framework.
 
 config DB8500_THERMAL
-	bool "DB8500 thermal management"
-	depends on ARCH_U8500
+	tristate "DB8500 thermal management"
+	depends on MFD_DB8500_PRCMU
 	default y
 	help
 	  Adds DB8500 thermal management implementation according to the thermal
diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c
index e3fbc5a..6ceac4f 100644
--- a/drivers/thermal/cpu_cooling.c
+++ b/drivers/thermal/cpu_cooling.c
@@ -377,26 +377,28 @@
  * get_load() - get load for a cpu since last updated
  * @cpufreq_device:	&struct cpufreq_cooling_device for this cpu
  * @cpu:	cpu number
+ * @cpu_idx:	index of the cpu in cpufreq_device->allowed_cpus
  *
  * Return: The average load of cpu @cpu in percentage since this
  * function was last called.
  */
-static u32 get_load(struct cpufreq_cooling_device *cpufreq_device, int cpu)
+static u32 get_load(struct cpufreq_cooling_device *cpufreq_device, int cpu,
+		    int cpu_idx)
 {
 	u32 load;
 	u64 now, now_idle, delta_time, delta_idle;
 
 	now_idle = get_cpu_idle_time(cpu, &now, 0);
-	delta_idle = now_idle - cpufreq_device->time_in_idle[cpu];
-	delta_time = now - cpufreq_device->time_in_idle_timestamp[cpu];
+	delta_idle = now_idle - cpufreq_device->time_in_idle[cpu_idx];
+	delta_time = now - cpufreq_device->time_in_idle_timestamp[cpu_idx];
 
 	if (delta_time <= delta_idle)
 		load = 0;
 	else
 		load = div64_u64(100 * (delta_time - delta_idle), delta_time);
 
-	cpufreq_device->time_in_idle[cpu] = now_idle;
-	cpufreq_device->time_in_idle_timestamp[cpu] = now;
+	cpufreq_device->time_in_idle[cpu_idx] = now_idle;
+	cpufreq_device->time_in_idle_timestamp[cpu_idx] = now;
 
 	return load;
 }
@@ -598,7 +600,7 @@
 		u32 load;
 
 		if (cpu_online(cpu))
-			load = get_load(cpufreq_device, cpu);
+			load = get_load(cpufreq_device, cpu, i);
 		else
 			load = 0;
 
diff --git a/drivers/thermal/of-thermal.c b/drivers/thermal/of-thermal.c
index be4eedc..9043f8f 100644
--- a/drivers/thermal/of-thermal.c
+++ b/drivers/thermal/of-thermal.c
@@ -475,14 +475,10 @@
 
 	sensor_np = of_node_get(dev->of_node);
 
-	for_each_child_of_node(np, child) {
+	for_each_available_child_of_node(np, child) {
 		struct of_phandle_args sensor_specs;
 		int ret, id;
 
-		/* Check whether child is enabled or not */
-		if (!of_device_is_available(child))
-			continue;
-
 		/* For now, thermal framework supports only 1 sensor per zone */
 		ret = of_parse_phandle_with_args(child, "thermal-sensors",
 						 "#thermal-sensor-cells",
@@ -881,16 +877,12 @@
 		return 0; /* Run successfully on systems without thermal DT */
 	}
 
-	for_each_child_of_node(np, child) {
+	for_each_available_child_of_node(np, child) {
 		struct thermal_zone_device *zone;
 		struct thermal_zone_params *tzp;
 		int i, mask = 0;
 		u32 prop;
 
-		/* Check whether child is enabled or not */
-		if (!of_device_is_available(child))
-			continue;
-
 		tz = thermal_of_build_thermal_zone(child);
 		if (IS_ERR(tz)) {
 			pr_err("failed to build thermal zone %s: %ld\n",
@@ -968,13 +960,9 @@
 		return;
 	}
 
-	for_each_child_of_node(np, child) {
+	for_each_available_child_of_node(np, child) {
 		struct thermal_zone_device *zone;
 
-		/* Check whether child is enabled or not */
-		if (!of_device_is_available(child))
-			continue;
-
 		zone = thermal_zone_get_zone_by_name(child->name);
 		if (IS_ERR(zone))
 			continue;
diff --git a/drivers/thermal/rcar_thermal.c b/drivers/thermal/rcar_thermal.c
index 44b9c48..0e735ac 100644
--- a/drivers/thermal/rcar_thermal.c
+++ b/drivers/thermal/rcar_thermal.c
@@ -23,6 +23,7 @@
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/module.h>
+#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/reboot.h>
@@ -75,8 +76,10 @@
 #define rcar_has_irq_support(priv)	((priv)->common->base)
 #define rcar_id_to_shift(priv)		((priv)->id * 8)
 
+#define USE_OF_THERMAL	1
 static const struct of_device_id rcar_thermal_dt_ids[] = {
 	{ .compatible = "renesas,rcar-thermal", },
+	{ .compatible = "renesas,rcar-gen2-thermal", .data = (void *)USE_OF_THERMAL },
 	{},
 };
 MODULE_DEVICE_TABLE(of, rcar_thermal_dt_ids);
@@ -200,9 +203,9 @@
 	return ret;
 }
 
-static int rcar_thermal_get_temp(struct thermal_zone_device *zone, int *temp)
+static int rcar_thermal_get_current_temp(struct rcar_thermal_priv *priv,
+					 int *temp)
 {
-	struct rcar_thermal_priv *priv = rcar_zone_to_priv(zone);
 	int tmp;
 	int ret;
 
@@ -226,6 +229,20 @@
 	return 0;
 }
 
+static int rcar_thermal_of_get_temp(void *data, int *temp)
+{
+	struct rcar_thermal_priv *priv = data;
+
+	return rcar_thermal_get_current_temp(priv, temp);
+}
+
+static int rcar_thermal_get_temp(struct thermal_zone_device *zone, int *temp)
+{
+	struct rcar_thermal_priv *priv = rcar_zone_to_priv(zone);
+
+	return rcar_thermal_get_current_temp(priv, temp);
+}
+
 static int rcar_thermal_get_trip_type(struct thermal_zone_device *zone,
 				      int trip, enum thermal_trip_type *type)
 {
@@ -282,6 +299,10 @@
 	return 0;
 }
 
+static const struct thermal_zone_of_device_ops rcar_thermal_zone_of_ops = {
+	.get_temp	= rcar_thermal_of_get_temp,
+};
+
 static struct thermal_zone_device_ops rcar_thermal_zone_ops = {
 	.get_temp	= rcar_thermal_get_temp,
 	.get_trip_type	= rcar_thermal_get_trip_type,
@@ -318,14 +339,20 @@
 
 	priv = container_of(work, struct rcar_thermal_priv, work.work);
 
-	rcar_thermal_get_temp(priv->zone, &cctemp);
+	ret = rcar_thermal_get_current_temp(priv, &cctemp);
+	if (ret < 0)
+		return;
+
 	ret = rcar_thermal_update_temp(priv);
 	if (ret < 0)
 		return;
 
 	rcar_thermal_irq_enable(priv);
 
-	rcar_thermal_get_temp(priv->zone, &nctemp);
+	ret = rcar_thermal_get_current_temp(priv, &nctemp);
+	if (ret < 0)
+		return;
+
 	if (nctemp != cctemp)
 		thermal_zone_device_update(priv->zone);
 }
@@ -403,6 +430,8 @@
 	struct rcar_thermal_priv *priv;
 	struct device *dev = &pdev->dev;
 	struct resource *res, *irq;
+	const struct of_device_id *of_id = of_match_device(rcar_thermal_dt_ids, dev);
+	unsigned long of_data = (unsigned long)of_id->data;
 	int mres = 0;
 	int i;
 	int ret = -ENODEV;
@@ -463,7 +492,13 @@
 		if (ret < 0)
 			goto error_unregister;
 
-		priv->zone = thermal_zone_device_register("rcar_thermal",
+		if (of_data == USE_OF_THERMAL)
+			priv->zone = thermal_zone_of_sensor_register(
+						dev, i, priv,
+						&rcar_thermal_zone_of_ops);
+		else
+			priv->zone = thermal_zone_device_register(
+						"rcar_thermal",
 						1, 0, priv,
 						&rcar_thermal_zone_ops, NULL, 0,
 						idle);
diff --git a/drivers/thermal/spear_thermal.c b/drivers/thermal/spear_thermal.c
index 534dd91..81b35aa 100644
--- a/drivers/thermal/spear_thermal.c
+++ b/drivers/thermal/spear_thermal.c
@@ -54,8 +54,7 @@
 	.get_temp = thermal_get_temp,
 };
 
-#ifdef CONFIG_PM
-static int spear_thermal_suspend(struct device *dev)
+static int __maybe_unused spear_thermal_suspend(struct device *dev)
 {
 	struct platform_device *pdev = to_platform_device(dev);
 	struct thermal_zone_device *spear_thermal = platform_get_drvdata(pdev);
@@ -72,7 +71,7 @@
 	return 0;
 }
 
-static int spear_thermal_resume(struct device *dev)
+static int __maybe_unused spear_thermal_resume(struct device *dev)
 {
 	struct platform_device *pdev = to_platform_device(dev);
 	struct thermal_zone_device *spear_thermal = platform_get_drvdata(pdev);
@@ -94,7 +93,6 @@
 
 	return 0;
 }
-#endif
 
 static SIMPLE_DEV_PM_OPS(spear_thermal_pm_ops, spear_thermal_suspend,
 		spear_thermal_resume);
diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c
index d9a5fc2..b280abaa 100644
--- a/drivers/tty/n_tty.c
+++ b/drivers/tty/n_tty.c
@@ -269,16 +269,13 @@
 
 static void n_tty_check_unthrottle(struct tty_struct *tty)
 {
-	if (tty->driver->type == TTY_DRIVER_TYPE_PTY &&
-	    tty->link->ldisc->ops->write_wakeup == n_tty_write_wakeup) {
+	if (tty->driver->type == TTY_DRIVER_TYPE_PTY) {
 		if (chars_in_buffer(tty) > TTY_THRESHOLD_UNTHROTTLE)
 			return;
 		if (!tty->count)
 			return;
 		n_tty_kick_worker(tty);
-		n_tty_write_wakeup(tty->link);
-		if (waitqueue_active(&tty->link->write_wait))
-			wake_up_interruptible_poll(&tty->link->write_wait, POLLOUT);
+		tty_wakeup(tty->link);
 		return;
 	}
 
diff --git a/drivers/tty/pty.c b/drivers/tty/pty.c
index b311004..2348fa6 100644
--- a/drivers/tty/pty.c
+++ b/drivers/tty/pty.c
@@ -681,7 +681,14 @@
 /* this is called once with whichever end is closed last */
 static void pty_unix98_shutdown(struct tty_struct *tty)
 {
-	devpts_kill_index(tty->driver_data, tty->index);
+	struct inode *ptmx_inode;
+
+	if (tty->driver->subtype == PTY_TYPE_MASTER)
+		ptmx_inode = tty->driver_data;
+	else
+		ptmx_inode = tty->link->driver_data;
+	devpts_kill_index(ptmx_inode, tty->index);
+	devpts_del_ref(ptmx_inode);
 }
 
 static const struct tty_operations ptm_unix98_ops = {
@@ -773,6 +780,18 @@
 	set_bit(TTY_PTY_LOCK, &tty->flags); /* LOCK THE SLAVE */
 	tty->driver_data = inode;
 
+	/*
+	 * In the case where all references to ptmx inode are dropped and we
+	 * still have /dev/tty opened pointing to the master/slave pair (ptmx
+	 * is closed/released before /dev/tty), we must make sure that the inode
+	 * is still valid when we call the final pty_unix98_shutdown, thus we
+	 * hold an additional reference to the ptmx inode. For the same /dev/tty
+	 * last close case, we also need to make sure the super_block isn't
+	 * destroyed (devpts instance unmounted), before /dev/tty is closed and
+	 * on its release devpts_kill_index is called.
+	 */
+	devpts_add_ref(inode);
+
 	tty_add_file(tty, filp);
 
 	slave_inode = devpts_pty_new(inode,
diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c
index 4097f3f6..7cd6f9a 100644
--- a/drivers/tty/serial/8250/8250_pci.c
+++ b/drivers/tty/serial/8250/8250_pci.c
@@ -1379,6 +1379,9 @@
 #define PCI_DEVICE_ID_INTEL_BSW_UART1	0x228a
 #define PCI_DEVICE_ID_INTEL_BSW_UART2	0x228c
 
+#define PCI_DEVICE_ID_INTEL_BDW_UART1	0x9ce3
+#define PCI_DEVICE_ID_INTEL_BDW_UART2	0x9ce4
+
 #define BYT_PRV_CLK			0x800
 #define BYT_PRV_CLK_EN			(1 << 0)
 #define BYT_PRV_CLK_M_VAL_SHIFT		1
@@ -1461,11 +1464,13 @@
 	switch (pdev->device) {
 	case PCI_DEVICE_ID_INTEL_BYT_UART1:
 	case PCI_DEVICE_ID_INTEL_BSW_UART1:
+	case PCI_DEVICE_ID_INTEL_BDW_UART1:
 		rx_param->src_id = 3;
 		tx_param->dst_id = 2;
 		break;
 	case PCI_DEVICE_ID_INTEL_BYT_UART2:
 	case PCI_DEVICE_ID_INTEL_BSW_UART2:
+	case PCI_DEVICE_ID_INTEL_BDW_UART2:
 		rx_param->src_id = 5;
 		tx_param->dst_id = 4;
 		break;
@@ -1936,6 +1941,7 @@
 #define PCIE_VENDOR_ID_WCH		0x1c00
 #define PCIE_DEVICE_ID_WCH_CH382_2S1P	0x3250
 #define PCIE_DEVICE_ID_WCH_CH384_4S	0x3470
+#define PCIE_DEVICE_ID_WCH_CH382_2S	0x3253
 
 #define PCI_VENDOR_ID_PERICOM			0x12D8
 #define PCI_DEVICE_ID_PERICOM_PI7C9X7951	0x7951
@@ -2062,6 +2068,20 @@
 		.subdevice	= PCI_ANY_ID,
 		.setup		= byt_serial_setup,
 	},
+	{
+		.vendor		= PCI_VENDOR_ID_INTEL,
+		.device		= PCI_DEVICE_ID_INTEL_BDW_UART1,
+		.subvendor	= PCI_ANY_ID,
+		.subdevice	= PCI_ANY_ID,
+		.setup		= byt_serial_setup,
+	},
+	{
+		.vendor		= PCI_VENDOR_ID_INTEL,
+		.device		= PCI_DEVICE_ID_INTEL_BDW_UART2,
+		.subvendor	= PCI_ANY_ID,
+		.subdevice	= PCI_ANY_ID,
+		.setup		= byt_serial_setup,
+	},
 	/*
 	 * ITE
 	 */
@@ -2618,6 +2638,14 @@
 		.subdevice	= PCI_ANY_ID,
 		.setup		= pci_wch_ch353_setup,
 	},
+	/* WCH CH382 2S card (16850 clone) */
+	{
+		.vendor         = PCIE_VENDOR_ID_WCH,
+		.device         = PCIE_DEVICE_ID_WCH_CH382_2S,
+		.subvendor      = PCI_ANY_ID,
+		.subdevice      = PCI_ANY_ID,
+		.setup          = pci_wch_ch38x_setup,
+	},
 	/* WCH CH382 2S1P card (16850 clone) */
 	{
 		.vendor         = PCIE_VENDOR_ID_WCH,
@@ -2936,6 +2964,7 @@
 	pbn_fintek_4,
 	pbn_fintek_8,
 	pbn_fintek_12,
+	pbn_wch382_2,
 	pbn_wch384_4,
 	pbn_pericom_PI7C9X7951,
 	pbn_pericom_PI7C9X7952,
@@ -3756,6 +3785,13 @@
 		.base_baud	= 115200,
 		.first_offset	= 0x40,
 	},
+	[pbn_wch382_2] = {
+		.flags		= FL_BASE0,
+		.num_ports	= 2,
+		.base_baud	= 115200,
+		.uart_offset	= 8,
+		.first_offset	= 0xC0,
+	},
 	[pbn_wch384_4] = {
 		.flags		= FL_BASE0,
 		.num_ports	= 4,
@@ -5506,6 +5542,16 @@
 		PCI_CLASS_COMMUNICATION_SERIAL << 8, 0xff0000,
 		pbn_byt },
 
+	/* Intel Broadwell */
+	{	PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BDW_UART1,
+		PCI_ANY_ID,  PCI_ANY_ID,
+		PCI_CLASS_COMMUNICATION_SERIAL << 8, 0xff0000,
+		pbn_byt },
+	{	PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BDW_UART2,
+		PCI_ANY_ID,  PCI_ANY_ID,
+		PCI_CLASS_COMMUNICATION_SERIAL << 8, 0xff0000,
+		pbn_byt },
+
 	/*
 	 * Intel Quark x1000
 	 */
@@ -5545,6 +5591,10 @@
 		PCI_ANY_ID, PCI_ANY_ID,
 		0, 0, pbn_b0_bt_2_115200 },
 
+	{	PCIE_VENDOR_ID_WCH, PCIE_DEVICE_ID_WCH_CH382_2S,
+		PCI_ANY_ID, PCI_ANY_ID,
+		0, 0, pbn_wch382_2 },
+
 	{	PCIE_VENDOR_ID_WCH, PCIE_DEVICE_ID_WCH_CH384_4S,
 		PCI_ANY_ID, PCI_ANY_ID,
 		0, 0, pbn_wch384_4 },
diff --git a/drivers/tty/serial/omap-serial.c b/drivers/tty/serial/omap-serial.c
index b645f92..fa49eb1 100644
--- a/drivers/tty/serial/omap-serial.c
+++ b/drivers/tty/serial/omap-serial.c
@@ -1165,7 +1165,7 @@
 
 #define BOTH_EMPTY (UART_LSR_TEMT | UART_LSR_THRE)
 
-static void wait_for_xmitr(struct uart_omap_port *up)
+static void __maybe_unused wait_for_xmitr(struct uart_omap_port *up)
 {
 	unsigned int status, tmout = 10000;
 
@@ -1343,7 +1343,7 @@
 
 /* Enable or disable the rs485 support */
 static int
-serial_omap_config_rs485(struct uart_port *port, struct serial_rs485 *rs485conf)
+serial_omap_config_rs485(struct uart_port *port, struct serial_rs485 *rs485)
 {
 	struct uart_omap_port *up = to_uart_omap_port(port);
 	unsigned int mode;
@@ -1356,8 +1356,12 @@
 	up->ier = 0;
 	serial_out(up, UART_IER, 0);
 
+	/* Clamp the delays to [0, 100ms] */
+	rs485->delay_rts_before_send = min(rs485->delay_rts_before_send, 100U);
+	rs485->delay_rts_after_send  = min(rs485->delay_rts_after_send, 100U);
+
 	/* store new config */
-	port->rs485 = *rs485conf;
+	port->rs485 = *rs485;
 
 	/*
 	 * Just as a precaution, only allow rs485
diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c
index 892c923..a7eacef 100644
--- a/drivers/tty/tty_io.c
+++ b/drivers/tty/tty_io.c
@@ -1463,13 +1463,13 @@
 {
 	struct tty_driver *driver = tty->driver;
 
-	if (!tty->count)
-		return -EIO;
-
 	if (driver->type == TTY_DRIVER_TYPE_PTY &&
 	    driver->subtype == PTY_TYPE_MASTER)
 		return -EIO;
 
+	if (!tty->count)
+		return -EAGAIN;
+
 	if (test_bit(TTY_EXCLUSIVE, &tty->flags) && !capable(CAP_SYS_ADMIN))
 		return -EBUSY;
 
@@ -2065,9 +2065,13 @@
 
 		if (tty) {
 			mutex_unlock(&tty_mutex);
-			tty_lock(tty);
-			/* safe to drop the kref from tty_driver_lookup_tty() */
-			tty_kref_put(tty);
+			retval = tty_lock_interruptible(tty);
+			tty_kref_put(tty);  /* drop kref from tty_driver_lookup_tty() */
+			if (retval) {
+				if (retval == -EINTR)
+					retval = -ERESTARTSYS;
+				goto err_unref;
+			}
 			retval = tty_reopen(tty);
 			if (retval < 0) {
 				tty_unlock(tty);
@@ -2083,7 +2087,11 @@
 
 	if (IS_ERR(tty)) {
 		retval = PTR_ERR(tty);
-		goto err_file;
+		if (retval != -EAGAIN || signal_pending(current))
+			goto err_file;
+		tty_free_file(filp);
+		schedule();
+		goto retry_open;
 	}
 
 	tty_add_file(tty, filp);
@@ -2152,6 +2160,7 @@
 	return 0;
 err_unlock:
 	mutex_unlock(&tty_mutex);
+err_unref:
 	/* after locks to avoid deadlock */
 	if (!IS_ERR_OR_NULL(driver))
 		tty_driver_kref_put(driver);
@@ -2649,6 +2658,28 @@
 }
 
 /**
+ *	tiocgetd	-	get line discipline
+ *	@tty: tty device
+ *	@p: pointer to user data
+ *
+ *	Retrieves the line discipline id directly from the ldisc.
+ *
+ *	Locking: waits for ldisc reference (in case the line discipline
+ *		is changing or the tty is being hungup)
+ */
+
+static int tiocgetd(struct tty_struct *tty, int __user *p)
+{
+	struct tty_ldisc *ld;
+	int ret;
+
+	ld = tty_ldisc_ref_wait(tty);
+	ret = put_user(ld->ops->num, p);
+	tty_ldisc_deref(ld);
+	return ret;
+}
+
+/**
  *	send_break	-	performed time break
  *	@tty: device to break on
  *	@duration: timeout in mS
@@ -2874,7 +2905,7 @@
 	case TIOCGSID:
 		return tiocgsid(tty, real_tty, p);
 	case TIOCGETD:
-		return put_user(tty->ldisc->ops->num, (int __user *)p);
+		return tiocgetd(tty, p);
 	case TIOCSETD:
 		return tiocsetd(tty, p);
 	case TIOCVHANGUP:
diff --git a/drivers/tty/tty_mutex.c b/drivers/tty/tty_mutex.c
index 77703a39..dfa9ec0 100644
--- a/drivers/tty/tty_mutex.c
+++ b/drivers/tty/tty_mutex.c
@@ -19,6 +19,19 @@
 }
 EXPORT_SYMBOL(tty_lock);
 
+int tty_lock_interruptible(struct tty_struct *tty)
+{
+	int ret;
+
+	if (WARN(tty->magic != TTY_MAGIC, "L Bad %p\n", tty))
+		return -EIO;
+	tty_kref_get(tty);
+	ret = mutex_lock_interruptible(&tty->legacy_mutex);
+	if (ret)
+		tty_kref_put(tty);
+	return ret;
+}
+
 void __lockfunc tty_unlock(struct tty_struct *tty)
 {
 	if (WARN(tty->magic != TTY_MAGIC, "U Bad %p\n", tty))
diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c
index e7cbc44..bd51bdd 100644
--- a/drivers/tty/vt/vt.c
+++ b/drivers/tty/vt/vt.c
@@ -4250,6 +4250,7 @@
 {
 	return screenpos(vc, 2 * w_offset, viewed);
 }
+EXPORT_SYMBOL_GPL(screen_pos);
 
 void getconsxy(struct vc_data *vc, unsigned char *p)
 {
diff --git a/drivers/usb/chipidea/ci_hdrc_pci.c b/drivers/usb/chipidea/ci_hdrc_pci.c
index b59195e..b635ab6 100644
--- a/drivers/usb/chipidea/ci_hdrc_pci.c
+++ b/drivers/usb/chipidea/ci_hdrc_pci.c
@@ -85,8 +85,8 @@
 
 	/* register a nop PHY */
 	ci->phy = usb_phy_generic_register();
-	if (!ci->phy)
-		return -ENOMEM;
+	if (IS_ERR(ci->phy))
+		return PTR_ERR(ci->phy);
 
 	memset(res, 0, sizeof(res));
 	res[0].start	= pci_resource_start(pdev, 0);
diff --git a/drivers/usb/chipidea/debug.c b/drivers/usb/chipidea/debug.c
index a4f7db2..df47110 100644
--- a/drivers/usb/chipidea/debug.c
+++ b/drivers/usb/chipidea/debug.c
@@ -100,6 +100,9 @@
 	if (sscanf(buf, "%u", &mode) != 1)
 		return -EINVAL;
 
+	if (mode > 255)
+		return -EBADRQC;
+
 	pm_runtime_get_sync(ci->dev);
 	spin_lock_irqsave(&ci->lock, flags);
 	ret = hw_port_test_set(ci, mode);
diff --git a/drivers/usb/chipidea/otg.c b/drivers/usb/chipidea/otg.c
index 45f86da..03b6743 100644
--- a/drivers/usb/chipidea/otg.c
+++ b/drivers/usb/chipidea/otg.c
@@ -158,7 +158,7 @@
 int ci_hdrc_otg_init(struct ci_hdrc *ci)
 {
 	INIT_WORK(&ci->work, ci_otg_work);
-	ci->wq = create_singlethread_workqueue("ci_otg");
+	ci->wq = create_freezable_workqueue("ci_otg");
 	if (!ci->wq) {
 		dev_err(ci->dev, "can't create workqueue\n");
 		return -ENODEV;
diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c
index 26ca4f9..fa4e239 100644
--- a/drivers/usb/class/cdc-acm.c
+++ b/drivers/usb/class/cdc-acm.c
@@ -428,7 +428,8 @@
 		set_bit(rb->index, &acm->read_urbs_free);
 		dev_dbg(&acm->data->dev, "%s - non-zero urb status: %d\n",
 							__func__, status);
-		return;
+		if ((status != -ENOENT) || (urb->actual_length == 0))
+			return;
 	}
 
 	usb_mark_last_busy(acm->dev);
@@ -1404,6 +1405,8 @@
 				usb_sndbulkpipe(usb_dev, epwrite->bEndpointAddress),
 				NULL, acm->writesize, acm_write_bulk, snd);
 		snd->urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
+		if (quirks & SEND_ZERO_PACKET)
+			snd->urb->transfer_flags |= URB_ZERO_PACKET;
 		snd->instance = acm;
 	}
 
@@ -1838,6 +1841,11 @@
 	},
 #endif
 
+	/*Samsung phone in firmware update mode */
+	{ USB_DEVICE(0x04e8, 0x685d),
+	.driver_info = IGNORE_DEVICE,
+	},
+
 	/* Exclude Infineon Flash Loader utility */
 	{ USB_DEVICE(0x058b, 0x0041),
 	.driver_info = IGNORE_DEVICE,
@@ -1861,6 +1869,10 @@
 	{ USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_ACM,
 		USB_CDC_ACM_PROTO_AT_CDMA) },
 
+	{ USB_DEVICE(0x1519, 0x0452), /* Intel 7260 modem */
+	.driver_info = SEND_ZERO_PACKET,
+	},
+
 	{ }
 };
 
diff --git a/drivers/usb/class/cdc-acm.h b/drivers/usb/class/cdc-acm.h
index dd9af38..ccfaba9 100644
--- a/drivers/usb/class/cdc-acm.h
+++ b/drivers/usb/class/cdc-acm.h
@@ -134,3 +134,4 @@
 #define IGNORE_DEVICE			BIT(5)
 #define QUIRK_CONTROL_LINE_STATE	BIT(6)
 #define CLEAR_HALT_CONDITIONS		BIT(7)
+#define SEND_ZERO_PACKET		BIT(8)
diff --git a/drivers/usb/dwc2/Kconfig b/drivers/usb/dwc2/Kconfig
index fd95ba6..f0decc0 100644
--- a/drivers/usb/dwc2/Kconfig
+++ b/drivers/usb/dwc2/Kconfig
@@ -1,5 +1,6 @@
 config USB_DWC2
 	tristate "DesignWare USB2 DRD Core Support"
+	depends on HAS_DMA
 	depends on USB || USB_GADGET
 	help
 	  Say Y here if your system has a Dual Role Hi-Speed USB
diff --git a/drivers/usb/dwc2/core.c b/drivers/usb/dwc2/core.c
index 39a0fa8..46c4ba7 100644
--- a/drivers/usb/dwc2/core.c
+++ b/drivers/usb/dwc2/core.c
@@ -572,12 +572,6 @@
 	set = host ? GUSBCFG_FORCEHOSTMODE : GUSBCFG_FORCEDEVMODE;
 	clear = host ? GUSBCFG_FORCEDEVMODE : GUSBCFG_FORCEHOSTMODE;
 
-	/*
-	 * If the force mode bit is already set, don't set it.
-	 */
-	if ((gusbcfg & set) && !(gusbcfg & clear))
-		return false;
-
 	gusbcfg &= ~clear;
 	gusbcfg |= set;
 	dwc2_writel(gusbcfg, hsotg->regs + GUSBCFG);
@@ -625,6 +619,12 @@
 			 __func__, hsotg->dr_mode);
 		break;
 	}
+
+	/*
+	 * NOTE: This is required for some rockchip soc based
+	 * platforms.
+	 */
+	msleep(50);
 }
 
 /*
@@ -3278,9 +3278,6 @@
 /**
  * During device initialization, read various hardware configuration
  * registers and interpret the contents.
- *
- * This should be called during driver probe. It will perform a core
- * soft reset in order to get the reset values of the parameters.
  */
 int dwc2_get_hwparams(struct dwc2_hsotg *hsotg)
 {
@@ -3288,7 +3285,6 @@
 	unsigned width;
 	u32 hwcfg1, hwcfg2, hwcfg3, hwcfg4;
 	u32 grxfsiz;
-	int retval;
 
 	/*
 	 * Attempt to ensure this device is really a DWC_otg Controller.
@@ -3308,10 +3304,6 @@
 		hw->snpsid >> 12 & 0xf, hw->snpsid >> 8 & 0xf,
 		hw->snpsid >> 4 & 0xf, hw->snpsid & 0xf, hw->snpsid);
 
-	retval = dwc2_core_reset(hsotg);
-	if (retval)
-		return retval;
-
 	hwcfg1 = dwc2_readl(hsotg->regs + GHWCFG1);
 	hwcfg2 = dwc2_readl(hsotg->regs + GHWCFG2);
 	hwcfg3 = dwc2_readl(hsotg->regs + GHWCFG3);
diff --git a/drivers/usb/dwc2/hcd_ddma.c b/drivers/usb/dwc2/hcd_ddma.c
index 36606fc..a41274a 100644
--- a/drivers/usb/dwc2/hcd_ddma.c
+++ b/drivers/usb/dwc2/hcd_ddma.c
@@ -1174,14 +1174,11 @@
 	failed = dwc2_update_non_isoc_urb_state_ddma(hsotg, chan, qtd, dma_desc,
 						     halt_status, n_bytes,
 						     xfer_done);
-	if (*xfer_done && urb->status != -EINPROGRESS)
-		failed = 1;
-
-	if (failed) {
+	if (failed || (*xfer_done && urb->status != -EINPROGRESS)) {
 		dwc2_host_complete(hsotg, qtd, urb->status);
 		dwc2_hcd_qtd_unlink_and_free(hsotg, qtd, qh);
-		dev_vdbg(hsotg->dev, "failed=%1x xfer_done=%1x status=%08x\n",
-			 failed, *xfer_done, urb->status);
+		dev_vdbg(hsotg->dev, "failed=%1x xfer_done=%1x\n",
+			 failed, *xfer_done);
 		return failed;
 	}
 
@@ -1236,21 +1233,23 @@
 
 	list_for_each_safe(qtd_item, qtd_tmp, &qh->qtd_list) {
 		int i;
+		int qtd_desc_count;
 
 		qtd = list_entry(qtd_item, struct dwc2_qtd, qtd_list_entry);
 		xfer_done = 0;
+		qtd_desc_count = qtd->n_desc;
 
-		for (i = 0; i < qtd->n_desc; i++) {
+		for (i = 0; i < qtd_desc_count; i++) {
 			if (dwc2_process_non_isoc_desc(hsotg, chan, chnum, qtd,
 						       desc_num, halt_status,
-						       &xfer_done)) {
-				qtd = NULL;
-				break;
-			}
+						       &xfer_done))
+				goto stop_scan;
+
 			desc_num++;
 		}
 	}
 
+stop_scan:
 	if (qh->ep_type != USB_ENDPOINT_XFER_CONTROL) {
 		/*
 		 * Resetting the data toggle for bulk and interrupt endpoints
@@ -1258,7 +1257,7 @@
 		 */
 		if (halt_status == DWC2_HC_XFER_STALL)
 			qh->data_toggle = DWC2_HC_PID_DATA0;
-		else if (qtd)
+		else
 			dwc2_hcd_save_data_toggle(hsotg, chan, chnum, qtd);
 	}
 
diff --git a/drivers/usb/dwc2/hcd_intr.c b/drivers/usb/dwc2/hcd_intr.c
index f825380..cadba8b 100644
--- a/drivers/usb/dwc2/hcd_intr.c
+++ b/drivers/usb/dwc2/hcd_intr.c
@@ -525,11 +525,19 @@
 	u32 pid = (hctsiz & TSIZ_SC_MC_PID_MASK) >> TSIZ_SC_MC_PID_SHIFT;
 
 	if (chan->ep_type != USB_ENDPOINT_XFER_CONTROL) {
+		if (WARN(!chan || !chan->qh,
+			 "chan->qh must be specified for non-control eps\n"))
+			return;
+
 		if (pid == TSIZ_SC_MC_PID_DATA0)
 			chan->qh->data_toggle = DWC2_HC_PID_DATA0;
 		else
 			chan->qh->data_toggle = DWC2_HC_PID_DATA1;
 	} else {
+		if (WARN(!qtd,
+			 "qtd must be specified for control eps\n"))
+			return;
+
 		if (pid == TSIZ_SC_MC_PID_DATA0)
 			qtd->data_toggle = DWC2_HC_PID_DATA0;
 		else
diff --git a/drivers/usb/dwc2/platform.c b/drivers/usb/dwc2/platform.c
index 510f787..690b9fd 100644
--- a/drivers/usb/dwc2/platform.c
+++ b/drivers/usb/dwc2/platform.c
@@ -530,7 +530,13 @@
 	if (retval)
 		return retval;
 
-	/* Reset the controller and detect hardware config values */
+	/*
+	 * Reset before dwc2_get_hwparams() then it could get power-on real
+	 * reset value form registers.
+	 */
+	dwc2_core_reset_and_force_dr_mode(hsotg);
+
+	/* Detect config values from hardware */
 	retval = dwc2_get_hwparams(hsotg);
 	if (retval)
 		goto error;
diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h
index 2913068..e4f8b90 100644
--- a/drivers/usb/dwc3/core.h
+++ b/drivers/usb/dwc3/core.h
@@ -856,7 +856,6 @@
 	unsigned		pullups_connected:1;
 	unsigned		resize_fifos:1;
 	unsigned		setup_packet_pending:1;
-	unsigned		start_config_issued:1;
 	unsigned		three_stage_setup:1;
 	unsigned		usb3_lpm_capable:1;
 
diff --git a/drivers/usb/dwc3/ep0.c b/drivers/usb/dwc3/ep0.c
index 3a9354a..8d6b75c 100644
--- a/drivers/usb/dwc3/ep0.c
+++ b/drivers/usb/dwc3/ep0.c
@@ -555,7 +555,6 @@
 	int ret;
 	u32 reg;
 
-	dwc->start_config_issued = false;
 	cfg = le16_to_cpu(ctrl->wValue);
 
 	switch (state) {
@@ -737,10 +736,6 @@
 		dwc3_trace(trace_dwc3_ep0, "USB_REQ_SET_ISOCH_DELAY");
 		ret = dwc3_ep0_set_isoch_delay(dwc, ctrl);
 		break;
-	case USB_REQ_SET_INTERFACE:
-		dwc3_trace(trace_dwc3_ep0, "USB_REQ_SET_INTERFACE");
-		dwc->start_config_issued = false;
-		/* Fall through */
 	default:
 		dwc3_trace(trace_dwc3_ep0, "Forwarding to gadget driver");
 		ret = dwc3_ep0_delegate_req(dwc, ctrl);
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index af023a8..2363bad 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -385,24 +385,66 @@
 	dep->trb_pool_dma = 0;
 }
 
+static int dwc3_gadget_set_xfer_resource(struct dwc3 *dwc, struct dwc3_ep *dep);
+
+/**
+ * dwc3_gadget_start_config - Configure EP resources
+ * @dwc: pointer to our controller context structure
+ * @dep: endpoint that is being enabled
+ *
+ * The assignment of transfer resources cannot perfectly follow the
+ * data book due to the fact that the controller driver does not have
+ * all knowledge of the configuration in advance. It is given this
+ * information piecemeal by the composite gadget framework after every
+ * SET_CONFIGURATION and SET_INTERFACE. Trying to follow the databook
+ * programming model in this scenario can cause errors. For two
+ * reasons:
+ *
+ * 1) The databook says to do DEPSTARTCFG for every SET_CONFIGURATION
+ * and SET_INTERFACE (8.1.5). This is incorrect in the scenario of
+ * multiple interfaces.
+ *
+ * 2) The databook does not mention doing more DEPXFERCFG for new
+ * endpoint on alt setting (8.1.6).
+ *
+ * The following simplified method is used instead:
+ *
+ * All hardware endpoints can be assigned a transfer resource and this
+ * setting will stay persistent until either a core reset or
+ * hibernation. So whenever we do a DEPSTARTCFG(0) we can go ahead and
+ * do DEPXFERCFG for every hardware endpoint as well. We are
+ * guaranteed that there are as many transfer resources as endpoints.
+ *
+ * This function is called for each endpoint when it is being enabled
+ * but is triggered only when called for EP0-out, which always happens
+ * first, and which should only happen in one of the above conditions.
+ */
 static int dwc3_gadget_start_config(struct dwc3 *dwc, struct dwc3_ep *dep)
 {
 	struct dwc3_gadget_ep_cmd_params params;
 	u32			cmd;
+	int			i;
+	int			ret;
+
+	if (dep->number)
+		return 0;
 
 	memset(&params, 0x00, sizeof(params));
+	cmd = DWC3_DEPCMD_DEPSTARTCFG;
 
-	if (dep->number != 1) {
-		cmd = DWC3_DEPCMD_DEPSTARTCFG;
-		/* XferRscIdx == 0 for ep0 and 2 for the remaining */
-		if (dep->number > 1) {
-			if (dwc->start_config_issued)
-				return 0;
-			dwc->start_config_issued = true;
-			cmd |= DWC3_DEPCMD_PARAM(2);
-		}
+	ret = dwc3_send_gadget_ep_cmd(dwc, 0, cmd, &params);
+	if (ret)
+		return ret;
 
-		return dwc3_send_gadget_ep_cmd(dwc, 0, cmd, &params);
+	for (i = 0; i < DWC3_ENDPOINTS_NUM; i++) {
+		struct dwc3_ep *dep = dwc->eps[i];
+
+		if (!dep)
+			continue;
+
+		ret = dwc3_gadget_set_xfer_resource(dwc, dep);
+		if (ret)
+			return ret;
 	}
 
 	return 0;
@@ -516,10 +558,6 @@
 		struct dwc3_trb	*trb_st_hw;
 		struct dwc3_trb	*trb_link;
 
-		ret = dwc3_gadget_set_xfer_resource(dwc, dep);
-		if (ret)
-			return ret;
-
 		dep->endpoint.desc = desc;
 		dep->comp_desc = comp_desc;
 		dep->type = usb_endpoint_type(desc);
@@ -1636,8 +1674,6 @@
 	}
 	dwc3_writel(dwc->regs, DWC3_DCFG, reg);
 
-	dwc->start_config_issued = false;
-
 	/* Start with SuperSpeed Default */
 	dwc3_gadget_ep0_desc.wMaxPacketSize = cpu_to_le16(512);
 
@@ -2237,7 +2273,6 @@
 	dwc3_writel(dwc->regs, DWC3_DCTL, reg);
 
 	dwc3_disconnect_gadget(dwc);
-	dwc->start_config_issued = false;
 
 	dwc->gadget.speed = USB_SPEED_UNKNOWN;
 	dwc->setup_packet_pending = false;
@@ -2288,7 +2323,6 @@
 
 	dwc3_stop_active_transfers(dwc);
 	dwc3_clear_stall_all_ep(dwc);
-	dwc->start_config_issued = false;
 
 	/* Reset device address to zero */
 	reg = dwc3_readl(dwc->regs, DWC3_DCFG);
@@ -2789,6 +2823,7 @@
 	dwc->gadget.speed		= USB_SPEED_UNKNOWN;
 	dwc->gadget.sg_supported	= true;
 	dwc->gadget.name		= "dwc3-gadget";
+	dwc->gadget.is_otg		= dwc->dr_mode == USB_DR_MODE_OTG;
 
 	/*
 	 * FIXME We might be setting max_speed to <SUPER, however versions
diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c
index 7e179f8..87fb0fd 100644
--- a/drivers/usb/gadget/legacy/inode.c
+++ b/drivers/usb/gadget/legacy/inode.c
@@ -130,7 +130,8 @@
 					setup_can_stall : 1,
 					setup_out_ready : 1,
 					setup_out_error : 1,
-					setup_abort : 1;
+					setup_abort : 1,
+					gadget_registered : 1;
 	unsigned			setup_wLength;
 
 	/* the rest is basically write-once */
@@ -1179,7 +1180,8 @@
 
 	/* closing ep0 === shutdown all */
 
-	usb_gadget_unregister_driver (&gadgetfs_driver);
+	if (dev->gadget_registered)
+		usb_gadget_unregister_driver (&gadgetfs_driver);
 
 	/* at this point "good" hardware has disconnected the
 	 * device from USB; the host won't see it any more.
@@ -1847,6 +1849,7 @@
 		 * kick in after the ep0 descriptor is closed.
 		 */
 		value = len;
+		dev->gadget_registered = true;
 	}
 	return value;
 
diff --git a/drivers/usb/gadget/udc/fsl_qe_udc.c b/drivers/usb/gadget/udc/fsl_qe_udc.c
index 53c0692..93d28cb 100644
--- a/drivers/usb/gadget/udc/fsl_qe_udc.c
+++ b/drivers/usb/gadget/udc/fsl_qe_udc.c
@@ -2340,7 +2340,7 @@
 {
 	struct qe_udc *udc;
 	struct device_node *np = ofdev->dev.of_node;
-	unsigned int tmp_addr = 0;
+	unsigned long tmp_addr = 0;
 	struct usb_device_para __iomem *usbpram;
 	unsigned int i;
 	u64 size;
diff --git a/drivers/usb/gadget/udc/net2280.h b/drivers/usb/gadget/udc/net2280.h
index 4dff60d..0d32052 100644
--- a/drivers/usb/gadget/udc/net2280.h
+++ b/drivers/usb/gadget/udc/net2280.h
@@ -369,9 +369,20 @@
 	static const u32 ep_enhanced[9] = { 0x10, 0x60, 0x30, 0x80,
 					  0x50, 0x20, 0x70, 0x40, 0x90 };
 
-	if (ep->dev->enhanced_mode)
+	if (ep->dev->enhanced_mode) {
 		reg = ep_enhanced[ep->num];
-	else{
+		switch (ep->dev->gadget.speed) {
+		case USB_SPEED_SUPER:
+			reg += 2;
+			break;
+		case USB_SPEED_FULL:
+			reg += 1;
+			break;
+		case USB_SPEED_HIGH:
+		default:
+			break;
+		}
+	} else {
 		reg = (ep->num + 1) * 0x10;
 		if (ep->dev->gadget.speed != USB_SPEED_HIGH)
 			reg += 1;
diff --git a/drivers/usb/gadget/udc/udc-core.c b/drivers/usb/gadget/udc/udc-core.c
index fd73a3e..b86a6f0 100644
--- a/drivers/usb/gadget/udc/udc-core.c
+++ b/drivers/usb/gadget/udc/udc-core.c
@@ -413,9 +413,10 @@
 		if (!driver->udc_name || strcmp(driver->udc_name,
 						dev_name(&udc->dev)) == 0) {
 			ret = udc_bind_to_driver(udc, driver);
+			if (ret != -EPROBE_DEFER)
+				list_del(&driver->pending);
 			if (ret)
 				goto err4;
-			list_del(&driver->pending);
 			break;
 		}
 	}
diff --git a/drivers/usb/host/xhci-ext-caps.h b/drivers/usb/host/xhci-ext-caps.h
index 04ce6b1..e0244fb 100644
--- a/drivers/usb/host/xhci-ext-caps.h
+++ b/drivers/usb/host/xhci-ext-caps.h
@@ -112,12 +112,16 @@
 	offset = start;
 	if (!start || start == XHCI_HCC_PARAMS_OFFSET) {
 		val = readl(base + XHCI_HCC_PARAMS_OFFSET);
+		if (val == ~0)
+			return 0;
 		offset = XHCI_HCC_EXT_CAPS(val) << 2;
 		if (!offset)
 			return 0;
 	};
 	do {
 		val = readl(base + offset);
+		if (val == ~0)
+			return 0;
 		if (XHCI_EXT_CAPS_ID(val) == id && offset != start)
 			return offset;
 
diff --git a/drivers/usb/host/xhci-mtk-sch.c b/drivers/usb/host/xhci-mtk-sch.c
index c30de7c..73f763c 100644
--- a/drivers/usb/host/xhci-mtk-sch.c
+++ b/drivers/usb/host/xhci-mtk-sch.c
@@ -275,8 +275,9 @@
 		return false;
 
 	/*
-	 * for LS & FS periodic endpoints which its device don't attach
-	 * to TT are also ignored, root-hub will schedule them directly
+	 * for LS & FS periodic endpoints which its device is not behind
+	 * a TT are also ignored, root-hub will schedule them directly,
+	 * but need set @bpkts field of endpoint context to 1.
 	 */
 	if (is_fs_or_ls(speed) && !has_tt)
 		return false;
@@ -339,8 +340,17 @@
 		GET_MAX_PACKET(usb_endpoint_maxp(&ep->desc)),
 		usb_endpoint_dir_in(&ep->desc), ep);
 
-	if (!need_bw_sch(ep, udev->speed, slot_ctx->tt_info & TT_SLOT))
+	if (!need_bw_sch(ep, udev->speed, slot_ctx->tt_info & TT_SLOT)) {
+		/*
+		 * set @bpkts to 1 if it is LS or FS periodic endpoint, and its
+		 * device does not connected through an external HS hub
+		 */
+		if (usb_endpoint_xfer_int(&ep->desc)
+			|| usb_endpoint_xfer_isoc(&ep->desc))
+			ep_ctx->reserved[0] |= cpu_to_le32(EP_BPKTS(1));
+
 		return 0;
+	}
 
 	bw_index = get_bw_index(xhci, udev, ep);
 	sch_bw = &sch_array[bw_index];
diff --git a/drivers/usb/host/xhci-mtk.c b/drivers/usb/host/xhci-mtk.c
index c9ab6a4..9532f5a 100644
--- a/drivers/usb/host/xhci-mtk.c
+++ b/drivers/usb/host/xhci-mtk.c
@@ -696,9 +696,24 @@
 }
 
 #ifdef CONFIG_PM_SLEEP
+/*
+ * if ip sleep fails, and all clocks are disabled, access register will hang
+ * AHB bus, so stop polling roothubs to avoid regs access on bus suspend.
+ * and no need to check whether ip sleep failed or not; this will cause SPM
+ * to wake up system immediately after system suspend complete if ip sleep
+ * fails, it is what we wanted.
+ */
 static int xhci_mtk_suspend(struct device *dev)
 {
 	struct xhci_hcd_mtk *mtk = dev_get_drvdata(dev);
+	struct usb_hcd *hcd = mtk->hcd;
+	struct xhci_hcd *xhci = hcd_to_xhci(hcd);
+
+	xhci_dbg(xhci, "%s: stop port polling\n", __func__);
+	clear_bit(HCD_FLAG_POLL_RH, &hcd->flags);
+	del_timer_sync(&hcd->rh_timer);
+	clear_bit(HCD_FLAG_POLL_RH, &xhci->shared_hcd->flags);
+	del_timer_sync(&xhci->shared_hcd->rh_timer);
 
 	xhci_mtk_host_disable(mtk);
 	xhci_mtk_phy_power_off(mtk);
@@ -710,11 +725,19 @@
 static int xhci_mtk_resume(struct device *dev)
 {
 	struct xhci_hcd_mtk *mtk = dev_get_drvdata(dev);
+	struct usb_hcd *hcd = mtk->hcd;
+	struct xhci_hcd *xhci = hcd_to_xhci(hcd);
 
 	usb_wakeup_disable(mtk);
 	xhci_mtk_clks_enable(mtk);
 	xhci_mtk_phy_power_on(mtk);
 	xhci_mtk_host_enable(mtk);
+
+	xhci_dbg(xhci, "%s: restart port polling\n", __func__);
+	set_bit(HCD_FLAG_POLL_RH, &hcd->flags);
+	usb_hcd_poll_rh_status(hcd);
+	set_bit(HCD_FLAG_POLL_RH, &xhci->shared_hcd->flags);
+	usb_hcd_poll_rh_status(xhci->shared_hcd);
 	return 0;
 }
 
diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
index 58c43ed..f0640b7 100644
--- a/drivers/usb/host/xhci-pci.c
+++ b/drivers/usb/host/xhci-pci.c
@@ -28,7 +28,9 @@
 #include "xhci.h"
 #include "xhci-trace.h"
 
-#define PORT2_SSIC_CONFIG_REG2	0x883c
+#define SSIC_PORT_NUM		2
+#define SSIC_PORT_CFG2		0x880c
+#define SSIC_PORT_CFG2_OFFSET	0x30
 #define PROG_DONE		(1 << 30)
 #define SSIC_PORT_UNUSED	(1 << 31)
 
@@ -45,6 +47,7 @@
 #define PCI_DEVICE_ID_INTEL_CHERRYVIEW_XHCI		0x22b5
 #define PCI_DEVICE_ID_INTEL_SUNRISEPOINT_H_XHCI		0xa12f
 #define PCI_DEVICE_ID_INTEL_SUNRISEPOINT_LP_XHCI	0x9d2f
+#define PCI_DEVICE_ID_INTEL_BROXTON_M_XHCI		0x0aa8
 
 static const char hcd_name[] = "xhci_hcd";
 
@@ -151,9 +154,14 @@
 	if (pdev->vendor == PCI_VENDOR_ID_INTEL &&
 		(pdev->device == PCI_DEVICE_ID_INTEL_SUNRISEPOINT_LP_XHCI ||
 		 pdev->device == PCI_DEVICE_ID_INTEL_SUNRISEPOINT_H_XHCI ||
-		 pdev->device == PCI_DEVICE_ID_INTEL_CHERRYVIEW_XHCI)) {
+		 pdev->device == PCI_DEVICE_ID_INTEL_CHERRYVIEW_XHCI ||
+		 pdev->device == PCI_DEVICE_ID_INTEL_BROXTON_M_XHCI)) {
 		xhci->quirks |= XHCI_PME_STUCK_QUIRK;
 	}
+	if (pdev->vendor == PCI_VENDOR_ID_INTEL &&
+		 pdev->device == PCI_DEVICE_ID_INTEL_CHERRYVIEW_XHCI) {
+		xhci->quirks |= XHCI_SSIC_PORT_UNUSED;
+	}
 	if (pdev->vendor == PCI_VENDOR_ID_ETRON &&
 			pdev->device == PCI_DEVICE_ID_EJ168) {
 		xhci->quirks |= XHCI_RESET_ON_RESUME;
@@ -312,22 +320,20 @@
  * SSIC PORT need to be marked as "unused" before putting xHCI
  * into D3. After D3 exit, the SSIC port need to be marked as "used".
  * Without this change, xHCI might not enter D3 state.
- * Make sure PME works on some Intel xHCI controllers by writing 1 to clear
- * the Internal PME flag bit in vendor specific PMCTRL register at offset 0x80a4
  */
-static void xhci_pme_quirk(struct usb_hcd *hcd, bool suspend)
+static void xhci_ssic_port_unused_quirk(struct usb_hcd *hcd, bool suspend)
 {
 	struct xhci_hcd	*xhci = hcd_to_xhci(hcd);
-	struct pci_dev		*pdev = to_pci_dev(hcd->self.controller);
 	u32 val;
 	void __iomem *reg;
+	int i;
 
-	if (pdev->vendor == PCI_VENDOR_ID_INTEL &&
-		 pdev->device == PCI_DEVICE_ID_INTEL_CHERRYVIEW_XHCI) {
+	for (i = 0; i < SSIC_PORT_NUM; i++) {
+		reg = (void __iomem *) xhci->cap_regs +
+				SSIC_PORT_CFG2 +
+				i * SSIC_PORT_CFG2_OFFSET;
 
-		reg = (void __iomem *) xhci->cap_regs + PORT2_SSIC_CONFIG_REG2;
-
-		/* Notify SSIC that SSIC profile programming is not done */
+		/* Notify SSIC that SSIC profile programming is not done. */
 		val = readl(reg) & ~PROG_DONE;
 		writel(val, reg);
 
@@ -344,6 +350,17 @@
 		writel(val, reg);
 		readl(reg);
 	}
+}
+
+/*
+ * Make sure PME works on some Intel xHCI controllers by writing 1 to clear
+ * the Internal PME flag bit in vendor specific PMCTRL register at offset 0x80a4
+ */
+static void xhci_pme_quirk(struct usb_hcd *hcd)
+{
+	struct xhci_hcd	*xhci = hcd_to_xhci(hcd);
+	void __iomem *reg;
+	u32 val;
 
 	reg = (void __iomem *) xhci->cap_regs + 0x80a4;
 	val = readl(reg);
@@ -355,6 +372,7 @@
 {
 	struct xhci_hcd	*xhci = hcd_to_xhci(hcd);
 	struct pci_dev		*pdev = to_pci_dev(hcd->self.controller);
+	int			ret;
 
 	/*
 	 * Systems with the TI redriver that loses port status change events
@@ -364,9 +382,16 @@
 		pdev->no_d3cold = true;
 
 	if (xhci->quirks & XHCI_PME_STUCK_QUIRK)
-		xhci_pme_quirk(hcd, true);
+		xhci_pme_quirk(hcd);
 
-	return xhci_suspend(xhci, do_wakeup);
+	if (xhci->quirks & XHCI_SSIC_PORT_UNUSED)
+		xhci_ssic_port_unused_quirk(hcd, true);
+
+	ret = xhci_suspend(xhci, do_wakeup);
+	if (ret && (xhci->quirks & XHCI_SSIC_PORT_UNUSED))
+		xhci_ssic_port_unused_quirk(hcd, false);
+
+	return ret;
 }
 
 static int xhci_pci_resume(struct usb_hcd *hcd, bool hibernated)
@@ -396,8 +421,11 @@
 	if (pdev->vendor == PCI_VENDOR_ID_INTEL)
 		usb_enable_intel_xhci_ports(pdev);
 
+	if (xhci->quirks & XHCI_SSIC_PORT_UNUSED)
+		xhci_ssic_port_unused_quirk(hcd, false);
+
 	if (xhci->quirks & XHCI_PME_STUCK_QUIRK)
-		xhci_pme_quirk(hcd, false);
+		xhci_pme_quirk(hcd);
 
 	retval = xhci_resume(xhci, hibernated);
 	return retval;
diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c
index 770b6b0..d39d6bf 100644
--- a/drivers/usb/host/xhci-plat.c
+++ b/drivers/usb/host/xhci-plat.c
@@ -184,7 +184,8 @@
 		struct xhci_plat_priv *priv = hcd_to_xhci_priv(hcd);
 
 		/* Just copy data for now */
-		*priv = *priv_match;
+		if (priv_match)
+			*priv = *priv_match;
 	}
 
 	if (xhci_plat_type_is(hcd, XHCI_PLAT_TYPE_MARVELL_ARMADA)) {
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index f1c21c4..3915657 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -2193,10 +2193,6 @@
 		}
 	/* Fast path - was this the last TRB in the TD for this URB? */
 	} else if (event_trb == td->last_trb) {
-		if (td->urb_length_set && trb_comp_code == COMP_SHORT_TX)
-			return finish_td(xhci, td, event_trb, event, ep,
-					 status, false);
-
 		if (EVENT_TRB_LEN(le32_to_cpu(event->transfer_len)) != 0) {
 			td->urb->actual_length =
 				td->urb->transfer_buffer_length -
@@ -2248,12 +2244,6 @@
 			td->urb->actual_length +=
 				TRB_LEN(le32_to_cpu(cur_trb->generic.field[2])) -
 				EVENT_TRB_LEN(le32_to_cpu(event->transfer_len));
-
-		if (trb_comp_code == COMP_SHORT_TX) {
-			xhci_dbg(xhci, "mid bulk/intr SP, wait for last TRB event\n");
-			td->urb_length_set = true;
-			return 0;
-		}
 	}
 
 	return finish_td(xhci, td, event_trb, event, ep, status, false);
diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index 26a44c0..0c8087d 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -1554,7 +1554,9 @@
 		xhci_dbg_trace(xhci, trace_xhci_dbg_cancel_urb,
 				"HW died, freeing TD.");
 		urb_priv = urb->hcpriv;
-		for (i = urb_priv->td_cnt; i < urb_priv->length; i++) {
+		for (i = urb_priv->td_cnt;
+		     i < urb_priv->length && xhci->devs[urb->dev->slot_id];
+		     i++) {
 			td = urb_priv->td[i];
 			if (!list_empty(&td->td_list))
 				list_del_init(&td->td_list);
diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
index 9be7348..cc65138 100644
--- a/drivers/usb/host/xhci.h
+++ b/drivers/usb/host/xhci.h
@@ -1631,6 +1631,7 @@
 #define XHCI_BROKEN_STREAMS	(1 << 19)
 #define XHCI_PME_STUCK_QUIRK	(1 << 20)
 #define XHCI_MTK_HOST		(1 << 21)
+#define XHCI_SSIC_PORT_UNUSED	(1 << 22)
 	unsigned int		num_active_eps;
 	unsigned int		limit_active_eps;
 	/* There are two roothubs to keep track of bus suspend info for */
diff --git a/drivers/usb/musb/musb_host.c b/drivers/usb/musb/musb_host.c
index 795a45b..58487a4 100644
--- a/drivers/usb/musb/musb_host.c
+++ b/drivers/usb/musb/musb_host.c
@@ -662,7 +662,7 @@
 		csr &= ~(MUSB_TXCSR_AUTOSET | MUSB_TXCSR_DMAMODE);
 		csr |= MUSB_TXCSR_DMAENAB; /* against programmer's guide */
 	}
-	channel->desired_mode = mode;
+	channel->desired_mode = *mode;
 	musb_writew(epio, MUSB_TXCSR, csr);
 
 	return 0;
@@ -2003,10 +2003,8 @@
 				qh->offset,
 				urb->transfer_buffer_length);
 
-			done = musb_rx_dma_in_inventra_cppi41(c, hw_ep, qh,
-							      urb, xfer_len,
-							      iso_err);
-			if (done)
+			if (musb_rx_dma_in_inventra_cppi41(c, hw_ep, qh, urb,
+							   xfer_len, iso_err))
 				goto finish;
 			else
 				dev_err(musb->controller, "error: rx_dma failed\n");
diff --git a/drivers/usb/musb/ux500.c b/drivers/usb/musb/ux500.c
index b2685e7..3eaa4ba 100644
--- a/drivers/usb/musb/ux500.c
+++ b/drivers/usb/musb/ux500.c
@@ -348,7 +348,9 @@
 	struct ux500_glue	*glue = dev_get_drvdata(dev);
 	struct musb		*musb = glue_to_musb(glue);
 
-	usb_phy_set_suspend(musb->xceiv, 1);
+	if (musb)
+		usb_phy_set_suspend(musb->xceiv, 1);
+
 	clk_disable_unprepare(glue->clk);
 
 	return 0;
@@ -366,7 +368,8 @@
 		return ret;
 	}
 
-	usb_phy_set_suspend(musb->xceiv, 0);
+	if (musb)
+		usb_phy_set_suspend(musb->xceiv, 0);
 
 	return 0;
 }
diff --git a/drivers/usb/phy/phy-msm-usb.c b/drivers/usb/phy/phy-msm-usb.c
index 0d19a6d..72b387d 100644
--- a/drivers/usb/phy/phy-msm-usb.c
+++ b/drivers/usb/phy/phy-msm-usb.c
@@ -757,14 +757,8 @@
 	otg->host = host;
 	dev_dbg(otg->usb_phy->dev, "host driver registered w/ tranceiver\n");
 
-	/*
-	 * Kick the state machine work, if peripheral is not supported
-	 * or peripheral is already registered with us.
-	 */
-	if (motg->pdata->mode == USB_DR_MODE_HOST || otg->gadget) {
-		pm_runtime_get_sync(otg->usb_phy->dev);
-		schedule_work(&motg->sm_work);
-	}
+	pm_runtime_get_sync(otg->usb_phy->dev);
+	schedule_work(&motg->sm_work);
 
 	return 0;
 }
@@ -827,14 +821,8 @@
 	dev_dbg(otg->usb_phy->dev,
 		"peripheral driver registered w/ tranceiver\n");
 
-	/*
-	 * Kick the state machine work, if host is not supported
-	 * or host is already registered with us.
-	 */
-	if (motg->pdata->mode == USB_DR_MODE_PERIPHERAL || otg->host) {
-		pm_runtime_get_sync(otg->usb_phy->dev);
-		schedule_work(&motg->sm_work);
-	}
+	pm_runtime_get_sync(otg->usb_phy->dev);
+	schedule_work(&motg->sm_work);
 
 	return 0;
 }
@@ -1599,6 +1587,8 @@
 						&motg->id.nb);
 		if (ret < 0) {
 			dev_err(&pdev->dev, "register ID notifier failed\n");
+			extcon_unregister_notifier(motg->vbus.extcon,
+						   EXTCON_USB, &motg->vbus.nb);
 			return ret;
 		}
 
@@ -1660,15 +1650,6 @@
 	if (!motg)
 		return -ENOMEM;
 
-	pdata = dev_get_platdata(&pdev->dev);
-	if (!pdata) {
-		if (!np)
-			return -ENXIO;
-		ret = msm_otg_read_dt(pdev, motg);
-		if (ret)
-			return ret;
-	}
-
 	motg->phy.otg = devm_kzalloc(&pdev->dev, sizeof(struct usb_otg),
 				     GFP_KERNEL);
 	if (!motg->phy.otg)
@@ -1710,6 +1691,15 @@
 	if (!motg->regs)
 		return -ENOMEM;
 
+	pdata = dev_get_platdata(&pdev->dev);
+	if (!pdata) {
+		if (!np)
+			return -ENXIO;
+		ret = msm_otg_read_dt(pdev, motg);
+		if (ret)
+			return ret;
+	}
+
 	/*
 	 * NOTE: The PHYs can be multiplexed between the chipidea controller
 	 * and the dwc3 controller, using a single bit. It is important that
@@ -1717,8 +1707,10 @@
 	 */
 	if (motg->phy_number) {
 		phy_select = devm_ioremap_nocache(&pdev->dev, USB2_PHY_SEL, 4);
-		if (!phy_select)
-			return -ENOMEM;
+		if (!phy_select) {
+			ret = -ENOMEM;
+			goto unregister_extcon;
+		}
 		/* Enable second PHY with the OTG port */
 		writel(0x1, phy_select);
 	}
@@ -1728,7 +1720,8 @@
 	motg->irq = platform_get_irq(pdev, 0);
 	if (motg->irq < 0) {
 		dev_err(&pdev->dev, "platform_get_irq failed\n");
-		return motg->irq;
+		ret = motg->irq;
+		goto unregister_extcon;
 	}
 
 	regs[0].supply = "vddcx";
@@ -1737,7 +1730,7 @@
 
 	ret = devm_regulator_bulk_get(motg->phy.dev, ARRAY_SIZE(regs), regs);
 	if (ret)
-		return ret;
+		goto unregister_extcon;
 
 	motg->vddcx = regs[0].consumer;
 	motg->v3p3  = regs[1].consumer;
@@ -1834,6 +1827,12 @@
 	clk_disable_unprepare(motg->clk);
 	if (!IS_ERR(motg->core_clk))
 		clk_disable_unprepare(motg->core_clk);
+unregister_extcon:
+	extcon_unregister_notifier(motg->id.extcon,
+				   EXTCON_USB_HOST, &motg->id.nb);
+	extcon_unregister_notifier(motg->vbus.extcon,
+				   EXTCON_USB, &motg->vbus.nb);
+
 	return ret;
 }
 
diff --git a/drivers/usb/phy/phy-mxs-usb.c b/drivers/usb/phy/phy-mxs-usb.c
index c2936dc..00bfea0 100644
--- a/drivers/usb/phy/phy-mxs-usb.c
+++ b/drivers/usb/phy/phy-mxs-usb.c
@@ -220,7 +220,7 @@
 /* Return true if the vbus is there */
 static bool mxs_phy_get_vbus_status(struct mxs_phy *mxs_phy)
 {
-	unsigned int vbus_value;
+	unsigned int vbus_value = 0;
 
 	if (!mxs_phy->regmap_anatop)
 		return false;
diff --git a/drivers/usb/serial/Kconfig b/drivers/usb/serial/Kconfig
index f612dda..56ecb8b 100644
--- a/drivers/usb/serial/Kconfig
+++ b/drivers/usb/serial/Kconfig
@@ -475,22 +475,6 @@
 	  To compile this driver as a module, choose M here: the
 	  module will be called mos7840.  If unsure, choose N.
 
-config USB_SERIAL_MXUPORT11
-	tristate "USB Moxa UPORT 11x0 Serial Driver"
-	---help---
-	  Say Y here if you want to use a MOXA UPort 11x0 Serial hub.
-
-	  This driver supports:
-
-	  - UPort 1110  : 1 port RS-232 USB to Serial Hub.
-	  - UPort 1130  : 1 port RS-422/485 USB to Serial Hub.
-	  - UPort 1130I : 1 port RS-422/485 USB to Serial Hub with Isolation.
-	  - UPort 1150  : 1 port RS-232/422/485 USB to Serial Hub.
-	  - UPort 1150I : 1 port RS-232/422/485 USB to Serial Hub with Isolation.
-
-	  To compile this driver as a module, choose M here: the
-	  module will be called mxu11x0.
-
 config USB_SERIAL_MXUPORT
 	tristate "USB Moxa UPORT Serial Driver"
 	---help---
diff --git a/drivers/usb/serial/Makefile b/drivers/usb/serial/Makefile
index f3fa5e5..349d9df 100644
--- a/drivers/usb/serial/Makefile
+++ b/drivers/usb/serial/Makefile
@@ -38,7 +38,6 @@
 obj-$(CONFIG_USB_SERIAL_MOS7720)		+= mos7720.o
 obj-$(CONFIG_USB_SERIAL_MOS7840)		+= mos7840.o
 obj-$(CONFIG_USB_SERIAL_MXUPORT)		+= mxuport.o
-obj-$(CONFIG_USB_SERIAL_MXUPORT11)		+= mxu11x0.o
 obj-$(CONFIG_USB_SERIAL_NAVMAN)			+= navman.o
 obj-$(CONFIG_USB_SERIAL_OMNINET)		+= omninet.o
 obj-$(CONFIG_USB_SERIAL_OPTICON)		+= opticon.o
diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c
index 9b90ad7..73a366d 100644
--- a/drivers/usb/serial/cp210x.c
+++ b/drivers/usb/serial/cp210x.c
@@ -99,6 +99,7 @@
 	{ USB_DEVICE(0x10C4, 0x81AC) }, /* MSD Dash Hawk */
 	{ USB_DEVICE(0x10C4, 0x81AD) }, /* INSYS USB Modem */
 	{ USB_DEVICE(0x10C4, 0x81C8) }, /* Lipowsky Industrie Elektronik GmbH, Baby-JTAG */
+	{ USB_DEVICE(0x10C4, 0x81D7) }, /* IAI Corp. RCB-CV-USB USB to RS485 Adaptor */
 	{ USB_DEVICE(0x10C4, 0x81E2) }, /* Lipowsky Industrie Elektronik GmbH, Baby-LIN */
 	{ USB_DEVICE(0x10C4, 0x81E7) }, /* Aerocomm Radio */
 	{ USB_DEVICE(0x10C4, 0x81E8) }, /* Zephyr Bioharness */
@@ -162,6 +163,9 @@
 	{ USB_DEVICE(0x1843, 0x0200) }, /* Vaisala USB Instrument Cable */
 	{ USB_DEVICE(0x18EF, 0xE00F) }, /* ELV USB-I2C-Interface */
 	{ USB_DEVICE(0x18EF, 0xE025) }, /* ELV Marble Sound Board 1 */
+	{ USB_DEVICE(0x1901, 0x0190) }, /* GE B850 CP2105 Recorder interface */
+	{ USB_DEVICE(0x1901, 0x0193) }, /* GE B650 CP2104 PMC interface */
+	{ USB_DEVICE(0x19CF, 0x3000) }, /* Parrot NMEA GPS Flight Recorder */
 	{ USB_DEVICE(0x1ADB, 0x0001) }, /* Schweitzer Engineering C662 Cable */
 	{ USB_DEVICE(0x1B1C, 0x1C00) }, /* Corsair USB Dongle */
 	{ USB_DEVICE(0x1BA4, 0x0002) },	/* Silicon Labs 358x factory default */
diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c
index a5a0376..8c660ae 100644
--- a/drivers/usb/serial/ftdi_sio.c
+++ b/drivers/usb/serial/ftdi_sio.c
@@ -824,6 +824,7 @@
 	{ USB_DEVICE(FTDI_VID, FTDI_TURTELIZER_PID),
 		.driver_info = (kernel_ulong_t)&ftdi_jtag_quirk },
 	{ USB_DEVICE(RATOC_VENDOR_ID, RATOC_PRODUCT_ID_USB60F) },
+	{ USB_DEVICE(RATOC_VENDOR_ID, RATOC_PRODUCT_ID_SCU18) },
 	{ USB_DEVICE(FTDI_VID, FTDI_REU_TINY_PID) },
 
 	/* Papouch devices based on FTDI chip */
diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h
index 67c6d44..a84df25 100644
--- a/drivers/usb/serial/ftdi_sio_ids.h
+++ b/drivers/usb/serial/ftdi_sio_ids.h
@@ -615,6 +615,7 @@
  */
 #define RATOC_VENDOR_ID		0x0584
 #define RATOC_PRODUCT_ID_USB60F	0xb020
+#define RATOC_PRODUCT_ID_SCU18	0xb03a
 
 /*
  * Infineon Technologies
diff --git a/drivers/usb/serial/mxu11x0.c b/drivers/usb/serial/mxu11x0.c
deleted file mode 100644
index e3c3f57c..0000000
--- a/drivers/usb/serial/mxu11x0.c
+++ /dev/null
@@ -1,986 +0,0 @@
-/*
- * USB Moxa UPORT 11x0 Serial Driver
- *
- * Copyright (C) 2007 MOXA Technologies Co., Ltd.
- * Copyright (C) 2015 Mathieu Othacehe <m.othacehe@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- *
- * Supports the following Moxa USB to serial converters:
- *  UPort 1110,  1 port RS-232 USB to Serial Hub.
- *  UPort 1130,  1 port RS-422/485 USB to Serial Hub.
- *  UPort 1130I, 1 port RS-422/485 USB to Serial Hub with isolation
- *    protection.
- *  UPort 1150,  1 port RS-232/422/485 USB to Serial Hub.
- *  UPort 1150I, 1 port RS-232/422/485 USB to Serial Hub with isolation
- *  protection.
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/firmware.h>
-#include <linux/jiffies.h>
-#include <linux/serial.h>
-#include <linux/serial_reg.h>
-#include <linux/slab.h>
-#include <linux/spinlock.h>
-#include <linux/mutex.h>
-#include <linux/tty.h>
-#include <linux/tty_driver.h>
-#include <linux/tty_flip.h>
-#include <linux/uaccess.h>
-#include <linux/usb.h>
-#include <linux/usb/serial.h>
-
-/* Vendor and product ids */
-#define MXU1_VENDOR_ID				0x110a
-#define MXU1_1110_PRODUCT_ID			0x1110
-#define MXU1_1130_PRODUCT_ID			0x1130
-#define MXU1_1150_PRODUCT_ID			0x1150
-#define MXU1_1151_PRODUCT_ID			0x1151
-#define MXU1_1131_PRODUCT_ID			0x1131
-
-/* Commands */
-#define MXU1_GET_VERSION			0x01
-#define MXU1_GET_PORT_STATUS			0x02
-#define MXU1_GET_PORT_DEV_INFO			0x03
-#define MXU1_GET_CONFIG				0x04
-#define MXU1_SET_CONFIG				0x05
-#define MXU1_OPEN_PORT				0x06
-#define MXU1_CLOSE_PORT				0x07
-#define MXU1_START_PORT				0x08
-#define MXU1_STOP_PORT				0x09
-#define MXU1_TEST_PORT				0x0A
-#define MXU1_PURGE_PORT				0x0B
-#define MXU1_RESET_EXT_DEVICE			0x0C
-#define MXU1_GET_OUTQUEUE			0x0D
-#define MXU1_WRITE_DATA				0x80
-#define MXU1_READ_DATA				0x81
-#define MXU1_REQ_TYPE_CLASS			0x82
-
-/* Module identifiers */
-#define MXU1_I2C_PORT				0x01
-#define MXU1_IEEE1284_PORT			0x02
-#define MXU1_UART1_PORT				0x03
-#define MXU1_UART2_PORT				0x04
-#define MXU1_RAM_PORT				0x05
-
-/* Modem status */
-#define MXU1_MSR_DELTA_CTS			0x01
-#define MXU1_MSR_DELTA_DSR			0x02
-#define MXU1_MSR_DELTA_RI			0x04
-#define MXU1_MSR_DELTA_CD			0x08
-#define MXU1_MSR_CTS				0x10
-#define MXU1_MSR_DSR				0x20
-#define MXU1_MSR_RI				0x40
-#define MXU1_MSR_CD				0x80
-#define MXU1_MSR_DELTA_MASK			0x0F
-#define MXU1_MSR_MASK				0xF0
-
-/* Line status */
-#define MXU1_LSR_OVERRUN_ERROR			0x01
-#define MXU1_LSR_PARITY_ERROR			0x02
-#define MXU1_LSR_FRAMING_ERROR			0x04
-#define MXU1_LSR_BREAK				0x08
-#define MXU1_LSR_ERROR				0x0F
-#define MXU1_LSR_RX_FULL			0x10
-#define MXU1_LSR_TX_EMPTY			0x20
-
-/* Modem control */
-#define MXU1_MCR_LOOP				0x04
-#define MXU1_MCR_DTR				0x10
-#define MXU1_MCR_RTS				0x20
-
-/* Mask settings */
-#define MXU1_UART_ENABLE_RTS_IN			0x0001
-#define MXU1_UART_DISABLE_RTS			0x0002
-#define MXU1_UART_ENABLE_PARITY_CHECKING	0x0008
-#define MXU1_UART_ENABLE_DSR_OUT		0x0010
-#define MXU1_UART_ENABLE_CTS_OUT		0x0020
-#define MXU1_UART_ENABLE_X_OUT			0x0040
-#define MXU1_UART_ENABLE_XA_OUT			0x0080
-#define MXU1_UART_ENABLE_X_IN			0x0100
-#define MXU1_UART_ENABLE_DTR_IN			0x0800
-#define MXU1_UART_DISABLE_DTR			0x1000
-#define MXU1_UART_ENABLE_MS_INTS		0x2000
-#define MXU1_UART_ENABLE_AUTO_START_DMA		0x4000
-#define MXU1_UART_SEND_BREAK_SIGNAL		0x8000
-
-/* Parity */
-#define MXU1_UART_NO_PARITY			0x00
-#define MXU1_UART_ODD_PARITY			0x01
-#define MXU1_UART_EVEN_PARITY			0x02
-#define MXU1_UART_MARK_PARITY			0x03
-#define MXU1_UART_SPACE_PARITY			0x04
-
-/* Stop bits */
-#define MXU1_UART_1_STOP_BITS			0x00
-#define MXU1_UART_1_5_STOP_BITS			0x01
-#define MXU1_UART_2_STOP_BITS			0x02
-
-/* Bits per character */
-#define MXU1_UART_5_DATA_BITS			0x00
-#define MXU1_UART_6_DATA_BITS			0x01
-#define MXU1_UART_7_DATA_BITS			0x02
-#define MXU1_UART_8_DATA_BITS			0x03
-
-/* Operation modes */
-#define MXU1_UART_232				0x00
-#define MXU1_UART_485_RECEIVER_DISABLED		0x01
-#define MXU1_UART_485_RECEIVER_ENABLED		0x02
-
-/* Pipe transfer mode and timeout */
-#define MXU1_PIPE_MODE_CONTINUOUS		0x01
-#define MXU1_PIPE_MODE_MASK			0x03
-#define MXU1_PIPE_TIMEOUT_MASK			0x7C
-#define MXU1_PIPE_TIMEOUT_ENABLE		0x80
-
-/* Config struct */
-struct mxu1_uart_config {
-	__be16	wBaudRate;
-	__be16	wFlags;
-	u8	bDataBits;
-	u8	bParity;
-	u8	bStopBits;
-	char	cXon;
-	char	cXoff;
-	u8	bUartMode;
-} __packed;
-
-/* Purge modes */
-#define MXU1_PURGE_OUTPUT			0x00
-#define MXU1_PURGE_INPUT			0x80
-
-/* Read/Write data */
-#define MXU1_RW_DATA_ADDR_SFR			0x10
-#define MXU1_RW_DATA_ADDR_IDATA			0x20
-#define MXU1_RW_DATA_ADDR_XDATA			0x30
-#define MXU1_RW_DATA_ADDR_CODE			0x40
-#define MXU1_RW_DATA_ADDR_GPIO			0x50
-#define MXU1_RW_DATA_ADDR_I2C			0x60
-#define MXU1_RW_DATA_ADDR_FLASH			0x70
-#define MXU1_RW_DATA_ADDR_DSP			0x80
-
-#define MXU1_RW_DATA_UNSPECIFIED		0x00
-#define MXU1_RW_DATA_BYTE			0x01
-#define MXU1_RW_DATA_WORD			0x02
-#define MXU1_RW_DATA_DOUBLE_WORD		0x04
-
-struct mxu1_write_data_bytes {
-	u8	bAddrType;
-	u8	bDataType;
-	u8	bDataCounter;
-	__be16	wBaseAddrHi;
-	__be16	wBaseAddrLo;
-	u8	bData[0];
-} __packed;
-
-/* Interrupt codes */
-#define MXU1_CODE_HARDWARE_ERROR		0xFF
-#define MXU1_CODE_DATA_ERROR			0x03
-#define MXU1_CODE_MODEM_STATUS			0x04
-
-static inline int mxu1_get_func_from_code(unsigned char code)
-{
-	return code & 0x0f;
-}
-
-/* Download firmware max packet size */
-#define MXU1_DOWNLOAD_MAX_PACKET_SIZE		64
-
-/* Firmware image header */
-struct mxu1_firmware_header {
-	__le16 wLength;
-	u8 bCheckSum;
-} __packed;
-
-#define MXU1_UART_BASE_ADDR	    0xFFA0
-#define MXU1_UART_OFFSET_MCR	    0x0004
-
-#define MXU1_BAUD_BASE              923077
-
-#define MXU1_TRANSFER_TIMEOUT	    2
-#define MXU1_DOWNLOAD_TIMEOUT       1000
-#define MXU1_DEFAULT_CLOSING_WAIT   4000 /* in .01 secs */
-
-struct mxu1_port {
-	u8 msr;
-	u8 mcr;
-	u8 uart_mode;
-	spinlock_t spinlock; /* Protects msr */
-	struct mutex mutex; /* Protects mcr */
-	bool send_break;
-};
-
-struct mxu1_device {
-	u16 mxd_model;
-};
-
-static const struct usb_device_id mxu1_idtable[] = {
-	{ USB_DEVICE(MXU1_VENDOR_ID, MXU1_1110_PRODUCT_ID) },
-	{ USB_DEVICE(MXU1_VENDOR_ID, MXU1_1130_PRODUCT_ID) },
-	{ USB_DEVICE(MXU1_VENDOR_ID, MXU1_1150_PRODUCT_ID) },
-	{ USB_DEVICE(MXU1_VENDOR_ID, MXU1_1151_PRODUCT_ID) },
-	{ USB_DEVICE(MXU1_VENDOR_ID, MXU1_1131_PRODUCT_ID) },
-	{ }
-};
-
-MODULE_DEVICE_TABLE(usb, mxu1_idtable);
-
-/* Write the given buffer out to the control pipe.  */
-static int mxu1_send_ctrl_data_urb(struct usb_serial *serial,
-				   u8 request,
-				   u16 value, u16 index,
-				   void *data, size_t size)
-{
-	int status;
-
-	status = usb_control_msg(serial->dev,
-				 usb_sndctrlpipe(serial->dev, 0),
-				 request,
-				 (USB_DIR_OUT | USB_TYPE_VENDOR |
-				  USB_RECIP_DEVICE), value, index,
-				 data, size,
-				 USB_CTRL_SET_TIMEOUT);
-	if (status < 0) {
-		dev_err(&serial->interface->dev,
-			"%s - usb_control_msg failed: %d\n",
-			__func__, status);
-		return status;
-	}
-
-	if (status != size) {
-		dev_err(&serial->interface->dev,
-			"%s - short write (%d / %zd)\n",
-			__func__, status, size);
-		return -EIO;
-	}
-
-	return 0;
-}
-
-/* Send a vendor request without any data */
-static int mxu1_send_ctrl_urb(struct usb_serial *serial,
-			      u8 request, u16 value, u16 index)
-{
-	return mxu1_send_ctrl_data_urb(serial, request, value, index,
-				       NULL, 0);
-}
-
-static int mxu1_download_firmware(struct usb_serial *serial,
-				  const struct firmware *fw_p)
-{
-	int status = 0;
-	int buffer_size;
-	int pos;
-	int len;
-	int done;
-	u8 cs = 0;
-	u8 *buffer;
-	struct usb_device *dev = serial->dev;
-	struct mxu1_firmware_header *header;
-	unsigned int pipe;
-
-	pipe = usb_sndbulkpipe(dev, serial->port[0]->bulk_out_endpointAddress);
-
-	buffer_size = fw_p->size + sizeof(*header);
-	buffer = kmalloc(buffer_size, GFP_KERNEL);
-	if (!buffer)
-		return -ENOMEM;
-
-	memcpy(buffer, fw_p->data, fw_p->size);
-	memset(buffer + fw_p->size, 0xff, buffer_size - fw_p->size);
-
-	for (pos = sizeof(*header); pos < buffer_size; pos++)
-		cs = (u8)(cs + buffer[pos]);
-
-	header = (struct mxu1_firmware_header *)buffer;
-	header->wLength = cpu_to_le16(buffer_size - sizeof(*header));
-	header->bCheckSum = cs;
-
-	dev_dbg(&dev->dev, "%s - downloading firmware\n", __func__);
-
-	for (pos = 0; pos < buffer_size; pos += done) {
-		len = min(buffer_size - pos, MXU1_DOWNLOAD_MAX_PACKET_SIZE);
-
-		status = usb_bulk_msg(dev, pipe, buffer + pos, len, &done,
-				MXU1_DOWNLOAD_TIMEOUT);
-		if (status)
-			break;
-	}
-
-	kfree(buffer);
-
-	if (status) {
-		dev_err(&dev->dev, "failed to download firmware: %d\n", status);
-		return status;
-	}
-
-	msleep_interruptible(100);
-	usb_reset_device(dev);
-
-	dev_dbg(&dev->dev, "%s - download successful\n", __func__);
-
-	return 0;
-}
-
-static int mxu1_port_probe(struct usb_serial_port *port)
-{
-	struct mxu1_port *mxport;
-	struct mxu1_device *mxdev;
-
-	if (!port->interrupt_in_urb) {
-		dev_err(&port->dev, "no interrupt urb\n");
-		return -ENODEV;
-	}
-
-	mxport = kzalloc(sizeof(struct mxu1_port), GFP_KERNEL);
-	if (!mxport)
-		return -ENOMEM;
-
-	spin_lock_init(&mxport->spinlock);
-	mutex_init(&mxport->mutex);
-
-	mxdev = usb_get_serial_data(port->serial);
-
-	switch (mxdev->mxd_model) {
-	case MXU1_1110_PRODUCT_ID:
-	case MXU1_1150_PRODUCT_ID:
-	case MXU1_1151_PRODUCT_ID:
-		mxport->uart_mode = MXU1_UART_232;
-		break;
-	case MXU1_1130_PRODUCT_ID:
-	case MXU1_1131_PRODUCT_ID:
-		mxport->uart_mode = MXU1_UART_485_RECEIVER_DISABLED;
-		break;
-	}
-
-	usb_set_serial_port_data(port, mxport);
-
-	port->port.closing_wait =
-			msecs_to_jiffies(MXU1_DEFAULT_CLOSING_WAIT * 10);
-	port->port.drain_delay = 1;
-
-	return 0;
-}
-
-static int mxu1_startup(struct usb_serial *serial)
-{
-	struct mxu1_device *mxdev;
-	struct usb_device *dev = serial->dev;
-	struct usb_host_interface *cur_altsetting;
-	char fw_name[32];
-	const struct firmware *fw_p = NULL;
-	int err;
-
-	dev_dbg(&serial->interface->dev, "%s - product 0x%04X, num configurations %d, configuration value %d\n",
-		__func__, le16_to_cpu(dev->descriptor.idProduct),
-		dev->descriptor.bNumConfigurations,
-		dev->actconfig->desc.bConfigurationValue);
-
-	/* create device structure */
-	mxdev = kzalloc(sizeof(struct mxu1_device), GFP_KERNEL);
-	if (!mxdev)
-		return -ENOMEM;
-
-	usb_set_serial_data(serial, mxdev);
-
-	mxdev->mxd_model = le16_to_cpu(dev->descriptor.idProduct);
-
-	cur_altsetting = serial->interface->cur_altsetting;
-
-	/* if we have only 1 configuration, download firmware */
-	if (cur_altsetting->desc.bNumEndpoints == 1) {
-
-		snprintf(fw_name,
-			 sizeof(fw_name),
-			 "moxa/moxa-%04x.fw",
-			 mxdev->mxd_model);
-
-		err = request_firmware(&fw_p, fw_name, &serial->interface->dev);
-		if (err) {
-			dev_err(&serial->interface->dev, "failed to request firmware: %d\n",
-				err);
-			goto err_free_mxdev;
-		}
-
-		err = mxu1_download_firmware(serial, fw_p);
-		if (err)
-			goto err_release_firmware;
-
-		/* device is being reset */
-		err = -ENODEV;
-		goto err_release_firmware;
-	}
-
-	return 0;
-
-err_release_firmware:
-	release_firmware(fw_p);
-err_free_mxdev:
-	kfree(mxdev);
-
-	return err;
-}
-
-static int mxu1_write_byte(struct usb_serial_port *port, u32 addr,
-			   u8 mask, u8 byte)
-{
-	int status;
-	size_t size;
-	struct mxu1_write_data_bytes *data;
-
-	dev_dbg(&port->dev, "%s - addr 0x%08X, mask 0x%02X, byte 0x%02X\n",
-		__func__, addr, mask, byte);
-
-	size = sizeof(struct mxu1_write_data_bytes) + 2;
-	data = kzalloc(size, GFP_KERNEL);
-	if (!data)
-		return -ENOMEM;
-
-	data->bAddrType = MXU1_RW_DATA_ADDR_XDATA;
-	data->bDataType = MXU1_RW_DATA_BYTE;
-	data->bDataCounter = 1;
-	data->wBaseAddrHi = cpu_to_be16(addr >> 16);
-	data->wBaseAddrLo = cpu_to_be16(addr);
-	data->bData[0] = mask;
-	data->bData[1] = byte;
-
-	status = mxu1_send_ctrl_data_urb(port->serial, MXU1_WRITE_DATA, 0,
-					 MXU1_RAM_PORT, data, size);
-	if (status < 0)
-		dev_err(&port->dev, "%s - failed: %d\n", __func__, status);
-
-	kfree(data);
-
-	return status;
-}
-
-static int mxu1_set_mcr(struct usb_serial_port *port, unsigned int mcr)
-{
-	int status;
-
-	status = mxu1_write_byte(port,
-				 MXU1_UART_BASE_ADDR + MXU1_UART_OFFSET_MCR,
-				 MXU1_MCR_RTS | MXU1_MCR_DTR | MXU1_MCR_LOOP,
-				 mcr);
-	return status;
-}
-
-static void mxu1_set_termios(struct tty_struct *tty,
-			     struct usb_serial_port *port,
-			     struct ktermios *old_termios)
-{
-	struct mxu1_port *mxport = usb_get_serial_port_data(port);
-	struct mxu1_uart_config *config;
-	tcflag_t cflag, iflag;
-	speed_t baud;
-	int status;
-	unsigned int mcr;
-
-	cflag = tty->termios.c_cflag;
-	iflag = tty->termios.c_iflag;
-
-	if (old_termios &&
-	    !tty_termios_hw_change(&tty->termios, old_termios) &&
-	    tty->termios.c_iflag == old_termios->c_iflag) {
-		dev_dbg(&port->dev, "%s - nothing to change\n", __func__);
-		return;
-	}
-
-	dev_dbg(&port->dev,
-		"%s - cflag 0x%08x, iflag 0x%08x\n", __func__, cflag, iflag);
-
-	if (old_termios) {
-		dev_dbg(&port->dev, "%s - old cflag 0x%08x, old iflag 0x%08x\n",
-			__func__,
-			old_termios->c_cflag,
-			old_termios->c_iflag);
-	}
-
-	config = kzalloc(sizeof(*config), GFP_KERNEL);
-	if (!config)
-		return;
-
-	/* these flags must be set */
-	config->wFlags |= MXU1_UART_ENABLE_MS_INTS;
-	config->wFlags |= MXU1_UART_ENABLE_AUTO_START_DMA;
-	if (mxport->send_break)
-		config->wFlags |= MXU1_UART_SEND_BREAK_SIGNAL;
-	config->bUartMode = mxport->uart_mode;
-
-	switch (C_CSIZE(tty)) {
-	case CS5:
-		config->bDataBits = MXU1_UART_5_DATA_BITS;
-		break;
-	case CS6:
-		config->bDataBits = MXU1_UART_6_DATA_BITS;
-		break;
-	case CS7:
-		config->bDataBits = MXU1_UART_7_DATA_BITS;
-		break;
-	default:
-	case CS8:
-		config->bDataBits = MXU1_UART_8_DATA_BITS;
-		break;
-	}
-
-	if (C_PARENB(tty)) {
-		config->wFlags |= MXU1_UART_ENABLE_PARITY_CHECKING;
-		if (C_CMSPAR(tty)) {
-			if (C_PARODD(tty))
-				config->bParity = MXU1_UART_MARK_PARITY;
-			else
-				config->bParity = MXU1_UART_SPACE_PARITY;
-		} else {
-			if (C_PARODD(tty))
-				config->bParity = MXU1_UART_ODD_PARITY;
-			else
-				config->bParity = MXU1_UART_EVEN_PARITY;
-		}
-	} else {
-		config->bParity = MXU1_UART_NO_PARITY;
-	}
-
-	if (C_CSTOPB(tty))
-		config->bStopBits = MXU1_UART_2_STOP_BITS;
-	else
-		config->bStopBits = MXU1_UART_1_STOP_BITS;
-
-	if (C_CRTSCTS(tty)) {
-		/* RTS flow control must be off to drop RTS for baud rate B0 */
-		if (C_BAUD(tty) != B0)
-			config->wFlags |= MXU1_UART_ENABLE_RTS_IN;
-		config->wFlags |= MXU1_UART_ENABLE_CTS_OUT;
-	}
-
-	if (I_IXOFF(tty) || I_IXON(tty)) {
-		config->cXon  = START_CHAR(tty);
-		config->cXoff = STOP_CHAR(tty);
-
-		if (I_IXOFF(tty))
-			config->wFlags |= MXU1_UART_ENABLE_X_IN;
-
-		if (I_IXON(tty))
-			config->wFlags |= MXU1_UART_ENABLE_X_OUT;
-	}
-
-	baud = tty_get_baud_rate(tty);
-	if (!baud)
-		baud = 9600;
-	config->wBaudRate = MXU1_BAUD_BASE / baud;
-
-	dev_dbg(&port->dev, "%s - BaudRate=%d, wBaudRate=%d, wFlags=0x%04X, bDataBits=%d, bParity=%d, bStopBits=%d, cXon=%d, cXoff=%d, bUartMode=%d\n",
-		__func__, baud, config->wBaudRate, config->wFlags,
-		config->bDataBits, config->bParity, config->bStopBits,
-		config->cXon, config->cXoff, config->bUartMode);
-
-	cpu_to_be16s(&config->wBaudRate);
-	cpu_to_be16s(&config->wFlags);
-
-	status = mxu1_send_ctrl_data_urb(port->serial, MXU1_SET_CONFIG, 0,
-					 MXU1_UART1_PORT, config,
-					 sizeof(*config));
-	if (status)
-		dev_err(&port->dev, "cannot set config: %d\n", status);
-
-	mutex_lock(&mxport->mutex);
-	mcr = mxport->mcr;
-
-	if (C_BAUD(tty) == B0)
-		mcr &= ~(MXU1_MCR_DTR | MXU1_MCR_RTS);
-	else if (old_termios && (old_termios->c_cflag & CBAUD) == B0)
-		mcr |= MXU1_MCR_DTR | MXU1_MCR_RTS;
-
-	status = mxu1_set_mcr(port, mcr);
-	if (status)
-		dev_err(&port->dev, "cannot set modem control: %d\n", status);
-	else
-		mxport->mcr = mcr;
-
-	mutex_unlock(&mxport->mutex);
-
-	kfree(config);
-}
-
-static int mxu1_get_serial_info(struct usb_serial_port *port,
-				struct serial_struct __user *ret_arg)
-{
-	struct serial_struct ret_serial;
-	unsigned cwait;
-
-	if (!ret_arg)
-		return -EFAULT;
-
-	cwait = port->port.closing_wait;
-	if (cwait != ASYNC_CLOSING_WAIT_NONE)
-		cwait = jiffies_to_msecs(cwait) / 10;
-
-	memset(&ret_serial, 0, sizeof(ret_serial));
-
-	ret_serial.type = PORT_16550A;
-	ret_serial.line = port->minor;
-	ret_serial.port = 0;
-	ret_serial.xmit_fifo_size = port->bulk_out_size;
-	ret_serial.baud_base = MXU1_BAUD_BASE;
-	ret_serial.close_delay = 5*HZ;
-	ret_serial.closing_wait = cwait;
-
-	if (copy_to_user(ret_arg, &ret_serial, sizeof(*ret_arg)))
-		return -EFAULT;
-
-	return 0;
-}
-
-
-static int mxu1_set_serial_info(struct usb_serial_port *port,
-				struct serial_struct __user *new_arg)
-{
-	struct serial_struct new_serial;
-	unsigned cwait;
-
-	if (copy_from_user(&new_serial, new_arg, sizeof(new_serial)))
-		return -EFAULT;
-
-	cwait = new_serial.closing_wait;
-	if (cwait != ASYNC_CLOSING_WAIT_NONE)
-		cwait = msecs_to_jiffies(10 * new_serial.closing_wait);
-
-	port->port.closing_wait = cwait;
-
-	return 0;
-}
-
-static int mxu1_ioctl(struct tty_struct *tty,
-		      unsigned int cmd, unsigned long arg)
-{
-	struct usb_serial_port *port = tty->driver_data;
-
-	switch (cmd) {
-	case TIOCGSERIAL:
-		return mxu1_get_serial_info(port,
-					    (struct serial_struct __user *)arg);
-	case TIOCSSERIAL:
-		return mxu1_set_serial_info(port,
-					    (struct serial_struct __user *)arg);
-	}
-
-	return -ENOIOCTLCMD;
-}
-
-static int mxu1_tiocmget(struct tty_struct *tty)
-{
-	struct usb_serial_port *port = tty->driver_data;
-	struct mxu1_port *mxport = usb_get_serial_port_data(port);
-	unsigned int result;
-	unsigned int msr;
-	unsigned int mcr;
-	unsigned long flags;
-
-	mutex_lock(&mxport->mutex);
-	spin_lock_irqsave(&mxport->spinlock, flags);
-
-	msr = mxport->msr;
-	mcr = mxport->mcr;
-
-	spin_unlock_irqrestore(&mxport->spinlock, flags);
-	mutex_unlock(&mxport->mutex);
-
-	result = ((mcr & MXU1_MCR_DTR)	? TIOCM_DTR	: 0) |
-		 ((mcr & MXU1_MCR_RTS)	? TIOCM_RTS	: 0) |
-		 ((mcr & MXU1_MCR_LOOP) ? TIOCM_LOOP	: 0) |
-		 ((msr & MXU1_MSR_CTS)	? TIOCM_CTS	: 0) |
-		 ((msr & MXU1_MSR_CD)	? TIOCM_CAR	: 0) |
-		 ((msr & MXU1_MSR_RI)	? TIOCM_RI	: 0) |
-		 ((msr & MXU1_MSR_DSR)	? TIOCM_DSR	: 0);
-
-	dev_dbg(&port->dev, "%s - 0x%04X\n", __func__, result);
-
-	return result;
-}
-
-static int mxu1_tiocmset(struct tty_struct *tty,
-			 unsigned int set, unsigned int clear)
-{
-	struct usb_serial_port *port = tty->driver_data;
-	struct mxu1_port *mxport = usb_get_serial_port_data(port);
-	int err;
-	unsigned int mcr;
-
-	mutex_lock(&mxport->mutex);
-	mcr = mxport->mcr;
-
-	if (set & TIOCM_RTS)
-		mcr |= MXU1_MCR_RTS;
-	if (set & TIOCM_DTR)
-		mcr |= MXU1_MCR_DTR;
-	if (set & TIOCM_LOOP)
-		mcr |= MXU1_MCR_LOOP;
-
-	if (clear & TIOCM_RTS)
-		mcr &= ~MXU1_MCR_RTS;
-	if (clear & TIOCM_DTR)
-		mcr &= ~MXU1_MCR_DTR;
-	if (clear & TIOCM_LOOP)
-		mcr &= ~MXU1_MCR_LOOP;
-
-	err = mxu1_set_mcr(port, mcr);
-	if (!err)
-		mxport->mcr = mcr;
-
-	mutex_unlock(&mxport->mutex);
-
-	return err;
-}
-
-static void mxu1_break(struct tty_struct *tty, int break_state)
-{
-	struct usb_serial_port *port = tty->driver_data;
-	struct mxu1_port *mxport = usb_get_serial_port_data(port);
-
-	if (break_state == -1)
-		mxport->send_break = true;
-	else
-		mxport->send_break = false;
-
-	mxu1_set_termios(tty, port, NULL);
-}
-
-static int mxu1_open(struct tty_struct *tty, struct usb_serial_port *port)
-{
-	struct mxu1_port *mxport = usb_get_serial_port_data(port);
-	struct usb_serial *serial = port->serial;
-	int status;
-	u16 open_settings;
-
-	open_settings = (MXU1_PIPE_MODE_CONTINUOUS |
-			 MXU1_PIPE_TIMEOUT_ENABLE |
-			 (MXU1_TRANSFER_TIMEOUT << 2));
-
-	mxport->msr = 0;
-
-	status = usb_submit_urb(port->interrupt_in_urb, GFP_KERNEL);
-	if (status) {
-		dev_err(&port->dev, "failed to submit interrupt urb: %d\n",
-			status);
-		return status;
-	}
-
-	if (tty)
-		mxu1_set_termios(tty, port, NULL);
-
-	status = mxu1_send_ctrl_urb(serial, MXU1_OPEN_PORT,
-				    open_settings, MXU1_UART1_PORT);
-	if (status) {
-		dev_err(&port->dev, "cannot send open command: %d\n", status);
-		goto unlink_int_urb;
-	}
-
-	status = mxu1_send_ctrl_urb(serial, MXU1_START_PORT,
-				    0, MXU1_UART1_PORT);
-	if (status) {
-		dev_err(&port->dev, "cannot send start command: %d\n", status);
-		goto unlink_int_urb;
-	}
-
-	status = mxu1_send_ctrl_urb(serial, MXU1_PURGE_PORT,
-				    MXU1_PURGE_INPUT, MXU1_UART1_PORT);
-	if (status) {
-		dev_err(&port->dev, "cannot clear input buffers: %d\n",
-			status);
-
-		goto unlink_int_urb;
-	}
-
-	status = mxu1_send_ctrl_urb(serial, MXU1_PURGE_PORT,
-				    MXU1_PURGE_OUTPUT, MXU1_UART1_PORT);
-	if (status) {
-		dev_err(&port->dev, "cannot clear output buffers: %d\n",
-			status);
-
-		goto unlink_int_urb;
-	}
-
-	/*
-	 * reset the data toggle on the bulk endpoints to work around bug in
-	 * host controllers where things get out of sync some times
-	 */
-	usb_clear_halt(serial->dev, port->write_urb->pipe);
-	usb_clear_halt(serial->dev, port->read_urb->pipe);
-
-	if (tty)
-		mxu1_set_termios(tty, port, NULL);
-
-	status = mxu1_send_ctrl_urb(serial, MXU1_OPEN_PORT,
-				    open_settings, MXU1_UART1_PORT);
-	if (status) {
-		dev_err(&port->dev, "cannot send open command: %d\n", status);
-		goto unlink_int_urb;
-	}
-
-	status = mxu1_send_ctrl_urb(serial, MXU1_START_PORT,
-				    0, MXU1_UART1_PORT);
-	if (status) {
-		dev_err(&port->dev, "cannot send start command: %d\n", status);
-		goto unlink_int_urb;
-	}
-
-	status = usb_serial_generic_open(tty, port);
-	if (status)
-		goto unlink_int_urb;
-
-	return 0;
-
-unlink_int_urb:
-	usb_kill_urb(port->interrupt_in_urb);
-
-	return status;
-}
-
-static void mxu1_close(struct usb_serial_port *port)
-{
-	int status;
-
-	usb_serial_generic_close(port);
-	usb_kill_urb(port->interrupt_in_urb);
-
-	status = mxu1_send_ctrl_urb(port->serial, MXU1_CLOSE_PORT,
-				    0, MXU1_UART1_PORT);
-	if (status) {
-		dev_err(&port->dev, "failed to send close port command: %d\n",
-			status);
-	}
-}
-
-static void mxu1_handle_new_msr(struct usb_serial_port *port, u8 msr)
-{
-	struct mxu1_port *mxport = usb_get_serial_port_data(port);
-	struct async_icount *icount;
-	unsigned long flags;
-
-	dev_dbg(&port->dev, "%s - msr 0x%02X\n", __func__, msr);
-
-	spin_lock_irqsave(&mxport->spinlock, flags);
-	mxport->msr = msr & MXU1_MSR_MASK;
-	spin_unlock_irqrestore(&mxport->spinlock, flags);
-
-	if (msr & MXU1_MSR_DELTA_MASK) {
-		icount = &port->icount;
-		if (msr & MXU1_MSR_DELTA_CTS)
-			icount->cts++;
-		if (msr & MXU1_MSR_DELTA_DSR)
-			icount->dsr++;
-		if (msr & MXU1_MSR_DELTA_CD)
-			icount->dcd++;
-		if (msr & MXU1_MSR_DELTA_RI)
-			icount->rng++;
-
-		wake_up_interruptible(&port->port.delta_msr_wait);
-	}
-}
-
-static void mxu1_interrupt_callback(struct urb *urb)
-{
-	struct usb_serial_port *port = urb->context;
-	unsigned char *data = urb->transfer_buffer;
-	int length = urb->actual_length;
-	int function;
-	int status;
-	u8 msr;
-
-	switch (urb->status) {
-	case 0:
-		break;
-	case -ECONNRESET:
-	case -ENOENT:
-	case -ESHUTDOWN:
-		dev_dbg(&port->dev, "%s - urb shutting down: %d\n",
-			__func__, urb->status);
-		return;
-	default:
-		dev_dbg(&port->dev, "%s - nonzero urb status: %d\n",
-			__func__, urb->status);
-		goto exit;
-	}
-
-	if (length != 2) {
-		dev_dbg(&port->dev, "%s - bad packet size: %d\n",
-			__func__, length);
-		goto exit;
-	}
-
-	if (data[0] == MXU1_CODE_HARDWARE_ERROR) {
-		dev_err(&port->dev, "hardware error: %d\n", data[1]);
-		goto exit;
-	}
-
-	function = mxu1_get_func_from_code(data[0]);
-
-	dev_dbg(&port->dev, "%s - function %d, data 0x%02X\n",
-		 __func__, function, data[1]);
-
-	switch (function) {
-	case MXU1_CODE_DATA_ERROR:
-		dev_dbg(&port->dev, "%s - DATA ERROR, data 0x%02X\n",
-			 __func__, data[1]);
-		break;
-
-	case MXU1_CODE_MODEM_STATUS:
-		msr = data[1];
-		mxu1_handle_new_msr(port, msr);
-		break;
-
-	default:
-		dev_err(&port->dev, "unknown interrupt code: 0x%02X\n",
-			data[1]);
-		break;
-	}
-
-exit:
-	status = usb_submit_urb(urb, GFP_ATOMIC);
-	if (status) {
-		dev_err(&port->dev, "resubmit interrupt urb failed: %d\n",
-			status);
-	}
-}
-
-static struct usb_serial_driver mxu11x0_device = {
-	.driver = {
-		.owner		= THIS_MODULE,
-		.name		= "mxu11x0",
-	},
-	.description		= "MOXA UPort 11x0",
-	.id_table		= mxu1_idtable,
-	.num_ports		= 1,
-	.port_probe             = mxu1_port_probe,
-	.attach			= mxu1_startup,
-	.open			= mxu1_open,
-	.close			= mxu1_close,
-	.ioctl			= mxu1_ioctl,
-	.set_termios		= mxu1_set_termios,
-	.tiocmget		= mxu1_tiocmget,
-	.tiocmset		= mxu1_tiocmset,
-	.tiocmiwait		= usb_serial_generic_tiocmiwait,
-	.get_icount		= usb_serial_generic_get_icount,
-	.break_ctl		= mxu1_break,
-	.read_int_callback	= mxu1_interrupt_callback,
-};
-
-static struct usb_serial_driver *const serial_drivers[] = {
-	&mxu11x0_device, NULL
-};
-
-module_usb_serial_driver(serial_drivers, mxu1_idtable);
-
-MODULE_AUTHOR("Mathieu Othacehe <m.othacehe@gmail.com>");
-MODULE_DESCRIPTION("MOXA UPort 11x0 USB to Serial Hub Driver");
-MODULE_LICENSE("GPL");
-MODULE_FIRMWARE("moxa/moxa-1110.fw");
-MODULE_FIRMWARE("moxa/moxa-1130.fw");
-MODULE_FIRMWARE("moxa/moxa-1131.fw");
-MODULE_FIRMWARE("moxa/moxa-1150.fw");
-MODULE_FIRMWARE("moxa/moxa-1151.fw");
diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index f228060..348e198 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -268,6 +268,9 @@
 #define TELIT_PRODUCT_CC864_SINGLE		0x1006
 #define TELIT_PRODUCT_DE910_DUAL		0x1010
 #define TELIT_PRODUCT_UE910_V2			0x1012
+#define TELIT_PRODUCT_LE922_USBCFG0		0x1042
+#define TELIT_PRODUCT_LE922_USBCFG3		0x1043
+#define TELIT_PRODUCT_LE922_USBCFG5		0x1045
 #define TELIT_PRODUCT_LE920			0x1200
 #define TELIT_PRODUCT_LE910			0x1201
 
@@ -313,6 +316,7 @@
 #define TOSHIBA_PRODUCT_G450			0x0d45
 
 #define ALINK_VENDOR_ID				0x1e0e
+#define SIMCOM_PRODUCT_SIM7100E			0x9001 /* Yes, ALINK_VENDOR_ID */
 #define ALINK_PRODUCT_PH300			0x9100
 #define ALINK_PRODUCT_3GU			0x9200
 
@@ -605,6 +609,10 @@
 	.reserved = BIT(3) | BIT(4),
 };
 
+static const struct option_blacklist_info simcom_sim7100e_blacklist = {
+	.reserved = BIT(5) | BIT(6),
+};
+
 static const struct option_blacklist_info telit_le910_blacklist = {
 	.sendsetup = BIT(0),
 	.reserved = BIT(1) | BIT(2),
@@ -615,6 +623,16 @@
 	.reserved = BIT(1) | BIT(5),
 };
 
+static const struct option_blacklist_info telit_le922_blacklist_usbcfg0 = {
+	.sendsetup = BIT(2),
+	.reserved = BIT(0) | BIT(1) | BIT(3),
+};
+
+static const struct option_blacklist_info telit_le922_blacklist_usbcfg3 = {
+	.sendsetup = BIT(0),
+	.reserved = BIT(1) | BIT(2) | BIT(3),
+};
+
 static const struct usb_device_id option_ids[] = {
 	{ USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_COLT) },
 	{ USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_RICOLA) },
@@ -1110,9 +1128,13 @@
 	{ USB_DEVICE(KYOCERA_VENDOR_ID, KYOCERA_PRODUCT_KPC650) },
 	{ USB_DEVICE(KYOCERA_VENDOR_ID, KYOCERA_PRODUCT_KPC680) },
 	{ USB_DEVICE(QUALCOMM_VENDOR_ID, 0x6000)}, /* ZTE AC8700 */
+	{ USB_DEVICE_AND_INTERFACE_INFO(QUALCOMM_VENDOR_ID, 0x6001, 0xff, 0xff, 0xff), /* 4G LTE usb-modem U901 */
+	  .driver_info = (kernel_ulong_t)&net_intf3_blacklist },
 	{ USB_DEVICE(QUALCOMM_VENDOR_ID, 0x6613)}, /* Onda H600/ZTE MF330 */
 	{ USB_DEVICE(QUALCOMM_VENDOR_ID, 0x0023)}, /* ONYX 3G device */
 	{ USB_DEVICE(QUALCOMM_VENDOR_ID, 0x9000)}, /* SIMCom SIM5218 */
+	{ USB_DEVICE(QUALCOMM_VENDOR_ID, 0x9003), /* Quectel UC20 */
+	  .driver_info = (kernel_ulong_t)&net_intf4_blacklist },
 	{ USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6001) },
 	{ USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_CMU_300) },
 	{ USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6003),
@@ -1160,6 +1182,12 @@
 	{ USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_CC864_SINGLE) },
 	{ USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_DE910_DUAL) },
 	{ USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_UE910_V2) },
+	{ USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE922_USBCFG0),
+		.driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg0 },
+	{ USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE922_USBCFG3),
+		.driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg3 },
+	{ USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, TELIT_PRODUCT_LE922_USBCFG5, 0xff),
+		.driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg0 },
 	{ USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910),
 		.driver_info = (kernel_ulong_t)&telit_le910_blacklist },
 	{ USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920),
@@ -1629,6 +1657,8 @@
 	{ USB_DEVICE(ALINK_VENDOR_ID, 0x9000) },
 	{ USB_DEVICE(ALINK_VENDOR_ID, ALINK_PRODUCT_PH300) },
 	{ USB_DEVICE_AND_INTERFACE_INFO(ALINK_VENDOR_ID, ALINK_PRODUCT_3GU, 0xff, 0xff, 0xff) },
+	{ USB_DEVICE(ALINK_VENDOR_ID, SIMCOM_PRODUCT_SIM7100E),
+	  .driver_info = (kernel_ulong_t)&simcom_sim7100e_blacklist },
 	{ USB_DEVICE(ALCATEL_VENDOR_ID, ALCATEL_PRODUCT_X060S_X200),
 	  .driver_info = (kernel_ulong_t)&alcatel_x200_blacklist
 	},
@@ -1679,7 +1709,7 @@
 	{ USB_DEVICE(CINTERION_VENDOR_ID, CINTERION_PRODUCT_EU3_P) },
 	{ USB_DEVICE(CINTERION_VENDOR_ID, CINTERION_PRODUCT_PH8),
 		.driver_info = (kernel_ulong_t)&net_intf4_blacklist },
-	{ USB_DEVICE(CINTERION_VENDOR_ID, CINTERION_PRODUCT_AHXX) },
+	{ USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_AHXX, 0xff) },
 	{ USB_DEVICE(CINTERION_VENDOR_ID, CINTERION_PRODUCT_PLXX),
 		.driver_info = (kernel_ulong_t)&net_intf4_blacklist },
 	{ USB_DEVICE(CINTERION_VENDOR_ID, CINTERION_PRODUCT_HC28_MDM) }, 
diff --git a/drivers/usb/serial/qcserial.c b/drivers/usb/serial/qcserial.c
index 9919d2a..1bc6089 100644
--- a/drivers/usb/serial/qcserial.c
+++ b/drivers/usb/serial/qcserial.c
@@ -157,14 +157,17 @@
 	{DEVICE_SWI(0x1199, 0x9056)},	/* Sierra Wireless Modem */
 	{DEVICE_SWI(0x1199, 0x9060)},	/* Sierra Wireless Modem */
 	{DEVICE_SWI(0x1199, 0x9061)},	/* Sierra Wireless Modem */
-	{DEVICE_SWI(0x1199, 0x9070)},	/* Sierra Wireless MC74xx/EM74xx */
-	{DEVICE_SWI(0x1199, 0x9071)},	/* Sierra Wireless MC74xx/EM74xx */
+	{DEVICE_SWI(0x1199, 0x9070)},	/* Sierra Wireless MC74xx */
+	{DEVICE_SWI(0x1199, 0x9071)},	/* Sierra Wireless MC74xx */
+	{DEVICE_SWI(0x1199, 0x9078)},	/* Sierra Wireless EM74xx */
+	{DEVICE_SWI(0x1199, 0x9079)},	/* Sierra Wireless EM74xx */
 	{DEVICE_SWI(0x413c, 0x81a2)},	/* Dell Wireless 5806 Gobi(TM) 4G LTE Mobile Broadband Card */
 	{DEVICE_SWI(0x413c, 0x81a3)},	/* Dell Wireless 5570 HSPA+ (42Mbps) Mobile Broadband Card */
 	{DEVICE_SWI(0x413c, 0x81a4)},	/* Dell Wireless 5570e HSPA+ (42Mbps) Mobile Broadband Card */
 	{DEVICE_SWI(0x413c, 0x81a8)},	/* Dell Wireless 5808 Gobi(TM) 4G LTE Mobile Broadband Card */
 	{DEVICE_SWI(0x413c, 0x81a9)},	/* Dell Wireless 5808e Gobi(TM) 4G LTE Mobile Broadband Card */
 	{DEVICE_SWI(0x413c, 0x81b1)},	/* Dell Wireless 5809e Gobi(TM) 4G LTE Mobile Broadband Card */
+	{DEVICE_SWI(0x413c, 0x81b3)},	/* Dell Wireless 5809e Gobi(TM) 4G LTE Mobile Broadband Card (rev3) */
 
 	/* Huawei devices */
 	{DEVICE_HWI(0x03f0, 0x581d)},	/* HP lt4112 LTE/HSPA+ Gobi 4G Modem (Huawei me906e) */
diff --git a/drivers/usb/serial/visor.c b/drivers/usb/serial/visor.c
index 60afb39..337a0be 100644
--- a/drivers/usb/serial/visor.c
+++ b/drivers/usb/serial/visor.c
@@ -544,6 +544,11 @@
 		(serial->num_interrupt_in == 0))
 		return 0;
 
+	if (serial->num_bulk_in < 2 || serial->num_interrupt_in < 2) {
+		dev_err(&serial->interface->dev, "missing endpoints\n");
+		return -ENODEV;
+	}
+
 	/*
 	* It appears that Treos and Kyoceras want to use the
 	* 1st bulk in endpoint to communicate with the 2nd bulk out endpoint,
@@ -597,8 +602,10 @@
 	 */
 
 	/* some sanity check */
-	if (serial->num_ports < 2)
-		return -1;
+	if (serial->num_bulk_out < 2) {
+		dev_err(&serial->interface->dev, "missing bulk out endpoints\n");
+		return -ENODEV;
+	}
 
 	/* port 0 now uses the modified endpoint Address */
 	port = serial->port[0];
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 2760a7b..8c80a48 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -446,7 +446,8 @@
 		info.num_regions = VFIO_PCI_NUM_REGIONS;
 		info.num_irqs = VFIO_PCI_NUM_IRQS;
 
-		return copy_to_user((void __user *)arg, &info, minsz);
+		return copy_to_user((void __user *)arg, &info, minsz) ?
+			-EFAULT : 0;
 
 	} else if (cmd == VFIO_DEVICE_GET_REGION_INFO) {
 		struct pci_dev *pdev = vdev->pdev;
@@ -520,7 +521,8 @@
 			return -EINVAL;
 		}
 
-		return copy_to_user((void __user *)arg, &info, minsz);
+		return copy_to_user((void __user *)arg, &info, minsz) ?
+			-EFAULT : 0;
 
 	} else if (cmd == VFIO_DEVICE_GET_IRQ_INFO) {
 		struct vfio_irq_info info;
@@ -555,7 +557,8 @@
 		else
 			info.flags |= VFIO_IRQ_INFO_NORESIZE;
 
-		return copy_to_user((void __user *)arg, &info, minsz);
+		return copy_to_user((void __user *)arg, &info, minsz) ?
+			-EFAULT : 0;
 
 	} else if (cmd == VFIO_DEVICE_SET_IRQS) {
 		struct vfio_irq_set hdr;
diff --git a/drivers/vfio/platform/vfio_platform_common.c b/drivers/vfio/platform/vfio_platform_common.c
index 418cdd9..e65b142 100644
--- a/drivers/vfio/platform/vfio_platform_common.c
+++ b/drivers/vfio/platform/vfio_platform_common.c
@@ -219,7 +219,8 @@
 		info.num_regions = vdev->num_regions;
 		info.num_irqs = vdev->num_irqs;
 
-		return copy_to_user((void __user *)arg, &info, minsz);
+		return copy_to_user((void __user *)arg, &info, minsz) ?
+			-EFAULT : 0;
 
 	} else if (cmd == VFIO_DEVICE_GET_REGION_INFO) {
 		struct vfio_region_info info;
@@ -240,7 +241,8 @@
 		info.size = vdev->regions[info.index].size;
 		info.flags = vdev->regions[info.index].flags;
 
-		return copy_to_user((void __user *)arg, &info, minsz);
+		return copy_to_user((void __user *)arg, &info, minsz) ?
+			-EFAULT : 0;
 
 	} else if (cmd == VFIO_DEVICE_GET_IRQ_INFO) {
 		struct vfio_irq_info info;
@@ -259,7 +261,8 @@
 		info.flags = vdev->irqs[info.index].flags;
 		info.count = vdev->irqs[info.index].count;
 
-		return copy_to_user((void __user *)arg, &info, minsz);
+		return copy_to_user((void __user *)arg, &info, minsz) ?
+			-EFAULT : 0;
 
 	} else if (cmd == VFIO_DEVICE_SET_IRQS) {
 		struct vfio_irq_set hdr;
diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index 82f25cc..ecca316 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -123,8 +123,8 @@
 	/*
 	 * With noiommu enabled, an IOMMU group will be created for a device
 	 * that doesn't already have one and doesn't have an iommu_ops on their
-	 * bus.  We use iommu_present() again in the main code to detect these
-	 * fake groups.
+	 * bus.  We set iommudata simply to be able to identify these groups
+	 * as special use and for reclamation later.
 	 */
 	if (group || !noiommu || iommu_present(dev->bus))
 		return group;
@@ -134,6 +134,7 @@
 		return NULL;
 
 	iommu_group_set_name(group, "vfio-noiommu");
+	iommu_group_set_iommudata(group, &noiommu, NULL);
 	ret = iommu_group_add_device(group, dev);
 	iommu_group_put(group);
 	if (ret)
@@ -158,7 +159,7 @@
 void vfio_iommu_group_put(struct iommu_group *group, struct device *dev)
 {
 #ifdef CONFIG_VFIO_NOIOMMU
-	if (!iommu_present(dev->bus))
+	if (iommu_group_get_iommudata(group) == &noiommu)
 		iommu_group_remove_device(dev);
 #endif
 
@@ -190,16 +191,10 @@
 	return -ENOTTY;
 }
 
-static int vfio_iommu_present(struct device *dev, void *unused)
-{
-	return iommu_present(dev->bus) ? 1 : 0;
-}
-
 static int vfio_noiommu_attach_group(void *iommu_data,
 				     struct iommu_group *iommu_group)
 {
-	return iommu_group_for_each_dev(iommu_group, NULL,
-					vfio_iommu_present) ? -EINVAL : 0;
+	return iommu_group_get_iommudata(iommu_group) == &noiommu ? 0 : -EINVAL;
 }
 
 static void vfio_noiommu_detach_group(void *iommu_data,
@@ -323,8 +318,7 @@
 /**
  * Group objects - create, release, get, put, search
  */
-static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group,
-					    bool iommu_present)
+static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group)
 {
 	struct vfio_group *group, *tmp;
 	struct device *dev;
@@ -342,7 +336,9 @@
 	atomic_set(&group->container_users, 0);
 	atomic_set(&group->opened, 0);
 	group->iommu_group = iommu_group;
-	group->noiommu = !iommu_present;
+#ifdef CONFIG_VFIO_NOIOMMU
+	group->noiommu = (iommu_group_get_iommudata(iommu_group) == &noiommu);
+#endif
 
 	group->nb.notifier_call = vfio_iommu_group_notifier;
 
@@ -767,7 +763,7 @@
 
 	group = vfio_group_get_from_iommu(iommu_group);
 	if (!group) {
-		group = vfio_create_group(iommu_group, iommu_present(dev->bus));
+		group = vfio_create_group(iommu_group);
 		if (IS_ERR(group)) {
 			iommu_group_put(iommu_group);
 			return PTR_ERR(group);
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 6f1ea3d..75b24e9 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -999,7 +999,8 @@
 
 		info.iova_pgsizes = vfio_pgsize_bitmap(iommu);
 
-		return copy_to_user((void __user *)arg, &info, minsz);
+		return copy_to_user((void __user *)arg, &info, minsz) ?
+			-EFAULT : 0;
 
 	} else if (cmd == VFIO_IOMMU_MAP_DMA) {
 		struct vfio_iommu_type1_dma_map map;
@@ -1032,7 +1033,8 @@
 		if (ret)
 			return ret;
 
-		return copy_to_user((void __user *)arg, &unmap, minsz);
+		return copy_to_user((void __user *)arg, &unmap, minsz) ?
+			-EFAULT : 0;
 	}
 
 	return -ENOTTY;
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index ad2146a..236553e 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -1156,6 +1156,8 @@
 {
 	__virtio16 last_used_idx;
 	int r;
+	bool is_le = vq->is_le;
+
 	if (!vq->private_data) {
 		vq->is_le = virtio_legacy_is_little_endian();
 		return 0;
@@ -1165,15 +1167,20 @@
 
 	r = vhost_update_used_flags(vq);
 	if (r)
-		return r;
+		goto err;
 	vq->signalled_used_valid = false;
-	if (!access_ok(VERIFY_READ, &vq->used->idx, sizeof vq->used->idx))
-		return -EFAULT;
+	if (!access_ok(VERIFY_READ, &vq->used->idx, sizeof vq->used->idx)) {
+		r = -EFAULT;
+		goto err;
+	}
 	r = __get_user(last_used_idx, &vq->used->idx);
 	if (r)
-		return r;
+		goto err;
 	vq->last_used_idx = vhost16_to_cpu(vq, last_used_idx);
 	return 0;
+err:
+	vq->is_le = is_le;
+	return r;
 }
 EXPORT_SYMBOL_GPL(vhost_init_used);
 
diff --git a/drivers/video/console/fbcon.c b/drivers/video/console/fbcon.c
index 92f3949..6e92917 100644
--- a/drivers/video/console/fbcon.c
+++ b/drivers/video/console/fbcon.c
@@ -709,6 +709,7 @@
 	}
 
 	if (!err) {
+		ops->cur_blink_jiffies = HZ / 5;
 		info->fbcon_par = ops;
 
 		if (vc)
@@ -956,6 +957,7 @@
 	ops->currcon = -1;
 	ops->graphics = 1;
 	ops->cur_rotate = -1;
+	ops->cur_blink_jiffies = HZ / 5;
 	info->fbcon_par = ops;
 	p->con_rotate = initial_rotation;
 	set_blitting_type(vc, info);
diff --git a/drivers/video/fbdev/acornfb.c b/drivers/video/fbdev/acornfb.c
index a305cae..fb75b7e 100644
--- a/drivers/video/fbdev/acornfb.c
+++ b/drivers/video/fbdev/acornfb.c
@@ -1040,8 +1040,8 @@
 		 * for the framebuffer if we are not using
 		 * VRAM.
 		 */
-		base = dma_alloc_writecombine(current_par.dev, size, &handle,
-					      GFP_KERNEL);
+		base = dma_alloc_wc(current_par.dev, size, &handle,
+				    GFP_KERNEL);
 		if (base == NULL) {
 			printk(KERN_ERR "acornfb: unable to allocate screen "
 			       "memory\n");
diff --git a/drivers/video/fbdev/amba-clcd-versatile.c b/drivers/video/fbdev/amba-clcd-versatile.c
index 7a8afcd..a8a22da 100644
--- a/drivers/video/fbdev/amba-clcd-versatile.c
+++ b/drivers/video/fbdev/amba-clcd-versatile.c
@@ -154,8 +154,8 @@
 {
 	dma_addr_t dma;
 
-	fb->fb.screen_base = dma_alloc_writecombine(&fb->dev->dev, framesize,
-						    &dma, GFP_KERNEL);
+	fb->fb.screen_base = dma_alloc_wc(&fb->dev->dev, framesize, &dma,
+					  GFP_KERNEL);
 	if (!fb->fb.screen_base) {
 		pr_err("CLCD: unable to map framebuffer\n");
 		return -ENOMEM;
@@ -169,14 +169,12 @@
 
 int versatile_clcd_mmap_dma(struct clcd_fb *fb, struct vm_area_struct *vma)
 {
-	return dma_mmap_writecombine(&fb->dev->dev, vma,
-				     fb->fb.screen_base,
-				     fb->fb.fix.smem_start,
-				     fb->fb.fix.smem_len);
+	return dma_mmap_wc(&fb->dev->dev, vma, fb->fb.screen_base,
+			   fb->fb.fix.smem_start, fb->fb.fix.smem_len);
 }
 
 void versatile_clcd_remove_dma(struct clcd_fb *fb)
 {
-	dma_free_writecombine(&fb->dev->dev, fb->fb.fix.smem_len,
-			      fb->fb.screen_base, fb->fb.fix.smem_start);
+	dma_free_wc(&fb->dev->dev, fb->fb.fix.smem_len, fb->fb.screen_base,
+		    fb->fb.fix.smem_start);
 }
diff --git a/drivers/video/fbdev/amba-clcd.c b/drivers/video/fbdev/amba-clcd.c
index 9362424..fe274b5 100644
--- a/drivers/video/fbdev/amba-clcd.c
+++ b/drivers/video/fbdev/amba-clcd.c
@@ -774,8 +774,8 @@
 
 static int clcdfb_of_dma_mmap(struct clcd_fb *fb, struct vm_area_struct *vma)
 {
-	return dma_mmap_writecombine(&fb->dev->dev, vma, fb->fb.screen_base,
-			fb->fb.fix.smem_start, fb->fb.fix.smem_len);
+	return dma_mmap_wc(&fb->dev->dev, vma, fb->fb.screen_base,
+			   fb->fb.fix.smem_start, fb->fb.fix.smem_len);
 }
 
 static void clcdfb_of_dma_remove(struct clcd_fb *fb)
diff --git a/drivers/video/fbdev/atmel_lcdfb.c b/drivers/video/fbdev/atmel_lcdfb.c
index 19eb42b..56c60e6 100644
--- a/drivers/video/fbdev/atmel_lcdfb.c
+++ b/drivers/video/fbdev/atmel_lcdfb.c
@@ -414,8 +414,8 @@
 {
 	struct fb_info *info = sinfo->info;
 
-	dma_free_writecombine(info->device, info->fix.smem_len,
-				info->screen_base, info->fix.smem_start);
+	dma_free_wc(info->device, info->fix.smem_len, info->screen_base,
+		    info->fix.smem_start);
 }
 
 /**
@@ -435,8 +435,9 @@
 		    * ((var->bits_per_pixel + 7) / 8));
 	info->fix.smem_len = max(smem_len, sinfo->smem_len);
 
-	info->screen_base = dma_alloc_writecombine(info->device, info->fix.smem_len,
-					(dma_addr_t *)&info->fix.smem_start, GFP_KERNEL);
+	info->screen_base = dma_alloc_wc(info->device, info->fix.smem_len,
+					 (dma_addr_t *)&info->fix.smem_start,
+					 GFP_KERNEL);
 
 	if (!info->screen_base) {
 		return -ENOMEM;
diff --git a/drivers/video/fbdev/da8xx-fb.c b/drivers/video/fbdev/da8xx-fb.c
index 0081725..6b2a06d 100644
--- a/drivers/video/fbdev/da8xx-fb.c
+++ b/drivers/video/fbdev/da8xx-fb.c
@@ -152,7 +152,7 @@
 
 struct da8xx_fb_par {
 	struct device		*dev;
-	resource_size_t p_palette_base;
+	dma_addr_t		p_palette_base;
 	unsigned char *v_palette_base;
 	dma_addr_t		vram_phys;
 	unsigned long		vram_size;
@@ -1428,7 +1428,7 @@
 
 	par->vram_virt = dma_alloc_coherent(NULL,
 					    par->vram_size,
-					    (resource_size_t *) &par->vram_phys,
+					    &par->vram_phys,
 					    GFP_KERNEL | GFP_DMA);
 	if (!par->vram_virt) {
 		dev_err(&device->dev,
@@ -1448,7 +1448,7 @@
 
 	/* allocate palette buffer */
 	par->v_palette_base = dma_zalloc_coherent(NULL, PALETTE_SIZE,
-						  (resource_size_t *)&par->p_palette_base,
+						  &par->p_palette_base,
 						  GFP_KERNEL | GFP_DMA);
 	if (!par->v_palette_base) {
 		dev_err(&device->dev,
diff --git a/drivers/video/fbdev/ep93xx-fb.c b/drivers/video/fbdev/ep93xx-fb.c
index 5b10810..75f0db2 100644
--- a/drivers/video/fbdev/ep93xx-fb.c
+++ b/drivers/video/fbdev/ep93xx-fb.c
@@ -316,9 +316,8 @@
 	unsigned int offset = vma->vm_pgoff << PAGE_SHIFT;
 
 	if (offset < info->fix.smem_len) {
-		return dma_mmap_writecombine(info->dev, vma, info->screen_base,
-					     info->fix.smem_start,
-					     info->fix.smem_len);
+		return dma_mmap_wc(info->dev, vma, info->screen_base,
+				   info->fix.smem_start, info->fix.smem_len);
 	}
 
 	return -EINVAL;
@@ -428,8 +427,7 @@
 	/* Maximum 16bpp -> used memory is maximum x*y*2 bytes */
 	fb_size = EP93XXFB_MAX_XRES * EP93XXFB_MAX_YRES * 2;
 
-	virt_addr = dma_alloc_writecombine(info->dev, fb_size,
-					   &phys_addr, GFP_KERNEL);
+	virt_addr = dma_alloc_wc(info->dev, fb_size, &phys_addr, GFP_KERNEL);
 	if (!virt_addr)
 		return -ENOMEM;
 
diff --git a/drivers/video/fbdev/exynos/s6e8ax0.c b/drivers/video/fbdev/exynos/s6e8ax0.c
index 95873f2..de2f3e7 100644
--- a/drivers/video/fbdev/exynos/s6e8ax0.c
+++ b/drivers/video/fbdev/exynos/s6e8ax0.c
@@ -829,8 +829,7 @@
 	return 0;
 }
 
-#ifdef CONFIG_PM
-static int s6e8ax0_suspend(struct mipi_dsim_lcd_device *dsim_dev)
+static int __maybe_unused s6e8ax0_suspend(struct mipi_dsim_lcd_device *dsim_dev)
 {
 	struct s6e8ax0 *lcd = dev_get_drvdata(&dsim_dev->dev);
 
@@ -843,7 +842,7 @@
 	return 0;
 }
 
-static int s6e8ax0_resume(struct mipi_dsim_lcd_device *dsim_dev)
+static int __maybe_unused s6e8ax0_resume(struct mipi_dsim_lcd_device *dsim_dev)
 {
 	struct s6e8ax0 *lcd = dev_get_drvdata(&dsim_dev->dev);
 
@@ -855,10 +854,6 @@
 
 	return 0;
 }
-#else
-#define s6e8ax0_suspend		NULL
-#define s6e8ax0_resume		NULL
-#endif
 
 static struct mipi_dsim_lcd_driver s6e8ax0_dsim_ddi_driver = {
 	.name = "s6e8ax0",
@@ -867,8 +862,8 @@
 	.power_on = s6e8ax0_power_on,
 	.set_sequence = s6e8ax0_set_sequence,
 	.probe = s6e8ax0_probe,
-	.suspend = s6e8ax0_suspend,
-	.resume = s6e8ax0_resume,
+	.suspend = IS_ENABLED(CONFIG_PM) ? s6e8ax0_suspend : NULL,
+	.resume = IS_ENABLED(CONFIG_PM) ? s6e8ax0_resume : NULL,
 };
 
 static int s6e8ax0_init(void)
diff --git a/drivers/video/fbdev/gbefb.c b/drivers/video/fbdev/gbefb.c
index b63d55f..1a242b1 100644
--- a/drivers/video/fbdev/gbefb.c
+++ b/drivers/video/fbdev/gbefb.c
@@ -1185,8 +1185,8 @@
 	} else {
 		/* try to allocate memory with the classical allocator
 		 * this has high chance to fail on low memory machines */
-		gbe_mem = dma_alloc_writecombine(NULL, gbe_mem_size,
-						 &gbe_dma_addr, GFP_KERNEL);
+		gbe_mem = dma_alloc_wc(NULL, gbe_mem_size, &gbe_dma_addr,
+				       GFP_KERNEL);
 		if (!gbe_mem) {
 			printk(KERN_ERR "gbefb: couldn't allocate framebuffer memory\n");
 			ret = -ENOMEM;
@@ -1238,7 +1238,7 @@
 out_gbe_unmap:
 	arch_phys_wc_del(par->wc_cookie);
 	if (gbe_dma_addr)
-		dma_free_writecombine(NULL, gbe_mem_size, gbe_mem, gbe_mem_phys);
+		dma_free_wc(NULL, gbe_mem_size, gbe_mem, gbe_mem_phys);
 out_tiles_free:
 	dma_free_coherent(NULL, GBE_TLB_SIZE * sizeof(uint16_t),
 			  (void *)gbe_tiles.cpu, gbe_tiles.dma);
@@ -1259,7 +1259,7 @@
 	gbe_turn_off();
 	arch_phys_wc_del(par->wc_cookie);
 	if (gbe_dma_addr)
-		dma_free_writecombine(NULL, gbe_mem_size, gbe_mem, gbe_mem_phys);
+		dma_free_wc(NULL, gbe_mem_size, gbe_mem, gbe_mem_phys);
 	dma_free_coherent(NULL, GBE_TLB_SIZE * sizeof(uint16_t),
 			  (void *)gbe_tiles.cpu, gbe_tiles.dma);
 	release_mem_region(GBE_BASE, sizeof(struct sgi_gbe));
diff --git a/drivers/video/fbdev/imxfb.c b/drivers/video/fbdev/imxfb.c
index cee8860..76b6a77 100644
--- a/drivers/video/fbdev/imxfb.c
+++ b/drivers/video/fbdev/imxfb.c
@@ -902,6 +902,21 @@
 		goto failed_getclock;
 	}
 
+	/*
+	 * The LCDC controller does not have an enable bit. The
+	 * controller starts directly when the clocks are enabled.
+	 * If the clocks are enabled when the controller is not yet
+	 * programmed with proper register values (enabled at the
+	 * bootloader, for example) then it just goes into some undefined
+	 * state.
+	 * To avoid this issue, let's enable and disable LCDC IPG clock
+	 * so that we force some kind of 'reset' to the LCDC block.
+	 */
+	ret = clk_prepare_enable(fbi->clk_ipg);
+	if (ret)
+		goto failed_getclock;
+	clk_disable_unprepare(fbi->clk_ipg);
+
 	fbi->clk_ahb = devm_clk_get(&pdev->dev, "ahb");
 	if (IS_ERR(fbi->clk_ahb)) {
 		ret = PTR_ERR(fbi->clk_ahb);
@@ -922,8 +937,8 @@
 	}
 
 	fbi->map_size = PAGE_ALIGN(info->fix.smem_len);
-	info->screen_base = dma_alloc_writecombine(&pdev->dev, fbi->map_size,
-						   &fbi->map_dma, GFP_KERNEL);
+	info->screen_base = dma_alloc_wc(&pdev->dev, fbi->map_size,
+					 &fbi->map_dma, GFP_KERNEL);
 
 	if (!info->screen_base) {
 		dev_err(&pdev->dev, "Failed to allocate video RAM: %d\n", ret);
@@ -990,8 +1005,8 @@
 	if (pdata && pdata->exit)
 		pdata->exit(fbi->pdev);
 failed_platform_init:
-	dma_free_writecombine(&pdev->dev, fbi->map_size, info->screen_base,
-			      fbi->map_dma);
+	dma_free_wc(&pdev->dev, fbi->map_size, info->screen_base,
+		    fbi->map_dma);
 failed_map:
 	iounmap(fbi->regs);
 failed_ioremap:
@@ -1026,8 +1041,8 @@
 	kfree(info->pseudo_palette);
 	framebuffer_release(info);
 
-	dma_free_writecombine(&pdev->dev, fbi->map_size, info->screen_base,
-			      fbi->map_dma);
+	dma_free_wc(&pdev->dev, fbi->map_size, info->screen_base,
+		    fbi->map_dma);
 
 	iounmap(fbi->regs);
 	release_mem_region(res->start, resource_size(res));
diff --git a/drivers/video/fbdev/mmp/hw/mmp_ctrl.c b/drivers/video/fbdev/mmp/hw/mmp_ctrl.c
index de54a47..b6f83d5 100644
--- a/drivers/video/fbdev/mmp/hw/mmp_ctrl.c
+++ b/drivers/video/fbdev/mmp/hw/mmp_ctrl.c
@@ -503,8 +503,7 @@
 	ctrl->reg_base = devm_ioremap_nocache(ctrl->dev,
 			res->start, resource_size(res));
 	if (ctrl->reg_base == NULL) {
-		dev_err(ctrl->dev, "%s: res %x - %x map failed\n", __func__,
-			res->start, res->end);
+		dev_err(ctrl->dev, "%s: res %pR map failed\n", __func__, res);
 		ret = -ENOMEM;
 		goto failed;
 	}
diff --git a/drivers/video/fbdev/mx3fb.c b/drivers/video/fbdev/mx3fb.c
index 7947634..f91b1db 100644
--- a/drivers/video/fbdev/mx3fb.c
+++ b/drivers/video/fbdev/mx3fb.c
@@ -1336,9 +1336,8 @@
 	int retval = 0;
 	dma_addr_t addr;
 
-	fbi->screen_base = dma_alloc_writecombine(fbi->device,
-						  mem_len,
-						  &addr, GFP_DMA | GFP_KERNEL);
+	fbi->screen_base = dma_alloc_wc(fbi->device, mem_len, &addr,
+					GFP_DMA | GFP_KERNEL);
 
 	if (!fbi->screen_base) {
 		dev_err(fbi->device, "Cannot allocate %u bytes framebuffer memory\n",
@@ -1378,8 +1377,8 @@
  */
 static int mx3fb_unmap_video_memory(struct fb_info *fbi)
 {
-	dma_free_writecombine(fbi->device, fbi->fix.smem_len,
-			      fbi->screen_base, fbi->fix.smem_start);
+	dma_free_wc(fbi->device, fbi->fix.smem_len, fbi->screen_base,
+		    fbi->fix.smem_start);
 
 	fbi->screen_base = NULL;
 	mutex_lock(&fbi->mm_lock);
diff --git a/drivers/video/fbdev/nuc900fb.c b/drivers/video/fbdev/nuc900fb.c
index 389fa2c..6680eda 100644
--- a/drivers/video/fbdev/nuc900fb.c
+++ b/drivers/video/fbdev/nuc900fb.c
@@ -396,8 +396,8 @@
 	dev_dbg(fbi->dev, "nuc900fb_map_video_memory(fbi=%p) map_size %lu\n",
 		fbi, map_size);
 
-	info->screen_base = dma_alloc_writecombine(fbi->dev, map_size,
-							&map_dma, GFP_KERNEL);
+	info->screen_base = dma_alloc_wc(fbi->dev, map_size, &map_dma,
+					 GFP_KERNEL);
 
 	if (!info->screen_base)
 		return -ENOMEM;
@@ -411,8 +411,8 @@
 static inline void nuc900fb_unmap_video_memory(struct fb_info *info)
 {
 	struct nuc900fb_info *fbi = info->par;
-	dma_free_writecombine(fbi->dev, PAGE_ALIGN(info->fix.smem_len),
-			      info->screen_base, info->fix.smem_start);
+	dma_free_wc(fbi->dev, PAGE_ALIGN(info->fix.smem_len),
+		    info->screen_base, info->fix.smem_start);
 }
 
 static irqreturn_t nuc900fb_irqhandler(int irq, void *dev_id)
diff --git a/drivers/video/fbdev/ocfb.c b/drivers/video/fbdev/ocfb.c
index c9293ae..a970edc2 100644
--- a/drivers/video/fbdev/ocfb.c
+++ b/drivers/video/fbdev/ocfb.c
@@ -123,11 +123,11 @@
 
 	/* Horizontal timings */
 	ocfb_writereg(fbdev, OCFB_HTIM, (var->hsync_len - 1) << 24 |
-		      (var->right_margin - 1) << 16 | (var->xres - 1));
+		      (var->left_margin - 1) << 16 | (var->xres - 1));
 
 	/* Vertical timings */
 	ocfb_writereg(fbdev, OCFB_VTIM, (var->vsync_len - 1) << 24 |
-		      (var->lower_margin - 1) << 16 | (var->yres - 1));
+		      (var->upper_margin - 1) << 16 | (var->yres - 1));
 
 	/* Total length of frame */
 	hlen = var->left_margin + var->right_margin + var->hsync_len +
diff --git a/drivers/video/fbdev/omap/lcdc.c b/drivers/video/fbdev/omap/lcdc.c
index 6efa259..e3d9b9e 100644
--- a/drivers/video/fbdev/omap/lcdc.c
+++ b/drivers/video/fbdev/omap/lcdc.c
@@ -612,8 +612,8 @@
 
 static int alloc_palette_ram(void)
 {
-	lcdc.palette_virt = dma_alloc_writecombine(lcdc.fbdev->dev,
-		MAX_PALETTE_SIZE, &lcdc.palette_phys, GFP_KERNEL);
+	lcdc.palette_virt = dma_alloc_wc(lcdc.fbdev->dev, MAX_PALETTE_SIZE,
+					 &lcdc.palette_phys, GFP_KERNEL);
 	if (lcdc.palette_virt == NULL) {
 		dev_err(lcdc.fbdev->dev, "failed to alloc palette memory\n");
 		return -ENOMEM;
@@ -625,8 +625,8 @@
 
 static void free_palette_ram(void)
 {
-	dma_free_writecombine(lcdc.fbdev->dev, MAX_PALETTE_SIZE,
-			lcdc.palette_virt, lcdc.palette_phys);
+	dma_free_wc(lcdc.fbdev->dev, MAX_PALETTE_SIZE, lcdc.palette_virt,
+		    lcdc.palette_phys);
 }
 
 static int alloc_fbmem(struct omapfb_mem_region *region)
@@ -642,8 +642,8 @@
 	if (region->size > frame_size)
 		frame_size = region->size;
 	lcdc.vram_size = frame_size;
-	lcdc.vram_virt = dma_alloc_writecombine(lcdc.fbdev->dev,
-			lcdc.vram_size, &lcdc.vram_phys, GFP_KERNEL);
+	lcdc.vram_virt = dma_alloc_wc(lcdc.fbdev->dev, lcdc.vram_size,
+				      &lcdc.vram_phys, GFP_KERNEL);
 	if (lcdc.vram_virt == NULL) {
 		dev_err(lcdc.fbdev->dev, "unable to allocate FB DMA memory\n");
 		return -ENOMEM;
@@ -660,8 +660,8 @@
 
 static void free_fbmem(void)
 {
-	dma_free_writecombine(lcdc.fbdev->dev, lcdc.vram_size,
-			      lcdc.vram_virt, lcdc.vram_phys);
+	dma_free_wc(lcdc.fbdev->dev, lcdc.vram_size, lcdc.vram_virt,
+		    lcdc.vram_phys);
 }
 
 static int setup_fbmem(struct omapfb_mem_desc *req_md)
diff --git a/drivers/video/fbdev/pxa168fb.c b/drivers/video/fbdev/pxa168fb.c
index efb57c0..def3a50 100644
--- a/drivers/video/fbdev/pxa168fb.c
+++ b/drivers/video/fbdev/pxa168fb.c
@@ -680,8 +680,8 @@
 	 */
 	info->fix.smem_len = PAGE_ALIGN(DEFAULT_FB_SIZE);
 
-	info->screen_base = dma_alloc_writecombine(fbi->dev, info->fix.smem_len,
-						&fbi->fb_start_dma, GFP_KERNEL);
+	info->screen_base = dma_alloc_wc(fbi->dev, info->fix.smem_len,
+					 &fbi->fb_start_dma, GFP_KERNEL);
 	if (info->screen_base == NULL) {
 		ret = -ENOMEM;
 		goto failed_free_info;
@@ -804,8 +804,8 @@
 
 	irq = platform_get_irq(pdev, 0);
 
-	dma_free_writecombine(fbi->dev, PAGE_ALIGN(info->fix.smem_len),
-				info->screen_base, info->fix.smem_start);
+	dma_free_wc(fbi->dev, PAGE_ALIGN(info->fix.smem_len),
+		    info->screen_base, info->fix.smem_start);
 
 	clk_disable(fbi->clk);
 
diff --git a/drivers/video/fbdev/pxafb.c b/drivers/video/fbdev/pxafb.c
index 33b2bb3..2c0487f 100644
--- a/drivers/video/fbdev/pxafb.c
+++ b/drivers/video/fbdev/pxafb.c
@@ -2446,8 +2446,8 @@
 
 	free_pages_exact(fbi->video_mem, fbi->video_mem_size);
 
-	dma_free_writecombine(&dev->dev, fbi->dma_buff_size,
-			fbi->dma_buff, fbi->dma_buff_phys);
+	dma_free_wc(&dev->dev, fbi->dma_buff_size, fbi->dma_buff,
+		    fbi->dma_buff_phys);
 
 	iounmap(fbi->mmio_base);
 
diff --git a/drivers/video/fbdev/s3c-fb.c b/drivers/video/fbdev/s3c-fb.c
index f72dd12..5f4f696 100644
--- a/drivers/video/fbdev/s3c-fb.c
+++ b/drivers/video/fbdev/s3c-fb.c
@@ -1105,8 +1105,7 @@
 
 	dev_dbg(sfb->dev, "want %u bytes for window\n", size);
 
-	fbi->screen_base = dma_alloc_writecombine(sfb->dev, size,
-						  &map_dma, GFP_KERNEL);
+	fbi->screen_base = dma_alloc_wc(sfb->dev, size, &map_dma, GFP_KERNEL);
 	if (!fbi->screen_base)
 		return -ENOMEM;
 
@@ -1131,8 +1130,8 @@
 	struct fb_info *fbi = win->fbinfo;
 
 	if (fbi->screen_base)
-		dma_free_writecombine(sfb->dev, PAGE_ALIGN(fbi->fix.smem_len),
-			      fbi->screen_base, fbi->fix.smem_start);
+		dma_free_wc(sfb->dev, PAGE_ALIGN(fbi->fix.smem_len),
+		            fbi->screen_base, fbi->fix.smem_start);
 }
 
 /**
diff --git a/drivers/video/fbdev/s3c2410fb.c b/drivers/video/fbdev/s3c2410fb.c
index d6704ad..0dd86be 100644
--- a/drivers/video/fbdev/s3c2410fb.c
+++ b/drivers/video/fbdev/s3c2410fb.c
@@ -645,8 +645,8 @@
 
 	dprintk("map_video_memory(fbi=%p) map_size %u\n", fbi, map_size);
 
-	info->screen_base = dma_alloc_writecombine(fbi->dev, map_size,
-						   &map_dma, GFP_KERNEL);
+	info->screen_base = dma_alloc_wc(fbi->dev, map_size, &map_dma,
+					 GFP_KERNEL);
 
 	if (info->screen_base) {
 		/* prevent initial garbage on screen */
@@ -667,8 +667,8 @@
 {
 	struct s3c2410fb_info *fbi = info->par;
 
-	dma_free_writecombine(fbi->dev, PAGE_ALIGN(info->fix.smem_len),
-			      info->screen_base, info->fix.smem_start);
+	dma_free_wc(fbi->dev, PAGE_ALIGN(info->fix.smem_len),
+		    info->screen_base, info->fix.smem_start);
 }
 
 static inline void modify_gpio(void __iomem *reg,
diff --git a/drivers/video/fbdev/sa1100fb.c b/drivers/video/fbdev/sa1100fb.c
index dcf774c..fc2aaa5 100644
--- a/drivers/video/fbdev/sa1100fb.c
+++ b/drivers/video/fbdev/sa1100fb.c
@@ -567,8 +567,8 @@
 
 	if (off < info->fix.smem_len) {
 		vma->vm_pgoff += 1; /* skip over the palette */
-		return dma_mmap_writecombine(fbi->dev, vma, fbi->map_cpu,
-					     fbi->map_dma, fbi->map_size);
+		return dma_mmap_wc(fbi->dev, vma, fbi->map_cpu, fbi->map_dma,
+				   fbi->map_size);
 	}
 
 	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
@@ -1099,8 +1099,8 @@
 	 * of the framebuffer.
 	 */
 	fbi->map_size = PAGE_ALIGN(fbi->fb.fix.smem_len + PAGE_SIZE);
-	fbi->map_cpu = dma_alloc_writecombine(fbi->dev, fbi->map_size,
-					      &fbi->map_dma, GFP_KERNEL);
+	fbi->map_cpu = dma_alloc_wc(fbi->dev, fbi->map_size, &fbi->map_dma,
+				    GFP_KERNEL);
 
 	if (fbi->map_cpu) {
 		fbi->fb.screen_base = fbi->map_cpu + PAGE_SIZE;
diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c
index 36205c2..f6bed86 100644
--- a/drivers/virtio/virtio_pci_common.c
+++ b/drivers/virtio/virtio_pci_common.c
@@ -545,6 +545,7 @@
 static void virtio_pci_remove(struct pci_dev *pci_dev)
 {
 	struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
+	struct device *dev = get_device(&vp_dev->vdev.dev);
 
 	unregister_virtio_device(&vp_dev->vdev);
 
@@ -554,6 +555,7 @@
 		virtio_pci_modern_remove(vp_dev);
 
 	pci_disable_device(pci_dev);
+	put_device(dev);
 }
 
 static struct pci_driver virtio_pci_driver = {
diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c
index c0c11fa..7760fc1 100644
--- a/drivers/virtio/virtio_pci_modern.c
+++ b/drivers/virtio/virtio_pci_modern.c
@@ -679,7 +679,7 @@
 
 	pci_read_config_dword(pci_dev,
 			      notify + offsetof(struct virtio_pci_notify_cap,
-						cap.length),
+						cap.offset),
 			      &notify_offset);
 
 	/* We don't know how many VQs we'll map, ahead of the time.
diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig
index 4f0e7be..80825a7 100644
--- a/drivers/watchdog/Kconfig
+++ b/drivers/watchdog/Kconfig
@@ -145,7 +145,8 @@
 config TANGOX_WATCHDOG
 	tristate "Sigma Designs SMP86xx/SMP87xx watchdog"
 	select WATCHDOG_CORE
-	depends on ARCH_TANGOX || COMPILE_TEST
+	depends on ARCH_TANGO || COMPILE_TEST
+	depends on HAS_IOMEM
 	help
 	  Support for the watchdog in Sigma Designs SMP86xx (tango3)
 	  and SMP87xx (tango4) family chips.
@@ -618,6 +619,7 @@
 config LPC18XX_WATCHDOG
 	tristate "LPC18xx/43xx Watchdog"
 	depends on ARCH_LPC18XX || COMPILE_TEST
+	depends on HAS_IOMEM
 	select WATCHDOG_CORE
 	help
 	  Say Y here if to include support for the watchdog timer
@@ -1374,6 +1376,7 @@
 config BCM7038_WDT
 	tristate "BCM7038 Watchdog"
 	select WATCHDOG_CORE
+	depends on HAS_IOMEM
 	help
 	 Watchdog driver for the built-in hardware in Broadcom 7038 SoCs.
 
@@ -1383,6 +1386,7 @@
 	tristate "Imagination Technologies PDC Watchdog Timer"
 	depends on HAS_IOMEM
 	depends on METAG || MIPS || COMPILE_TEST
+	select WATCHDOG_CORE
 	help
 	  Driver for Imagination Technologies PowerDown Controller
 	  Watchdog Timer.
@@ -1565,6 +1569,17 @@
 	  machines.  The watchdog timeout period is normally one minute but
 	  can be changed with a boot-time parameter.
 
+config WATCHDOG_SUN4V
+	tristate "Sun4v Watchdog support"
+	select WATCHDOG_CORE
+	depends on SPARC64
+	help
+	  Say Y here to support the hypervisor watchdog capability embedded
+	  in the SPARC sun4v architecture.
+
+	  To compile this driver as a module, choose M here. The module will
+	  be called sun4v_wdt.
+
 # XTENSA Architecture
 
 # Xen Architecture
diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile
index f566753..f6a6a38 100644
--- a/drivers/watchdog/Makefile
+++ b/drivers/watchdog/Makefile
@@ -179,6 +179,7 @@
 
 obj-$(CONFIG_WATCHDOG_RIO)		+= riowd.o
 obj-$(CONFIG_WATCHDOG_CP1XXX)		+= cpwd.o
+obj-$(CONFIG_WATCHDOG_SUN4V)		+= sun4v_wdt.o
 
 # XTENSA Architecture
 
diff --git a/drivers/watchdog/max63xx_wdt.c b/drivers/watchdog/max63xx_wdt.c
index f36ca4b..ac5840d 100644
--- a/drivers/watchdog/max63xx_wdt.c
+++ b/drivers/watchdog/max63xx_wdt.c
@@ -292,4 +292,4 @@
 		 "Force selection of a timeout setting without initial delay "
 		 "(max6373/74 only, default=0)");
 
-MODULE_LICENSE("GPL");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/watchdog/pcwd_usb.c b/drivers/watchdog/pcwd_usb.c
index 1a11aed..68952d9 100644
--- a/drivers/watchdog/pcwd_usb.c
+++ b/drivers/watchdog/pcwd_usb.c
@@ -608,7 +608,7 @@
 	struct usb_host_interface *iface_desc;
 	struct usb_endpoint_descriptor *endpoint;
 	struct usb_pcwd_private *usb_pcwd = NULL;
-	int pipe, maxp;
+	int pipe;
 	int retval = -ENOMEM;
 	int got_fw_rev;
 	unsigned char fw_rev_major, fw_rev_minor;
@@ -641,7 +641,6 @@
 
 	/* get a handle to the interrupt data pipe */
 	pipe = usb_rcvintpipe(udev, endpoint->bEndpointAddress);
-	maxp = usb_maxpacket(udev, pipe, usb_pipeout(pipe));
 
 	/* allocate memory for our device and initialize it */
 	usb_pcwd = kzalloc(sizeof(struct usb_pcwd_private), GFP_KERNEL);
diff --git a/drivers/watchdog/sp805_wdt.c b/drivers/watchdog/sp805_wdt.c
index 01d8162..e7a715e 100644
--- a/drivers/watchdog/sp805_wdt.c
+++ b/drivers/watchdog/sp805_wdt.c
@@ -139,12 +139,11 @@
 
 	writel_relaxed(UNLOCK, wdt->base + WDTLOCK);
 	writel_relaxed(wdt->load_val, wdt->base + WDTLOAD);
+	writel_relaxed(INT_MASK, wdt->base + WDTINTCLR);
 
-	if (!ping) {
-		writel_relaxed(INT_MASK, wdt->base + WDTINTCLR);
+	if (!ping)
 		writel_relaxed(INT_ENABLE | RESET_ENABLE, wdt->base +
 				WDTCONTROL);
-	}
 
 	writel_relaxed(LOCK, wdt->base + WDTLOCK);
 
diff --git a/drivers/watchdog/sun4v_wdt.c b/drivers/watchdog/sun4v_wdt.c
new file mode 100644
index 0000000..1467fe5
--- /dev/null
+++ b/drivers/watchdog/sun4v_wdt.c
@@ -0,0 +1,191 @@
+/*
+ *	sun4v watchdog timer
+ *	(c) Copyright 2016 Oracle Corporation
+ *
+ *	Implement a simple watchdog driver using the built-in sun4v hypervisor
+ *	watchdog support. If time expires, the hypervisor stops or bounces
+ *	the guest domain.
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version
+ *	2 of the License, or (at your option) any later version.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/watchdog.h>
+#include <asm/hypervisor.h>
+#include <asm/mdesc.h>
+
+#define WDT_TIMEOUT			60
+#define WDT_MAX_TIMEOUT			31536000
+#define WDT_MIN_TIMEOUT			1
+#define WDT_DEFAULT_RESOLUTION_MS	1000	/* 1 second */
+
+static unsigned int timeout;
+module_param(timeout, uint, 0);
+MODULE_PARM_DESC(timeout, "Watchdog timeout in seconds (default="
+	__MODULE_STRING(WDT_TIMEOUT) ")");
+
+static bool nowayout = WATCHDOG_NOWAYOUT;
+module_param(nowayout, bool, S_IRUGO);
+MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default="
+	__MODULE_STRING(WATCHDOG_NOWAYOUT) ")");
+
+static int sun4v_wdt_stop(struct watchdog_device *wdd)
+{
+	sun4v_mach_set_watchdog(0, NULL);
+
+	return 0;
+}
+
+static int sun4v_wdt_ping(struct watchdog_device *wdd)
+{
+	int hverr;
+
+	/*
+	 * HV watchdog timer will round up the timeout
+	 * passed in to the nearest multiple of the
+	 * watchdog resolution in milliseconds.
+	 */
+	hverr = sun4v_mach_set_watchdog(wdd->timeout * 1000, NULL);
+	if (hverr == HV_EINVAL)
+		return -EINVAL;
+
+	return 0;
+}
+
+static int sun4v_wdt_set_timeout(struct watchdog_device *wdd,
+				 unsigned int timeout)
+{
+	wdd->timeout = timeout;
+
+	return 0;
+}
+
+static const struct watchdog_info sun4v_wdt_ident = {
+	.options =	WDIOF_SETTIMEOUT |
+			WDIOF_MAGICCLOSE |
+			WDIOF_KEEPALIVEPING,
+	.identity =	"sun4v hypervisor watchdog",
+	.firmware_version = 0,
+};
+
+static struct watchdog_ops sun4v_wdt_ops = {
+	.owner =	THIS_MODULE,
+	.start =	sun4v_wdt_ping,
+	.stop =		sun4v_wdt_stop,
+	.ping =		sun4v_wdt_ping,
+	.set_timeout =	sun4v_wdt_set_timeout,
+};
+
+static struct watchdog_device wdd = {
+	.info = &sun4v_wdt_ident,
+	.ops = &sun4v_wdt_ops,
+	.min_timeout = WDT_MIN_TIMEOUT,
+	.max_timeout = WDT_MAX_TIMEOUT,
+	.timeout = WDT_TIMEOUT,
+};
+
+static int __init sun4v_wdt_init(void)
+{
+	struct mdesc_handle *handle;
+	u64 node;
+	const u64 *value;
+	int err = 0;
+	unsigned long major = 1, minor = 1;
+
+	/*
+	 * There are 2 properties that can be set from the control
+	 * domain for the watchdog.
+	 * watchdog-resolution
+	 * watchdog-max-timeout
+	 *
+	 * We can expect a handle to be returned otherwise something
+	 * serious is wrong. Correct to return -ENODEV here.
+	 */
+
+	handle = mdesc_grab();
+	if (!handle)
+		return -ENODEV;
+
+	node = mdesc_node_by_name(handle, MDESC_NODE_NULL, "platform");
+	err = -ENODEV;
+	if (node == MDESC_NODE_NULL)
+		goto out_release;
+
+	/*
+	 * This is a safe way to validate if we are on the right
+	 * platform.
+	 */
+	if (sun4v_hvapi_register(HV_GRP_CORE, major, &minor))
+		goto out_hv_unreg;
+
+	/* Allow value of watchdog-resolution up to 1s (default) */
+	value = mdesc_get_property(handle, node, "watchdog-resolution", NULL);
+	err = -EINVAL;
+	if (value) {
+		if (*value == 0 ||
+		    *value > WDT_DEFAULT_RESOLUTION_MS)
+			goto out_hv_unreg;
+	}
+
+	value = mdesc_get_property(handle, node, "watchdog-max-timeout", NULL);
+	if (value) {
+		/*
+		 * If the property value (in ms) is smaller than
+		 * min_timeout, return -EINVAL.
+		 */
+		if (*value < wdd.min_timeout * 1000)
+			goto out_hv_unreg;
+
+		/*
+		 * If the property value is smaller than
+		 * default max_timeout  then set watchdog max_timeout to
+		 * the value of the property in seconds.
+		 */
+		if (*value < wdd.max_timeout * 1000)
+			wdd.max_timeout = *value  / 1000;
+	}
+
+	watchdog_init_timeout(&wdd, timeout, NULL);
+
+	watchdog_set_nowayout(&wdd, nowayout);
+
+	err = watchdog_register_device(&wdd);
+	if (err)
+		goto out_hv_unreg;
+
+	pr_info("initialized (timeout=%ds, nowayout=%d)\n",
+		 wdd.timeout, nowayout);
+
+	mdesc_release(handle);
+
+	return 0;
+
+out_hv_unreg:
+	sun4v_hvapi_unregister(HV_GRP_CORE);
+
+out_release:
+	mdesc_release(handle);
+	return err;
+}
+
+static void __exit sun4v_wdt_exit(void)
+{
+	sun4v_hvapi_unregister(HV_GRP_CORE);
+	watchdog_unregister_device(&wdd);
+}
+
+module_init(sun4v_wdt_init);
+module_exit(sun4v_wdt_exit);
+
+MODULE_AUTHOR("Wim Coekaerts <wim.coekaerts@oracle.com>");
+MODULE_DESCRIPTION("sun4v watchdog driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index 12eab50..dc4305b 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -257,7 +257,7 @@
 		return NULL;
 
 	res->name = "System RAM";
-	res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+	res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
 
 	ret = allocate_resource(&iomem_resource, res,
 				size, 0, -1,
diff --git a/drivers/xen/tmem.c b/drivers/xen/tmem.c
index 945fc43..4ac2ca8 100644
--- a/drivers/xen/tmem.c
+++ b/drivers/xen/tmem.c
@@ -242,7 +242,7 @@
 	return xen_tmem_new_pool(shared_uuid, TMEM_POOL_SHARED, pagesize);
 }
 
-static struct cleancache_ops tmem_cleancache_ops = {
+static const struct cleancache_ops tmem_cleancache_ops = {
 	.put_page = tmem_cleancache_put_page,
 	.get_page = tmem_cleancache_get_page,
 	.invalidate_page = tmem_cleancache_flush_page,
diff --git a/drivers/xen/xen-pciback/pciback_ops.c b/drivers/xen/xen-pciback/pciback_ops.c
index 73dafdc..fb02214 100644
--- a/drivers/xen/xen-pciback/pciback_ops.c
+++ b/drivers/xen/xen-pciback/pciback_ops.c
@@ -227,8 +227,9 @@
 	/*
 	 * PCI_COMMAND_MEMORY must be enabled, otherwise we may not be able
 	 * to access the BARs where the MSI-X entries reside.
+	 * But VF devices are unique in which the PF needs to be checked.
 	 */
-	pci_read_config_word(dev, PCI_COMMAND, &cmd);
+	pci_read_config_word(pci_physfn(dev), PCI_COMMAND, &cmd);
 	if (dev->msi_enabled || !(cmd & PCI_COMMAND_MEMORY))
 		return -ENXIO;
 
@@ -332,6 +333,9 @@
 	struct xen_pcibk_dev_data *dev_data = NULL;
 	struct xen_pci_op *op = &pdev->op;
 	int test_intx = 0;
+#ifdef CONFIG_PCI_MSI
+	unsigned int nr = 0;
+#endif
 
 	*op = pdev->sh_info->op;
 	barrier();
@@ -360,6 +364,7 @@
 			op->err = xen_pcibk_disable_msi(pdev, dev, op);
 			break;
 		case XEN_PCI_OP_enable_msix:
+			nr = op->value;
 			op->err = xen_pcibk_enable_msix(pdev, dev, op);
 			break;
 		case XEN_PCI_OP_disable_msix:
@@ -382,7 +387,7 @@
 	if (op->cmd == XEN_PCI_OP_enable_msix && op->err == 0) {
 		unsigned int i;
 
-		for (i = 0; i < op->value; i++)
+		for (i = 0; i < nr; i++)
 			pdev->sh_info->op.msix_entries[i].vector =
 				op->msix_entries[i].vector;
 	}
diff --git a/drivers/xen/xen-scsiback.c b/drivers/xen/xen-scsiback.c
index ad4eb10..c46ee18 100644
--- a/drivers/xen/xen-scsiback.c
+++ b/drivers/xen/xen-scsiback.c
@@ -849,15 +849,31 @@
 }
 
 /*
+  Check for a translation entry being present
+*/
+static struct v2p_entry *scsiback_chk_translation_entry(
+	struct vscsibk_info *info, struct ids_tuple *v)
+{
+	struct list_head *head = &(info->v2p_entry_lists);
+	struct v2p_entry *entry;
+
+	list_for_each_entry(entry, head, l)
+		if ((entry->v.chn == v->chn) &&
+		    (entry->v.tgt == v->tgt) &&
+		    (entry->v.lun == v->lun))
+			return entry;
+
+	return NULL;
+}
+
+/*
   Add a new translation entry
 */
 static int scsiback_add_translation_entry(struct vscsibk_info *info,
 					  char *phy, struct ids_tuple *v)
 {
 	int err = 0;
-	struct v2p_entry *entry;
 	struct v2p_entry *new;
-	struct list_head *head = &(info->v2p_entry_lists);
 	unsigned long flags;
 	char *lunp;
 	unsigned long long unpacked_lun;
@@ -917,15 +933,10 @@
 	spin_lock_irqsave(&info->v2p_lock, flags);
 
 	/* Check double assignment to identical virtual ID */
-	list_for_each_entry(entry, head, l) {
-		if ((entry->v.chn == v->chn) &&
-		    (entry->v.tgt == v->tgt) &&
-		    (entry->v.lun == v->lun)) {
-			pr_warn("Virtual ID is already used. Assignment was not performed.\n");
-			err = -EEXIST;
-			goto out;
-		}
-
+	if (scsiback_chk_translation_entry(info, v)) {
+		pr_warn("Virtual ID is already used. Assignment was not performed.\n");
+		err = -EEXIST;
+		goto out;
 	}
 
 	/* Create a new translation entry and add to the list */
@@ -933,18 +944,18 @@
 	new->v = *v;
 	new->tpg = tpg;
 	new->lun = unpacked_lun;
-	list_add_tail(&new->l, head);
+	list_add_tail(&new->l, &info->v2p_entry_lists);
 
 out:
 	spin_unlock_irqrestore(&info->v2p_lock, flags);
 
 out_free:
-	mutex_lock(&tpg->tv_tpg_mutex);
-	tpg->tv_tpg_fe_count--;
-	mutex_unlock(&tpg->tv_tpg_mutex);
-
-	if (err)
+	if (err) {
+		mutex_lock(&tpg->tv_tpg_mutex);
+		tpg->tv_tpg_fe_count--;
+		mutex_unlock(&tpg->tv_tpg_mutex);
 		kfree(new);
+	}
 
 	return err;
 }
@@ -956,39 +967,40 @@
 }
 
 /*
-  Delete the translation entry specfied
+  Delete the translation entry specified
 */
 static int scsiback_del_translation_entry(struct vscsibk_info *info,
 					  struct ids_tuple *v)
 {
 	struct v2p_entry *entry;
-	struct list_head *head = &(info->v2p_entry_lists);
 	unsigned long flags;
+	int ret = 0;
 
 	spin_lock_irqsave(&info->v2p_lock, flags);
 	/* Find out the translation entry specified */
-	list_for_each_entry(entry, head, l) {
-		if ((entry->v.chn == v->chn) &&
-		    (entry->v.tgt == v->tgt) &&
-		    (entry->v.lun == v->lun)) {
-			goto found;
-		}
-	}
+	entry = scsiback_chk_translation_entry(info, v);
+	if (entry)
+		__scsiback_del_translation_entry(entry);
+	else
+		ret = -ENOENT;
 
 	spin_unlock_irqrestore(&info->v2p_lock, flags);
-	return 1;
-
-found:
-	/* Delete the translation entry specfied */
-	__scsiback_del_translation_entry(entry);
-
-	spin_unlock_irqrestore(&info->v2p_lock, flags);
-	return 0;
+	return ret;
 }
 
 static void scsiback_do_add_lun(struct vscsibk_info *info, const char *state,
 				char *phy, struct ids_tuple *vir, int try)
 {
+	struct v2p_entry *entry;
+	unsigned long flags;
+
+	if (try) {
+		spin_lock_irqsave(&info->v2p_lock, flags);
+		entry = scsiback_chk_translation_entry(info, vir);
+		spin_unlock_irqrestore(&info->v2p_lock, flags);
+		if (entry)
+			return;
+	}
 	if (!scsiback_add_translation_entry(info, phy, vir)) {
 		if (xenbus_printf(XBT_NIL, info->dev->nodename, state,
 				  "%d", XenbusStateInitialised)) {
diff --git a/drivers/xen/xenbus/xenbus_dev_frontend.c b/drivers/xen/xenbus/xenbus_dev_frontend.c
index 9433e46..912b64e 100644
--- a/drivers/xen/xenbus/xenbus_dev_frontend.c
+++ b/drivers/xen/xenbus/xenbus_dev_frontend.c
@@ -188,6 +188,8 @@
 
 	if (len == 0)
 		return 0;
+	if (len > XENSTORE_PAYLOAD_MAX)
+		return -EINVAL;
 
 	rb = kmalloc(sizeof(*rb) + len, GFP_KERNEL);
 	if (rb == NULL)
diff --git a/fs/affs/file.c b/fs/affs/file.c
index 0548c53..22fc7c8 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -511,8 +511,6 @@
 	pr_debug("%s(%lu, %ld, 0, %d)\n", __func__, inode->i_ino,
 		 page->index, to);
 	BUG_ON(to > PAGE_CACHE_SIZE);
-	kmap(page);
-	data = page_address(page);
 	bsize = AFFS_SB(sb)->s_data_blksize;
 	tmp = page->index << PAGE_CACHE_SHIFT;
 	bidx = tmp / bsize;
@@ -524,14 +522,15 @@
 			return PTR_ERR(bh);
 		tmp = min(bsize - boff, to - pos);
 		BUG_ON(pos + tmp > to || tmp > bsize);
+		data = kmap_atomic(page);
 		memcpy(data + pos, AFFS_DATA(bh) + boff, tmp);
+		kunmap_atomic(data);
 		affs_brelse(bh);
 		bidx++;
 		pos += tmp;
 		boff = 0;
 	}
 	flush_dcache_page(page);
-	kunmap(page);
 	return 0;
 }
 
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 051ea48..7d914c6 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -653,7 +653,7 @@
 
 	if ((current->flags & PF_RANDOMIZE) &&
 		!(current->personality & ADDR_NO_RANDOMIZE)) {
-		random_variable = (unsigned long) get_random_int();
+		random_variable = get_random_long();
 		random_variable &= STACK_RND_MASK;
 		random_variable <<= PAGE_SHIFT;
 	}
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 7b9cd49..826b164 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1201,7 +1201,11 @@
 		bdev->bd_disk = disk;
 		bdev->bd_queue = disk->queue;
 		bdev->bd_contains = bdev;
-		bdev->bd_inode->i_flags = disk->fops->direct_access ? S_DAX : 0;
+		if (IS_ENABLED(CONFIG_BLK_DEV_DAX) && disk->fops->direct_access)
+			bdev->bd_inode->i_flags = S_DAX;
+		else
+			bdev->bd_inode->i_flags = 0;
+
 		if (!partno) {
 			ret = -ENXIO;
 			bdev->bd_part = disk_get_part(disk, partno);
@@ -1693,13 +1697,24 @@
 	return try_to_free_buffers(page);
 }
 
+static int blkdev_writepages(struct address_space *mapping,
+			     struct writeback_control *wbc)
+{
+	if (dax_mapping(mapping)) {
+		struct block_device *bdev = I_BDEV(mapping->host);
+
+		return dax_writeback_mapping_range(mapping, bdev, wbc);
+	}
+	return generic_writepages(mapping, wbc);
+}
+
 static const struct address_space_operations def_blk_aops = {
 	.readpage	= blkdev_readpage,
 	.readpages	= blkdev_readpages,
 	.writepage	= blkdev_writepage,
 	.write_begin	= blkdev_write_begin,
 	.write_end	= blkdev_write_end,
-	.writepages	= generic_writepages,
+	.writepages	= blkdev_writepages,
 	.releasepage	= blkdev_releasepage,
 	.direct_IO	= blkdev_direct_IO,
 	.is_dirty_writeback = buffer_check_dirty_writeback,
@@ -1730,43 +1745,25 @@
 	return __dax_fault(vma, vmf, blkdev_get_block, NULL);
 }
 
+static int blkdev_dax_pfn_mkwrite(struct vm_area_struct *vma,
+		struct vm_fault *vmf)
+{
+	return dax_pfn_mkwrite(vma, vmf);
+}
+
 static int blkdev_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
 		pmd_t *pmd, unsigned int flags)
 {
 	return __dax_pmd_fault(vma, addr, pmd, flags, blkdev_get_block, NULL);
 }
 
-static void blkdev_vm_open(struct vm_area_struct *vma)
-{
-	struct inode *bd_inode = bdev_file_inode(vma->vm_file);
-	struct block_device *bdev = I_BDEV(bd_inode);
-
-	inode_lock(bd_inode);
-	bdev->bd_map_count++;
-	inode_unlock(bd_inode);
-}
-
-static void blkdev_vm_close(struct vm_area_struct *vma)
-{
-	struct inode *bd_inode = bdev_file_inode(vma->vm_file);
-	struct block_device *bdev = I_BDEV(bd_inode);
-
-	inode_lock(bd_inode);
-	bdev->bd_map_count--;
-	inode_unlock(bd_inode);
-}
-
 static const struct vm_operations_struct blkdev_dax_vm_ops = {
-	.open		= blkdev_vm_open,
-	.close		= blkdev_vm_close,
 	.fault		= blkdev_dax_fault,
 	.pmd_fault	= blkdev_dax_pmd_fault,
-	.pfn_mkwrite	= blkdev_dax_fault,
+	.pfn_mkwrite	= blkdev_dax_pfn_mkwrite,
 };
 
 static const struct vm_operations_struct blkdev_default_vm_ops = {
-	.open		= blkdev_vm_open,
-	.close		= blkdev_vm_close,
 	.fault		= filemap_fault,
 	.map_pages	= filemap_map_pages,
 };
@@ -1774,18 +1771,14 @@
 static int blkdev_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	struct inode *bd_inode = bdev_file_inode(file);
-	struct block_device *bdev = I_BDEV(bd_inode);
 
 	file_accessed(file);
-	inode_lock(bd_inode);
-	bdev->bd_map_count++;
 	if (IS_DAX(bd_inode)) {
 		vma->vm_ops = &blkdev_dax_vm_ops;
 		vma->vm_flags |= VM_MIXEDMAP | VM_HUGEPAGE;
 	} else {
 		vma->vm_ops = &blkdev_default_vm_ops;
 	}
-	inode_unlock(bd_inode);
 
 	return 0;
 }
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index 88d9af3..5fb60ea 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -328,8 +328,8 @@
 		list_add_tail(&work->ordered_list, &wq->ordered_list);
 		spin_unlock_irqrestore(&wq->list_lock, flags);
 	}
-	queue_work(wq->normal_wq, &work->normal_work);
 	trace_btrfs_work_queued(work);
+	queue_work(wq->normal_wq, &work->normal_work);
 }
 
 void btrfs_queue_work(struct btrfs_workqueue *wq,
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index b90cd37..f6dac40 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -1406,7 +1406,8 @@
 			read_extent_buffer(eb, dest + bytes_left,
 					   name_off, name_len);
 		if (eb != eb_in) {
-			btrfs_tree_read_unlock_blocking(eb);
+			if (!path->skip_locking)
+				btrfs_tree_read_unlock_blocking(eb);
 			free_extent_buffer(eb);
 		}
 		ret = btrfs_find_item(fs_root, path, parent, 0,
@@ -1426,9 +1427,10 @@
 		eb = path->nodes[0];
 		/* make sure we can use eb after releasing the path */
 		if (eb != eb_in) {
-			atomic_inc(&eb->refs);
-			btrfs_tree_read_lock(eb);
-			btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
+			if (!path->skip_locking)
+				btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
+			path->nodes[0] = NULL;
+			path->locks[0] = 0;
 		}
 		btrfs_release_path(path);
 		iref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index c473c42..3346cd8 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -637,11 +637,7 @@
 	faili = nr_pages - 1;
 	cb->nr_pages = nr_pages;
 
-	/* In the parent-locked case, we only locked the range we are
-	 * interested in.  In all other cases, we can opportunistically
-	 * cache decompressed data that goes beyond the requested range. */
-	if (!(bio_flags & EXTENT_BIO_PARENT_LOCKED))
-		add_ra_bio_pages(inode, em_start + em_len, cb);
+	add_ra_bio_pages(inode, em_start + em_len, cb);
 
 	/* include any pages we added in add_ra-bio_pages */
 	uncompressed_len = bio->bi_vcnt * PAGE_CACHE_SIZE;
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 0be47e4..b57daa8 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -1689,7 +1689,7 @@
  *
  */
 int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
-				    struct list_head *ins_list)
+				    struct list_head *ins_list, bool *emitted)
 {
 	struct btrfs_dir_item *di;
 	struct btrfs_delayed_item *curr, *next;
@@ -1733,6 +1733,7 @@
 
 		if (over)
 			return 1;
+		*emitted = true;
 	}
 	return 0;
 }
diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h
index f70119f..0167853 100644
--- a/fs/btrfs/delayed-inode.h
+++ b/fs/btrfs/delayed-inode.h
@@ -144,7 +144,7 @@
 int btrfs_should_delete_dir_index(struct list_head *del_list,
 				  u64 index);
 int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
-				    struct list_head *ins_list);
+				    struct list_head *ins_list, bool *emitted);
 
 /* for init */
 int __init btrfs_delayed_inode_init(void);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index dd08e29..5699bbc 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -182,6 +182,7 @@
 	{ .id = BTRFS_TREE_RELOC_OBJECTID,	.name_stem = "treloc"	},
 	{ .id = BTRFS_DATA_RELOC_TREE_OBJECTID,	.name_stem = "dreloc"	},
 	{ .id = BTRFS_UUID_TREE_OBJECTID,	.name_stem = "uuid"	},
+	{ .id = BTRFS_FREE_SPACE_TREE_OBJECTID,	.name_stem = "free-space" },
 	{ .id = 0,				.name_stem = "tree"	},
 };
 
@@ -930,7 +931,7 @@
 	if (bio_flags & EXTENT_BIO_TREE_LOG)
 		return 0;
 #ifdef CONFIG_X86
-	if (static_cpu_has_safe(X86_FEATURE_XMM4_2))
+	if (static_cpu_has(X86_FEATURE_XMM4_2))
 		return 0;
 #endif
 	return 1;
@@ -1787,7 +1788,6 @@
 	int again;
 	struct btrfs_trans_handle *trans;
 
-	set_freezable();
 	do {
 		again = 0;
 
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 2e7c97a..392592d 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2897,12 +2897,11 @@
 	struct block_device *bdev;
 	int ret;
 	int nr = 0;
-	int parent_locked = *bio_flags & EXTENT_BIO_PARENT_LOCKED;
 	size_t pg_offset = 0;
 	size_t iosize;
 	size_t disk_io_size;
 	size_t blocksize = inode->i_sb->s_blocksize;
-	unsigned long this_bio_flag = *bio_flags & EXTENT_BIO_PARENT_LOCKED;
+	unsigned long this_bio_flag = 0;
 
 	set_page_extent_mapped(page);
 
@@ -2942,18 +2941,16 @@
 			kunmap_atomic(userpage);
 			set_extent_uptodate(tree, cur, cur + iosize - 1,
 					    &cached, GFP_NOFS);
-			if (!parent_locked)
-				unlock_extent_cached(tree, cur,
-						     cur + iosize - 1,
-						     &cached, GFP_NOFS);
+			unlock_extent_cached(tree, cur,
+					     cur + iosize - 1,
+					     &cached, GFP_NOFS);
 			break;
 		}
 		em = __get_extent_map(inode, page, pg_offset, cur,
 				      end - cur + 1, get_extent, em_cached);
 		if (IS_ERR_OR_NULL(em)) {
 			SetPageError(page);
-			if (!parent_locked)
-				unlock_extent(tree, cur, end);
+			unlock_extent(tree, cur, end);
 			break;
 		}
 		extent_offset = cur - em->start;
@@ -3038,12 +3035,9 @@
 
 			set_extent_uptodate(tree, cur, cur + iosize - 1,
 					    &cached, GFP_NOFS);
-			if (parent_locked)
-				free_extent_state(cached);
-			else
-				unlock_extent_cached(tree, cur,
-						     cur + iosize - 1,
-						     &cached, GFP_NOFS);
+			unlock_extent_cached(tree, cur,
+					     cur + iosize - 1,
+					     &cached, GFP_NOFS);
 			cur = cur + iosize;
 			pg_offset += iosize;
 			continue;
@@ -3052,8 +3046,7 @@
 		if (test_range_bit(tree, cur, cur_end,
 				   EXTENT_UPTODATE, 1, NULL)) {
 			check_page_uptodate(tree, page);
-			if (!parent_locked)
-				unlock_extent(tree, cur, cur + iosize - 1);
+			unlock_extent(tree, cur, cur + iosize - 1);
 			cur = cur + iosize;
 			pg_offset += iosize;
 			continue;
@@ -3063,8 +3056,7 @@
 		 */
 		if (block_start == EXTENT_MAP_INLINE) {
 			SetPageError(page);
-			if (!parent_locked)
-				unlock_extent(tree, cur, cur + iosize - 1);
+			unlock_extent(tree, cur, cur + iosize - 1);
 			cur = cur + iosize;
 			pg_offset += iosize;
 			continue;
@@ -3083,8 +3075,7 @@
 			*bio_flags = this_bio_flag;
 		} else {
 			SetPageError(page);
-			if (!parent_locked)
-				unlock_extent(tree, cur, cur + iosize - 1);
+			unlock_extent(tree, cur, cur + iosize - 1);
 		}
 		cur = cur + iosize;
 		pg_offset += iosize;
@@ -3213,20 +3204,6 @@
 	return ret;
 }
 
-int extent_read_full_page_nolock(struct extent_io_tree *tree, struct page *page,
-				 get_extent_t *get_extent, int mirror_num)
-{
-	struct bio *bio = NULL;
-	unsigned long bio_flags = EXTENT_BIO_PARENT_LOCKED;
-	int ret;
-
-	ret = __do_readpage(tree, page, get_extent, NULL, &bio, mirror_num,
-			    &bio_flags, READ, NULL);
-	if (bio)
-		ret = submit_one_bio(READ, bio, mirror_num, bio_flags);
-	return ret;
-}
-
 static noinline void update_nr_written(struct page *page,
 				      struct writeback_control *wbc,
 				      unsigned long nr_written)
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 0377413..880d529 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -29,7 +29,6 @@
  */
 #define EXTENT_BIO_COMPRESSED 1
 #define EXTENT_BIO_TREE_LOG 2
-#define EXTENT_BIO_PARENT_LOCKED 4
 #define EXTENT_BIO_FLAG_SHIFT 16
 
 /* these are bit numbers for test/set bit */
@@ -210,8 +209,6 @@
 int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end);
 int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
 			  get_extent_t *get_extent, int mirror_num);
-int extent_read_full_page_nolock(struct extent_io_tree *tree, struct page *page,
-				 get_extent_t *get_extent, int mirror_num);
 int __init extent_io_init(void);
 void extent_io_exit(void);
 
diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c
index 393e36b..53dbeaf 100644
--- a/fs/btrfs/free-space-tree.c
+++ b/fs/btrfs/free-space-tree.c
@@ -153,6 +153,20 @@
 
 static unsigned long *alloc_bitmap(u32 bitmap_size)
 {
+	void *mem;
+
+	/*
+	 * The allocation size varies, observed numbers were < 4K up to 16K.
+	 * Using vmalloc unconditionally would be too heavy, we'll try
+	 * contiguous allocations first.
+	 */
+	if  (bitmap_size <= PAGE_SIZE)
+		return kzalloc(bitmap_size, GFP_NOFS);
+
+	mem = kzalloc(bitmap_size, GFP_NOFS | __GFP_NOWARN);
+	if (mem)
+		return mem;
+
 	return __vmalloc(bitmap_size, GFP_NOFS | __GFP_HIGHMEM | __GFP_ZERO,
 			 PAGE_KERNEL);
 }
@@ -289,7 +303,7 @@
 
 	ret = 0;
 out:
-	vfree(bitmap);
+	kvfree(bitmap);
 	if (ret)
 		btrfs_abort_transaction(trans, root, ret);
 	return ret;
@@ -438,7 +452,7 @@
 
 	ret = 0;
 out:
-	vfree(bitmap);
+	kvfree(bitmap);
 	if (ret)
 		btrfs_abort_transaction(trans, root, ret);
 	return ret;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index e28f3d4..d96f5cf 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -5717,6 +5717,7 @@
 	char *name_ptr;
 	int name_len;
 	int is_curr = 0;	/* ctx->pos points to the current index? */
+	bool emitted;
 
 	/* FIXME, use a real flag for deciding about the key type */
 	if (root->fs_info->tree_root == root)
@@ -5745,6 +5746,7 @@
 	if (ret < 0)
 		goto err;
 
+	emitted = false;
 	while (1) {
 		leaf = path->nodes[0];
 		slot = path->slots[0];
@@ -5824,6 +5826,7 @@
 
 			if (over)
 				goto nopos;
+			emitted = true;
 			di_len = btrfs_dir_name_len(leaf, di) +
 				 btrfs_dir_data_len(leaf, di) + sizeof(*di);
 			di_cur += di_len;
@@ -5836,11 +5839,20 @@
 	if (key_type == BTRFS_DIR_INDEX_KEY) {
 		if (is_curr)
 			ctx->pos++;
-		ret = btrfs_readdir_delayed_dir_index(ctx, &ins_list);
+		ret = btrfs_readdir_delayed_dir_index(ctx, &ins_list, &emitted);
 		if (ret)
 			goto nopos;
 	}
 
+	/*
+	 * If we haven't emitted any dir entry, we must not touch ctx->pos as
+	 * it was was set to the termination value in previous call. We assume
+	 * that "." and ".." were emitted if we reach this point and set the
+	 * termination value as well for an empty directory.
+	 */
+	if (ctx->pos > 2 && !emitted)
+		goto nopos;
+
 	/* Reached end of directory/root. Bump pos past the last item. */
 	ctx->pos++;
 
@@ -7116,21 +7128,41 @@
 	if (ret)
 		return ERR_PTR(ret);
 
-	em = create_pinned_em(inode, start, ins.offset, start, ins.objectid,
-			      ins.offset, ins.offset, ins.offset, 0);
-	if (IS_ERR(em)) {
-		btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
-		return em;
-	}
-
+	/*
+	 * Create the ordered extent before the extent map. This is to avoid
+	 * races with the fast fsync path that would lead to it logging file
+	 * extent items that point to disk extents that were not yet written to.
+	 * The fast fsync path collects ordered extents into a local list and
+	 * then collects all the new extent maps, so we must create the ordered
+	 * extent first and make sure the fast fsync path collects any new
+	 * ordered extents after collecting new extent maps as well.
+	 * The fsync path simply can not rely on inode_dio_wait() because it
+	 * causes deadlock with AIO.
+	 */
 	ret = btrfs_add_ordered_extent_dio(inode, start, ins.objectid,
 					   ins.offset, ins.offset, 0);
 	if (ret) {
 		btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
-		free_extent_map(em);
 		return ERR_PTR(ret);
 	}
 
+	em = create_pinned_em(inode, start, ins.offset, start, ins.objectid,
+			      ins.offset, ins.offset, ins.offset, 0);
+	if (IS_ERR(em)) {
+		struct btrfs_ordered_extent *oe;
+
+		btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
+		oe = btrfs_lookup_ordered_extent(inode, start);
+		ASSERT(oe);
+		if (WARN_ON(!oe))
+			return em;
+		set_bit(BTRFS_ORDERED_IOERR, &oe->flags);
+		set_bit(BTRFS_ORDERED_IO_DONE, &oe->flags);
+		btrfs_remove_ordered_extent(inode, oe);
+		/* Once for our lookup and once for the ordered extents tree. */
+		btrfs_put_ordered_extent(oe);
+		btrfs_put_ordered_extent(oe);
+	}
 	return em;
 }
 
@@ -7954,6 +7986,7 @@
 
 	kfree(dip);
 
+	dio_bio->bi_error = bio->bi_error;
 	dio_end_io(dio_bio, bio->bi_error);
 
 	if (io_bio->end_io)
@@ -8008,6 +8041,7 @@
 
 	kfree(dip);
 
+	dio_bio->bi_error = bio->bi_error;
 	dio_end_io(dio_bio, bio->bi_error);
 	bio_put(bio);
 }
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 952172c..48aee98 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -2794,24 +2794,29 @@
 static struct page *extent_same_get_page(struct inode *inode, pgoff_t index)
 {
 	struct page *page;
-	struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
 
 	page = grab_cache_page(inode->i_mapping, index);
 	if (!page)
-		return NULL;
+		return ERR_PTR(-ENOMEM);
 
 	if (!PageUptodate(page)) {
-		if (extent_read_full_page_nolock(tree, page, btrfs_get_extent,
-						 0))
-			return NULL;
+		int ret;
+
+		ret = btrfs_readpage(NULL, page);
+		if (ret)
+			return ERR_PTR(ret);
 		lock_page(page);
 		if (!PageUptodate(page)) {
 			unlock_page(page);
 			page_cache_release(page);
-			return NULL;
+			return ERR_PTR(-EIO);
+		}
+		if (page->mapping != inode->i_mapping) {
+			unlock_page(page);
+			page_cache_release(page);
+			return ERR_PTR(-EAGAIN);
 		}
 	}
-	unlock_page(page);
 
 	return page;
 }
@@ -2823,17 +2828,31 @@
 	pgoff_t index = off >> PAGE_CACHE_SHIFT;
 
 	for (i = 0; i < num_pages; i++) {
+again:
 		pages[i] = extent_same_get_page(inode, index + i);
-		if (!pages[i])
-			return -ENOMEM;
+		if (IS_ERR(pages[i])) {
+			int err = PTR_ERR(pages[i]);
+
+			if (err == -EAGAIN)
+				goto again;
+			pages[i] = NULL;
+			return err;
+		}
 	}
 	return 0;
 }
 
-static inline void lock_extent_range(struct inode *inode, u64 off, u64 len)
+static int lock_extent_range(struct inode *inode, u64 off, u64 len,
+			     bool retry_range_locking)
 {
-	/* do any pending delalloc/csum calc on src, one way or
-	   another, and lock file content */
+	/*
+	 * Do any pending delalloc/csum calculations on inode, one way or
+	 * another, and lock file content.
+	 * The locking order is:
+	 *
+	 *   1) pages
+	 *   2) range in the inode's io tree
+	 */
 	while (1) {
 		struct btrfs_ordered_extent *ordered;
 		lock_extent(&BTRFS_I(inode)->io_tree, off, off + len - 1);
@@ -2851,8 +2870,11 @@
 		unlock_extent(&BTRFS_I(inode)->io_tree, off, off + len - 1);
 		if (ordered)
 			btrfs_put_ordered_extent(ordered);
+		if (!retry_range_locking)
+			return -EAGAIN;
 		btrfs_wait_ordered_range(inode, off, len);
 	}
+	return 0;
 }
 
 static void btrfs_double_inode_unlock(struct inode *inode1, struct inode *inode2)
@@ -2877,15 +2899,24 @@
 	unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1);
 }
 
-static void btrfs_double_extent_lock(struct inode *inode1, u64 loff1,
-				     struct inode *inode2, u64 loff2, u64 len)
+static int btrfs_double_extent_lock(struct inode *inode1, u64 loff1,
+				    struct inode *inode2, u64 loff2, u64 len,
+				    bool retry_range_locking)
 {
+	int ret;
+
 	if (inode1 < inode2) {
 		swap(inode1, inode2);
 		swap(loff1, loff2);
 	}
-	lock_extent_range(inode1, loff1, len);
-	lock_extent_range(inode2, loff2, len);
+	ret = lock_extent_range(inode1, loff1, len, retry_range_locking);
+	if (ret)
+		return ret;
+	ret = lock_extent_range(inode2, loff2, len, retry_range_locking);
+	if (ret)
+		unlock_extent(&BTRFS_I(inode1)->io_tree, loff1,
+			      loff1 + len - 1);
+	return ret;
 }
 
 struct cmp_pages {
@@ -2901,11 +2932,15 @@
 
 	for (i = 0; i < cmp->num_pages; i++) {
 		pg = cmp->src_pages[i];
-		if (pg)
+		if (pg) {
+			unlock_page(pg);
 			page_cache_release(pg);
+		}
 		pg = cmp->dst_pages[i];
-		if (pg)
+		if (pg) {
+			unlock_page(pg);
 			page_cache_release(pg);
+		}
 	}
 	kfree(cmp->src_pages);
 	kfree(cmp->dst_pages);
@@ -2966,6 +3001,8 @@
 
 		src_page = cmp->src_pages[i];
 		dst_page = cmp->dst_pages[i];
+		ASSERT(PageLocked(src_page));
+		ASSERT(PageLocked(dst_page));
 
 		addr = kmap_atomic(src_page);
 		dst_addr = kmap_atomic(dst_page);
@@ -3078,14 +3115,46 @@
 		goto out_unlock;
 	}
 
+again:
 	ret = btrfs_cmp_data_prepare(src, loff, dst, dst_loff, olen, &cmp);
 	if (ret)
 		goto out_unlock;
 
 	if (same_inode)
-		lock_extent_range(src, same_lock_start, same_lock_len);
+		ret = lock_extent_range(src, same_lock_start, same_lock_len,
+					false);
 	else
-		btrfs_double_extent_lock(src, loff, dst, dst_loff, len);
+		ret = btrfs_double_extent_lock(src, loff, dst, dst_loff, len,
+					       false);
+	/*
+	 * If one of the inodes has dirty pages in the respective range or
+	 * ordered extents, we need to flush dellaloc and wait for all ordered
+	 * extents in the range. We must unlock the pages and the ranges in the
+	 * io trees to avoid deadlocks when flushing delalloc (requires locking
+	 * pages) and when waiting for ordered extents to complete (they require
+	 * range locking).
+	 */
+	if (ret == -EAGAIN) {
+		/*
+		 * Ranges in the io trees already unlocked. Now unlock all
+		 * pages before waiting for all IO to complete.
+		 */
+		btrfs_cmp_data_free(&cmp);
+		if (same_inode) {
+			btrfs_wait_ordered_range(src, same_lock_start,
+						 same_lock_len);
+		} else {
+			btrfs_wait_ordered_range(src, loff, len);
+			btrfs_wait_ordered_range(dst, dst_loff, len);
+		}
+		goto again;
+	}
+	ASSERT(ret == 0);
+	if (WARN_ON(ret)) {
+		/* ranges in the io trees already unlocked */
+		btrfs_cmp_data_free(&cmp);
+		return ret;
+	}
 
 	/* pass original length for comparison so we stay within i_size */
 	ret = btrfs_cmp_data(src, loff, dst, dst_loff, olen, &cmp);
@@ -3795,9 +3864,15 @@
 		u64 lock_start = min_t(u64, off, destoff);
 		u64 lock_len = max_t(u64, off, destoff) + len - lock_start;
 
-		lock_extent_range(src, lock_start, lock_len);
+		ret = lock_extent_range(src, lock_start, lock_len, true);
 	} else {
-		btrfs_double_extent_lock(src, off, inode, destoff, len);
+		ret = btrfs_double_extent_lock(src, off, inode, destoff, len,
+					       true);
+	}
+	ASSERT(ret == 0);
+	if (WARN_ON(ret)) {
+		/* ranges in the io trees already unlocked */
+		goto out_unlock;
 	}
 
 	ret = btrfs_clone(src, inode, off, olen, len, destoff, 0);
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index fd1c4d9..2bd0011 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -575,7 +575,8 @@
 	    root_objectid == BTRFS_TREE_LOG_OBJECTID ||
 	    root_objectid == BTRFS_CSUM_TREE_OBJECTID ||
 	    root_objectid == BTRFS_UUID_TREE_OBJECTID ||
-	    root_objectid == BTRFS_QUOTA_TREE_OBJECTID)
+	    root_objectid == BTRFS_QUOTA_TREE_OBJECTID ||
+	    root_objectid == BTRFS_FREE_SPACE_TREE_OBJECTID)
 		return 1;
 	return 0;
 }
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index 7cf8509..2c849b0 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -310,8 +310,16 @@
 		set_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state);
 
 		err = btrfs_insert_fs_root(root->fs_info, root);
+		/*
+		 * The root might have been inserted already, as before we look
+		 * for orphan roots, log replay might have happened, which
+		 * triggers a transaction commit and qgroup accounting, which
+		 * in turn reads and inserts fs roots while doing backref
+		 * walking.
+		 */
+		if (err == -EEXIST)
+			err = 0;
 		if (err) {
-			BUG_ON(err == -EEXIST);
 			btrfs_free_fs_root(root);
 			break;
 		}
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index e0ac859..539e7b5 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -202,6 +202,7 @@
 BTRFS_FEAT_ATTR_INCOMPAT(raid56, RAID56);
 BTRFS_FEAT_ATTR_INCOMPAT(skinny_metadata, SKINNY_METADATA);
 BTRFS_FEAT_ATTR_INCOMPAT(no_holes, NO_HOLES);
+BTRFS_FEAT_ATTR_COMPAT_RO(free_space_tree, FREE_SPACE_TREE);
 
 static struct attribute *btrfs_supported_feature_attrs[] = {
 	BTRFS_FEAT_ATTR_PTR(mixed_backref),
@@ -213,6 +214,7 @@
 	BTRFS_FEAT_ATTR_PTR(raid56),
 	BTRFS_FEAT_ATTR_PTR(skinny_metadata),
 	BTRFS_FEAT_ATTR_PTR(no_holes),
+	BTRFS_FEAT_ATTR_PTR(free_space_tree),
 	NULL
 };
 
@@ -780,6 +782,39 @@
 	return error;
 }
 
+
+/*
+ * Change per-fs features in /sys/fs/btrfs/UUID/features to match current
+ * values in superblock. Call after any changes to incompat/compat_ro flags
+ */
+void btrfs_sysfs_feature_update(struct btrfs_fs_info *fs_info,
+		u64 bit, enum btrfs_feature_set set)
+{
+	struct btrfs_fs_devices *fs_devs;
+	struct kobject *fsid_kobj;
+	u64 features;
+	int ret;
+
+	if (!fs_info)
+		return;
+
+	features = get_features(fs_info, set);
+	ASSERT(bit & supported_feature_masks[set]);
+
+	fs_devs = fs_info->fs_devices;
+	fsid_kobj = &fs_devs->fsid_kobj;
+
+	if (!fsid_kobj->state_initialized)
+		return;
+
+	/*
+	 * FIXME: this is too heavy to update just one value, ideally we'd like
+	 * to use sysfs_update_group but some refactoring is needed first.
+	 */
+	sysfs_remove_group(fsid_kobj, &btrfs_feature_attr_group);
+	ret = sysfs_create_group(fsid_kobj, &btrfs_feature_attr_group);
+}
+
 static int btrfs_init_debugfs(void)
 {
 #ifdef CONFIG_DEBUG_FS
diff --git a/fs/btrfs/sysfs.h b/fs/btrfs/sysfs.h
index 9c09522..d7da1a4 100644
--- a/fs/btrfs/sysfs.h
+++ b/fs/btrfs/sysfs.h
@@ -56,7 +56,7 @@
 #define BTRFS_FEAT_ATTR_COMPAT(name, feature) \
 	BTRFS_FEAT_ATTR(name, FEAT_COMPAT, BTRFS_FEATURE_COMPAT, feature)
 #define BTRFS_FEAT_ATTR_COMPAT_RO(name, feature) \
-	BTRFS_FEAT_ATTR(name, FEAT_COMPAT_RO, BTRFS_FEATURE_COMPAT, feature)
+	BTRFS_FEAT_ATTR(name, FEAT_COMPAT_RO, BTRFS_FEATURE_COMPAT_RO, feature)
 #define BTRFS_FEAT_ATTR_INCOMPAT(name, feature) \
 	BTRFS_FEAT_ATTR(name, FEAT_INCOMPAT, BTRFS_FEATURE_INCOMPAT, feature)
 
@@ -90,4 +90,7 @@
 				struct kobject *parent);
 int btrfs_sysfs_add_device(struct btrfs_fs_devices *fs_devs);
 void btrfs_sysfs_remove_fsid(struct btrfs_fs_devices *fs_devs);
+void btrfs_sysfs_feature_update(struct btrfs_fs_info *fs_info,
+		u64 bit, enum btrfs_feature_set set);
+
 #endif /* _BTRFS_SYSFS_H_ */
diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c
index b1d920b..0e1e61a 100644
--- a/fs/btrfs/tests/btrfs-tests.c
+++ b/fs/btrfs/tests/btrfs-tests.c
@@ -82,18 +82,18 @@
 struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(void)
 {
 	struct btrfs_fs_info *fs_info = kzalloc(sizeof(struct btrfs_fs_info),
-						GFP_NOFS);
+						GFP_KERNEL);
 
 	if (!fs_info)
 		return fs_info;
 	fs_info->fs_devices = kzalloc(sizeof(struct btrfs_fs_devices),
-				      GFP_NOFS);
+				      GFP_KERNEL);
 	if (!fs_info->fs_devices) {
 		kfree(fs_info);
 		return NULL;
 	}
 	fs_info->super_copy = kzalloc(sizeof(struct btrfs_super_block),
-				      GFP_NOFS);
+				      GFP_KERNEL);
 	if (!fs_info->super_copy) {
 		kfree(fs_info->fs_devices);
 		kfree(fs_info);
@@ -180,11 +180,11 @@
 {
 	struct btrfs_block_group_cache *cache;
 
-	cache = kzalloc(sizeof(*cache), GFP_NOFS);
+	cache = kzalloc(sizeof(*cache), GFP_KERNEL);
 	if (!cache)
 		return NULL;
 	cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl),
-					GFP_NOFS);
+					GFP_KERNEL);
 	if (!cache->free_space_ctl) {
 		kfree(cache);
 		return NULL;
diff --git a/fs/btrfs/tests/extent-io-tests.c b/fs/btrfs/tests/extent-io-tests.c
index e29fa29..669b582 100644
--- a/fs/btrfs/tests/extent-io-tests.c
+++ b/fs/btrfs/tests/extent-io-tests.c
@@ -94,7 +94,7 @@
 	 * test.
 	 */
 	for (index = 0; index < (total_dirty >> PAGE_CACHE_SHIFT); index++) {
-		page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
+		page = find_or_create_page(inode->i_mapping, index, GFP_KERNEL);
 		if (!page) {
 			test_msg("Failed to allocate test page\n");
 			ret = -ENOMEM;
@@ -113,7 +113,7 @@
 	 * |--- delalloc ---|
 	 * |---  search  ---|
 	 */
-	set_extent_delalloc(&tmp, 0, 4095, NULL, GFP_NOFS);
+	set_extent_delalloc(&tmp, 0, 4095, NULL, GFP_KERNEL);
 	start = 0;
 	end = 0;
 	found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
@@ -144,7 +144,7 @@
 		test_msg("Couldn't find the locked page\n");
 		goto out_bits;
 	}
-	set_extent_delalloc(&tmp, 4096, max_bytes - 1, NULL, GFP_NOFS);
+	set_extent_delalloc(&tmp, 4096, max_bytes - 1, NULL, GFP_KERNEL);
 	start = test_start;
 	end = 0;
 	found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
@@ -199,7 +199,7 @@
 	 *
 	 * We are re-using our test_start from above since it works out well.
 	 */
-	set_extent_delalloc(&tmp, max_bytes, total_dirty - 1, NULL, GFP_NOFS);
+	set_extent_delalloc(&tmp, max_bytes, total_dirty - 1, NULL, GFP_KERNEL);
 	start = test_start;
 	end = 0;
 	found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
@@ -262,7 +262,7 @@
 	}
 	ret = 0;
 out_bits:
-	clear_extent_bits(&tmp, 0, total_dirty - 1, (unsigned)-1, GFP_NOFS);
+	clear_extent_bits(&tmp, 0, total_dirty - 1, (unsigned)-1, GFP_KERNEL);
 out:
 	if (locked_page)
 		page_cache_release(locked_page);
@@ -360,7 +360,7 @@
 
 	test_msg("Running extent buffer bitmap tests\n");
 
-	bitmap = kmalloc(len, GFP_NOFS);
+	bitmap = kmalloc(len, GFP_KERNEL);
 	if (!bitmap) {
 		test_msg("Couldn't allocate test bitmap\n");
 		return -ENOMEM;
diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c
index 5de55fd..e2d3da0 100644
--- a/fs/btrfs/tests/inode-tests.c
+++ b/fs/btrfs/tests/inode-tests.c
@@ -974,7 +974,7 @@
 			       (BTRFS_MAX_EXTENT_SIZE >> 1) + 4095,
 			       EXTENT_DELALLOC | EXTENT_DIRTY |
 			       EXTENT_UPTODATE | EXTENT_DO_ACCOUNTING, 0, 0,
-			       NULL, GFP_NOFS);
+			       NULL, GFP_KERNEL);
 	if (ret) {
 		test_msg("clear_extent_bit returned %d\n", ret);
 		goto out;
@@ -1045,7 +1045,7 @@
 			       BTRFS_MAX_EXTENT_SIZE+8191,
 			       EXTENT_DIRTY | EXTENT_DELALLOC |
 			       EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
-			       NULL, GFP_NOFS);
+			       NULL, GFP_KERNEL);
 	if (ret) {
 		test_msg("clear_extent_bit returned %d\n", ret);
 		goto out;
@@ -1079,7 +1079,7 @@
 	ret = clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
 			       EXTENT_DIRTY | EXTENT_DELALLOC |
 			       EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
-			       NULL, GFP_NOFS);
+			       NULL, GFP_KERNEL);
 	if (ret) {
 		test_msg("clear_extent_bit returned %d\n", ret);
 		goto out;
@@ -1096,7 +1096,7 @@
 		clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
 				 EXTENT_DIRTY | EXTENT_DELALLOC |
 				 EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
-				 NULL, GFP_NOFS);
+				 NULL, GFP_KERNEL);
 	iput(inode);
 	btrfs_free_dummy_root(root);
 	return ret;
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 323e12c..978c3a8 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -4127,7 +4127,9 @@
 				     struct inode *inode,
 				     struct btrfs_path *path,
 				     struct list_head *logged_list,
-				     struct btrfs_log_ctx *ctx)
+				     struct btrfs_log_ctx *ctx,
+				     const u64 start,
+				     const u64 end)
 {
 	struct extent_map *em, *n;
 	struct list_head extents;
@@ -4166,7 +4168,13 @@
 	}
 
 	list_sort(NULL, &extents, extent_cmp);
-
+	/*
+	 * Collect any new ordered extents within the range. This is to
+	 * prevent logging file extent items without waiting for the disk
+	 * location they point to being written. We do this only to deal
+	 * with races against concurrent lockless direct IO writes.
+	 */
+	btrfs_get_logged_extents(inode, logged_list, start, end);
 process:
 	while (!list_empty(&extents)) {
 		em = list_entry(extents.next, struct extent_map, list);
@@ -4701,7 +4709,7 @@
 			goto out_unlock;
 		}
 		ret = btrfs_log_changed_extents(trans, root, inode, dst_path,
-						&logged_list, ctx);
+						&logged_list, ctx, start, end);
 		if (ret) {
 			err = ret;
 			goto out_unlock;
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index c2221378..19adeb0 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -1756,6 +1756,10 @@
 	u32 pool;
 	int ret, flags;
 
+	/* does not support pool namespace yet */
+	if (ci->i_pool_ns_len)
+		return -EIO;
+
 	if (ceph_test_mount_opt(ceph_inode_to_client(&ci->vfs_inode),
 				NOPOOLPERM))
 		return 0;
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index cdbf8cf..6fe0ad2 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -2753,7 +2753,8 @@
 			     void *inline_data, int inline_len,
 			     struct ceph_buffer *xattr_buf,
 			     struct ceph_mds_session *session,
-			     struct ceph_cap *cap, int issued)
+			     struct ceph_cap *cap, int issued,
+			     u32 pool_ns_len)
 	__releases(ci->i_ceph_lock)
 	__releases(mdsc->snap_rwsem)
 {
@@ -2873,6 +2874,8 @@
 	if (newcaps & (CEPH_CAP_ANY_FILE_RD | CEPH_CAP_ANY_FILE_WR)) {
 		/* file layout may have changed */
 		ci->i_layout = grant->layout;
+		ci->i_pool_ns_len = pool_ns_len;
+
 		/* size/truncate_seq? */
 		queue_trunc = ceph_fill_file_size(inode, issued,
 					le32_to_cpu(grant->truncate_seq),
@@ -3411,6 +3414,7 @@
 	u32  inline_len = 0;
 	void *snaptrace;
 	size_t snaptrace_len;
+	u32 pool_ns_len = 0;
 	void *p, *end;
 
 	dout("handle_caps from mds%d\n", mds);
@@ -3463,6 +3467,21 @@
 		p += inline_len;
 	}
 
+	if (le16_to_cpu(msg->hdr.version) >= 8) {
+		u64 flush_tid;
+		u32 caller_uid, caller_gid;
+		u32 osd_epoch_barrier;
+		/* version >= 5 */
+		ceph_decode_32_safe(&p, end, osd_epoch_barrier, bad);
+		/* version >= 6 */
+		ceph_decode_64_safe(&p, end, flush_tid, bad);
+		/* version >= 7 */
+		ceph_decode_32_safe(&p, end, caller_uid, bad);
+		ceph_decode_32_safe(&p, end, caller_gid, bad);
+		/* version >= 8 */
+		ceph_decode_32_safe(&p, end, pool_ns_len, bad);
+	}
+
 	/* lookup ino */
 	inode = ceph_find_inode(sb, vino);
 	ci = ceph_inode(inode);
@@ -3518,7 +3537,8 @@
 				  &cap, &issued);
 		handle_cap_grant(mdsc, inode, h,
 				 inline_version, inline_data, inline_len,
-				 msg->middle, session, cap, issued);
+				 msg->middle, session, cap, issued,
+				 pool_ns_len);
 		if (realm)
 			ceph_put_snap_realm(mdsc, realm);
 		goto done_unlocked;
@@ -3542,7 +3562,8 @@
 		issued |= __ceph_caps_dirty(ci);
 		handle_cap_grant(mdsc, inode, h,
 				 inline_version, inline_data, inline_len,
-				 msg->middle, session, cap, issued);
+				 msg->middle, session, cap, issued,
+				 pool_ns_len);
 		goto done_unlocked;
 
 	case CEPH_CAP_OP_FLUSH_ACK:
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 86a9c38..eb9028e 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -698,8 +698,8 @@
 
 	req = ceph_osdc_alloc_request(orig_req->r_osdc, snapc, 2,
 			false, GFP_NOFS);
-	if (IS_ERR(req)) {
-		ret = PTR_ERR(req);
+	if (!req) {
+		ret = -ENOMEM;
 		req = orig_req;
 		goto out;
 	}
@@ -716,7 +716,6 @@
 	ceph_osdc_build_request(req, req->r_ops[0].extent.offset,
 				snapc, CEPH_NOSNAP, &aio_req->mtime);
 
-	ceph_put_snap_context(snapc);
 	ceph_osdc_put_request(orig_req);
 
 	req->r_callback = ceph_aio_complete_req;
@@ -731,6 +730,7 @@
 		ceph_aio_complete_req(req, NULL);
 	}
 
+	ceph_put_snap_context(snapc);
 	kfree(aio_work);
 }
 
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index fb4ba2e..5849b88 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -396,6 +396,7 @@
 	ci->i_symlink = NULL;
 
 	memset(&ci->i_dir_layout, 0, sizeof(ci->i_dir_layout));
+	ci->i_pool_ns_len = 0;
 
 	ci->i_fragtree = RB_ROOT;
 	mutex_init(&ci->i_fragtree_mutex);
@@ -756,6 +757,7 @@
 		if (ci->i_layout.fl_pg_pool != info->layout.fl_pg_pool)
 			ci->i_ceph_flags &= ~CEPH_I_POOL_PERM;
 		ci->i_layout = info->layout;
+		ci->i_pool_ns_len = iinfo->pool_ns_len;
 
 		queue_trunc = ceph_fill_file_size(inode, issued,
 					le32_to_cpu(info->truncate_seq),
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index e7b130a..911d64d 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -100,6 +100,14 @@
 	} else
 		info->inline_version = CEPH_INLINE_NONE;
 
+	if (features & CEPH_FEATURE_FS_FILE_LAYOUT_V2) {
+		ceph_decode_32_safe(p, end, info->pool_ns_len, bad);
+		ceph_decode_need(p, end, info->pool_ns_len, bad);
+		*p += info->pool_ns_len;
+	} else {
+		info->pool_ns_len = 0;
+	}
+
 	return 0;
 bad:
 	return err;
@@ -2298,6 +2306,14 @@
 		ceph_get_cap_refs(ceph_inode(req->r_old_dentry_dir),
 				  CEPH_CAP_PIN);
 
+	/* deny access to directories with pool_ns layouts */
+	if (req->r_inode && S_ISDIR(req->r_inode->i_mode) &&
+	    ceph_inode(req->r_inode)->i_pool_ns_len)
+		return -EIO;
+	if (req->r_locked_dir &&
+	    ceph_inode(req->r_locked_dir)->i_pool_ns_len)
+		return -EIO;
+
 	/* issue */
 	mutex_lock(&mdsc->mutex);
 	__register_request(mdsc, req, dir);
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index ccf11ef..37712cc 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -44,6 +44,7 @@
 	u64 inline_version;
 	u32 inline_len;
 	char *inline_data;
+	u32 pool_ns_len;
 };
 
 /*
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 75b7d12..9c458eb 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -287,6 +287,7 @@
 
 	struct ceph_dir_layout i_dir_layout;
 	struct ceph_file_layout i_layout;
+	size_t i_pool_ns_len;
 	char *i_symlink;
 
 	/* for dirs */
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index 7dc886c..e956cba 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -175,7 +175,7 @@
 	 * string to the length of the original string to allow for worst case.
 	 */
 	md_len = strlen(sb_mountdata) + INET6_ADDRSTRLEN;
-	mountdata = kzalloc(md_len + 1, GFP_KERNEL);
+	mountdata = kzalloc(md_len + sizeof("ip=") + 1, GFP_KERNEL);
 	if (mountdata == NULL) {
 		rc = -ENOMEM;
 		goto compose_mount_options_err;
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index afa09fc..e682b36 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -714,7 +714,7 @@
 
 	ses->auth_key.response = kmalloc(baselen + tilen, GFP_KERNEL);
 	if (!ses->auth_key.response) {
-		rc = ENOMEM;
+		rc = -ENOMEM;
 		ses->auth_key.len = 0;
 		goto setup_ntlmv2_rsp_ret;
 	}
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index c48ca13..2eea403 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -1013,7 +1013,6 @@
 	.llseek = cifs_llseek,
 	.unlocked_ioctl	= cifs_ioctl,
 	.clone_file_range = cifs_clone_file_range,
-	.clone_file_range = cifs_clone_file_range,
 	.setlease = cifs_setlease,
 	.fallocate = cifs_fallocate,
 };
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 68c4547..83aac8b 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -31,19 +31,15 @@
  * so that it will fit. We use hash_64 to convert the value to 31 bits, and
  * then add 1, to ensure that we don't end up with a 0 as the value.
  */
-#if BITS_PER_LONG == 64
 static inline ino_t
 cifs_uniqueid_to_ino_t(u64 fileid)
 {
+	if ((sizeof(ino_t)) < (sizeof(u64)))
+		return (ino_t)hash_64(fileid, (sizeof(ino_t) * 8) - 1) + 1;
+
 	return (ino_t)fileid;
+
 }
-#else
-static inline ino_t
-cifs_uniqueid_to_ino_t(u64 fileid)
-{
-	return (ino_t)hash_64(fileid, (sizeof(ino_t) * 8) - 1) + 1;
-}
-#endif
 
 extern struct file_system_type cifs_fs_type;
 extern const struct address_space_operations cifs_addr_ops;
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 90b4f9f..76fcb50 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -1396,11 +1396,10 @@
  * current bigbuf.
  */
 static int
-cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid)
+discard_remaining_data(struct TCP_Server_Info *server)
 {
 	unsigned int rfclen = get_rfc1002_length(server->smallbuf);
 	int remaining = rfclen + 4 - server->total_read;
-	struct cifs_readdata *rdata = mid->callback_data;
 
 	while (remaining > 0) {
 		int length;
@@ -1414,10 +1413,20 @@
 		remaining -= length;
 	}
 
-	dequeue_mid(mid, rdata->result);
 	return 0;
 }
 
+static int
+cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid)
+{
+	int length;
+	struct cifs_readdata *rdata = mid->callback_data;
+
+	length = discard_remaining_data(server);
+	dequeue_mid(mid, rdata->result);
+	return length;
+}
+
 int
 cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
 {
@@ -1446,6 +1455,12 @@
 		return length;
 	server->total_read += length;
 
+	if (server->ops->is_status_pending &&
+	    server->ops->is_status_pending(buf, server, 0)) {
+		discard_remaining_data(server);
+		return -1;
+	}
+
 	/* Was the SMB read successful? */
 	rdata->result = server->ops->map_error(buf, false);
 	if (rdata->result != 0) {
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 4fbd92d..a763cd3 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -2999,8 +2999,7 @@
 	if (ses_init_buf) {
 		ses_init_buf->trailer.session_req.called_len = 32;
 
-		if (server->server_RFC1001_name &&
-		    server->server_RFC1001_name[0] != 0)
+		if (server->server_RFC1001_name[0] != 0)
 			rfc1002mangle(ses_init_buf->trailer.
 				      session_req.called_name,
 				      server->server_RFC1001_name,
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 10f8d5c..42e1f44 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -1106,21 +1106,25 @@
 {
 	char *data_offset;
 	struct create_context *cc;
-	unsigned int next = 0;
+	unsigned int next;
+	unsigned int remaining;
 	char *name;
 
 	data_offset = (char *)rsp + 4 + le32_to_cpu(rsp->CreateContextsOffset);
+	remaining = le32_to_cpu(rsp->CreateContextsLength);
 	cc = (struct create_context *)data_offset;
-	do {
-		cc = (struct create_context *)((char *)cc + next);
+	while (remaining >= sizeof(struct create_context)) {
 		name = le16_to_cpu(cc->NameOffset) + (char *)cc;
-		if (le16_to_cpu(cc->NameLength) != 4 ||
-		    strncmp(name, "RqLs", 4)) {
-			next = le32_to_cpu(cc->Next);
-			continue;
-		}
-		return server->ops->parse_lease_buf(cc, epoch);
-	} while (next != 0);
+		if (le16_to_cpu(cc->NameLength) == 4 &&
+		    strncmp(name, "RqLs", 4) == 0)
+			return server->ops->parse_lease_buf(cc, epoch);
+
+		next = le32_to_cpu(cc->Next);
+		if (!next)
+			break;
+		remaining -= next;
+		cc = (struct create_context *)((char *)cc + next);
+	}
 
 	return 0;
 }
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index a5b8eb6..6402eaf 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -1261,6 +1261,9 @@
 COMPATIBLE_IOCTL(HCIINQUIRY)
 COMPATIBLE_IOCTL(HCIUARTSETPROTO)
 COMPATIBLE_IOCTL(HCIUARTGETPROTO)
+COMPATIBLE_IOCTL(HCIUARTGETDEVICE)
+COMPATIBLE_IOCTL(HCIUARTSETFLAGS)
+COMPATIBLE_IOCTL(HCIUARTGETFLAGS)
 COMPATIBLE_IOCTL(RFCOMMCREATEDEV)
 COMPATIBLE_IOCTL(RFCOMMRELEASEDEV)
 COMPATIBLE_IOCTL(RFCOMMGETDEVLIST)
diff --git a/fs/dax.c b/fs/dax.c
index 4fd6b0c..bbb2ad7 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -58,16 +58,35 @@
 	blk_queue_exit(bdev->bd_queue);
 }
 
+struct page *read_dax_sector(struct block_device *bdev, sector_t n)
+{
+	struct page *page = alloc_pages(GFP_KERNEL, 0);
+	struct blk_dax_ctl dax = {
+		.size = PAGE_SIZE,
+		.sector = n & ~((((int) PAGE_SIZE) / 512) - 1),
+	};
+	long rc;
+
+	if (!page)
+		return ERR_PTR(-ENOMEM);
+
+	rc = dax_map_atomic(bdev, &dax);
+	if (rc < 0)
+		return ERR_PTR(rc);
+	memcpy_from_pmem(page_address(page), dax.addr, PAGE_SIZE);
+	dax_unmap_atomic(bdev, &dax);
+	return page;
+}
+
 /*
- * dax_clear_blocks() is called from within transaction context from XFS,
+ * dax_clear_sectors() is called from within transaction context from XFS,
  * and hence this means the stack from this point must follow GFP_NOFS
  * semantics for all operations.
  */
-int dax_clear_blocks(struct inode *inode, sector_t block, long _size)
+int dax_clear_sectors(struct block_device *bdev, sector_t _sector, long _size)
 {
-	struct block_device *bdev = inode->i_sb->s_bdev;
 	struct blk_dax_ctl dax = {
-		.sector = block << (inode->i_blkbits - 9),
+		.sector = _sector,
 		.size = _size,
 	};
 
@@ -89,7 +108,7 @@
 	wmb_pmem();
 	return 0;
 }
-EXPORT_SYMBOL_GPL(dax_clear_blocks);
+EXPORT_SYMBOL_GPL(dax_clear_sectors);
 
 /* the clear_pmem() calls are ordered by a wmb_pmem() in the caller */
 static void dax_new_buf(void __pmem *addr, unsigned size, unsigned first,
@@ -338,7 +357,8 @@
 	void *entry;
 
 	WARN_ON_ONCE(pmd_entry && !dirty);
-	__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
+	if (dirty)
+		__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
 
 	spin_lock_irq(&mapping->tree_lock);
 
@@ -464,11 +484,10 @@
  * end]. This is required by data integrity operations to ensure file data is
  * on persistent storage prior to completion of the operation.
  */
-int dax_writeback_mapping_range(struct address_space *mapping, loff_t start,
-		loff_t end)
+int dax_writeback_mapping_range(struct address_space *mapping,
+		struct block_device *bdev, struct writeback_control *wbc)
 {
 	struct inode *inode = mapping->host;
-	struct block_device *bdev = inode->i_sb->s_bdev;
 	pgoff_t start_index, end_index, pmd_index;
 	pgoff_t indices[PAGEVEC_SIZE];
 	struct pagevec pvec;
@@ -479,8 +498,11 @@
 	if (WARN_ON_ONCE(inode->i_blkbits != PAGE_SHIFT))
 		return -EIO;
 
-	start_index = start >> PAGE_CACHE_SHIFT;
-	end_index = end >> PAGE_CACHE_SHIFT;
+	if (!mapping->nrexceptional || wbc->sync_mode != WB_SYNC_ALL)
+		return 0;
+
+	start_index = wbc->range_start >> PAGE_CACHE_SHIFT;
+	end_index = wbc->range_end >> PAGE_CACHE_SHIFT;
 	pmd_index = DAX_PMD_INDEX(start_index);
 
 	rcu_read_lock();
@@ -1034,6 +1056,7 @@
 int dax_pfn_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
 	struct file *file = vma->vm_file;
+	int error;
 
 	/*
 	 * We pass NO_SECTOR to dax_radix_entry() because we expect that a
@@ -1043,7 +1066,13 @@
 	 * saves us from having to make a call to get_block() here to look
 	 * up the sector.
 	 */
-	dax_radix_entry(file->f_mapping, vmf->pgoff, NO_SECTOR, false, true);
+	error = dax_radix_entry(file->f_mapping, vmf->pgoff, NO_SECTOR, false,
+			true);
+
+	if (error == -ENOMEM)
+		return VM_FAULT_OOM;
+	if (error)
+		return VM_FAULT_SIGBUS;
 	return VM_FAULT_NOPAGE;
 }
 EXPORT_SYMBOL_GPL(dax_pfn_mkwrite);
diff --git a/fs/dcache.c b/fs/dcache.c
index 92d5140..2398f9f9 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -269,9 +269,6 @@
 	return dentry->d_name.name != dentry->d_iname;
 }
 
-/*
- * Make sure other CPUs see the inode attached before the type is set.
- */
 static inline void __d_set_inode_and_type(struct dentry *dentry,
 					  struct inode *inode,
 					  unsigned type_flags)
@@ -279,28 +276,18 @@
 	unsigned flags;
 
 	dentry->d_inode = inode;
-	smp_wmb();
 	flags = READ_ONCE(dentry->d_flags);
 	flags &= ~(DCACHE_ENTRY_TYPE | DCACHE_FALLTHRU);
 	flags |= type_flags;
 	WRITE_ONCE(dentry->d_flags, flags);
 }
 
-/*
- * Ideally, we want to make sure that other CPUs see the flags cleared before
- * the inode is detached, but this is really a violation of RCU principles
- * since the ordering suggests we should always set inode before flags.
- *
- * We should instead replace or discard the entire dentry - but that sucks
- * performancewise on mass deletion/rename.
- */
 static inline void __d_clear_type_and_inode(struct dentry *dentry)
 {
 	unsigned flags = READ_ONCE(dentry->d_flags);
 
 	flags &= ~(DCACHE_ENTRY_TYPE | DCACHE_FALLTHRU);
 	WRITE_ONCE(dentry->d_flags, flags);
-	smp_wmb();
 	dentry->d_inode = NULL;
 }
 
@@ -370,9 +357,11 @@
 	__releases(dentry->d_inode->i_lock)
 {
 	struct inode *inode = dentry->d_inode;
+
+	raw_write_seqcount_begin(&dentry->d_seq);
 	__d_clear_type_and_inode(dentry);
 	hlist_del_init(&dentry->d_u.d_alias);
-	dentry_rcuwalk_invalidate(dentry);
+	raw_write_seqcount_end(&dentry->d_seq);
 	spin_unlock(&dentry->d_lock);
 	spin_unlock(&inode->i_lock);
 	if (!inode->i_nlink)
@@ -1758,8 +1747,9 @@
 	spin_lock(&dentry->d_lock);
 	if (inode)
 		hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry);
+	raw_write_seqcount_begin(&dentry->d_seq);
 	__d_set_inode_and_type(dentry, inode, add_flags);
-	dentry_rcuwalk_invalidate(dentry);
+	raw_write_seqcount_end(&dentry->d_seq);
 	spin_unlock(&dentry->d_lock);
 	fsnotify_d_instantiate(dentry, inode);
 }
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 1f107fd..655f21f 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -575,6 +575,26 @@
 	mutex_unlock(&allocated_ptys_lock);
 }
 
+/*
+ * pty code needs to hold extra references in case of last /dev/tty close
+ */
+
+void devpts_add_ref(struct inode *ptmx_inode)
+{
+	struct super_block *sb = pts_sb_from_inode(ptmx_inode);
+
+	atomic_inc(&sb->s_active);
+	ihold(ptmx_inode);
+}
+
+void devpts_del_ref(struct inode *ptmx_inode)
+{
+	struct super_block *sb = pts_sb_from_inode(ptmx_inode);
+
+	iput(ptmx_inode);
+	deactivate_super(sb);
+}
+
 /**
  * devpts_pty_new -- create a new inode in /dev/pts/
  * @ptmx_inode: inode of the master
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 1b2f7ff..d6a9012 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -472,8 +472,8 @@
 		dio->io_error = -EIO;
 
 	if (dio->is_async && dio->rw == READ && dio->should_dirty) {
-		bio_check_pages_dirty(bio);	/* transfers ownership */
 		err = bio->bi_error;
+		bio_check_pages_dirty(bio);	/* transfers ownership */
 	} else {
 		bio_for_each_segment_all(bvec, bio, i) {
 			struct page *page = bvec->bv_page;
diff --git a/fs/efivarfs/file.c b/fs/efivarfs/file.c
index c424e48..d48e0d2 100644
--- a/fs/efivarfs/file.c
+++ b/fs/efivarfs/file.c
@@ -10,6 +10,7 @@
 #include <linux/efi.h>
 #include <linux/fs.h>
 #include <linux/slab.h>
+#include <linux/mount.h>
 
 #include "internal.h"
 
@@ -103,9 +104,78 @@
 	return size;
 }
 
+static int
+efivarfs_ioc_getxflags(struct file *file, void __user *arg)
+{
+	struct inode *inode = file->f_mapping->host;
+	unsigned int i_flags;
+	unsigned int flags = 0;
+
+	i_flags = inode->i_flags;
+	if (i_flags & S_IMMUTABLE)
+		flags |= FS_IMMUTABLE_FL;
+
+	if (copy_to_user(arg, &flags, sizeof(flags)))
+		return -EFAULT;
+	return 0;
+}
+
+static int
+efivarfs_ioc_setxflags(struct file *file, void __user *arg)
+{
+	struct inode *inode = file->f_mapping->host;
+	unsigned int flags;
+	unsigned int i_flags = 0;
+	int error;
+
+	if (!inode_owner_or_capable(inode))
+		return -EACCES;
+
+	if (copy_from_user(&flags, arg, sizeof(flags)))
+		return -EFAULT;
+
+	if (flags & ~FS_IMMUTABLE_FL)
+		return -EOPNOTSUPP;
+
+	if (!capable(CAP_LINUX_IMMUTABLE))
+		return -EPERM;
+
+	if (flags & FS_IMMUTABLE_FL)
+		i_flags |= S_IMMUTABLE;
+
+
+	error = mnt_want_write_file(file);
+	if (error)
+		return error;
+
+	inode_lock(inode);
+	inode_set_flags(inode, i_flags, S_IMMUTABLE);
+	inode_unlock(inode);
+
+	mnt_drop_write_file(file);
+
+	return 0;
+}
+
+long
+efivarfs_file_ioctl(struct file *file, unsigned int cmd, unsigned long p)
+{
+	void __user *arg = (void __user *)p;
+
+	switch (cmd) {
+	case FS_IOC_GETFLAGS:
+		return efivarfs_ioc_getxflags(file, arg);
+	case FS_IOC_SETFLAGS:
+		return efivarfs_ioc_setxflags(file, arg);
+	}
+
+	return -ENOTTY;
+}
+
 const struct file_operations efivarfs_file_operations = {
 	.open	= simple_open,
 	.read	= efivarfs_file_read,
 	.write	= efivarfs_file_write,
 	.llseek	= no_llseek,
+	.unlocked_ioctl = efivarfs_file_ioctl,
 };
diff --git a/fs/efivarfs/inode.c b/fs/efivarfs/inode.c
index 3381b9d..e2ab6d0 100644
--- a/fs/efivarfs/inode.c
+++ b/fs/efivarfs/inode.c
@@ -15,7 +15,8 @@
 #include "internal.h"
 
 struct inode *efivarfs_get_inode(struct super_block *sb,
-				const struct inode *dir, int mode, dev_t dev)
+				const struct inode *dir, int mode,
+				dev_t dev, bool is_removable)
 {
 	struct inode *inode = new_inode(sb);
 
@@ -23,6 +24,7 @@
 		inode->i_ino = get_next_ino();
 		inode->i_mode = mode;
 		inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+		inode->i_flags = is_removable ? 0 : S_IMMUTABLE;
 		switch (mode & S_IFMT) {
 		case S_IFREG:
 			inode->i_fop = &efivarfs_file_operations;
@@ -102,22 +104,17 @@
 static int efivarfs_create(struct inode *dir, struct dentry *dentry,
 			  umode_t mode, bool excl)
 {
-	struct inode *inode;
+	struct inode *inode = NULL;
 	struct efivar_entry *var;
 	int namelen, i = 0, err = 0;
+	bool is_removable = false;
 
 	if (!efivarfs_valid_name(dentry->d_name.name, dentry->d_name.len))
 		return -EINVAL;
 
-	inode = efivarfs_get_inode(dir->i_sb, dir, mode, 0);
-	if (!inode)
-		return -ENOMEM;
-
 	var = kzalloc(sizeof(struct efivar_entry), GFP_KERNEL);
-	if (!var) {
-		err = -ENOMEM;
-		goto out;
-	}
+	if (!var)
+		return -ENOMEM;
 
 	/* length of the variable name itself: remove GUID and separator */
 	namelen = dentry->d_name.len - EFI_VARIABLE_GUID_LEN - 1;
@@ -125,6 +122,16 @@
 	efivarfs_hex_to_guid(dentry->d_name.name + namelen + 1,
 			&var->var.VendorGuid);
 
+	if (efivar_variable_is_removable(var->var.VendorGuid,
+					 dentry->d_name.name, namelen))
+		is_removable = true;
+
+	inode = efivarfs_get_inode(dir->i_sb, dir, mode, 0, is_removable);
+	if (!inode) {
+		err = -ENOMEM;
+		goto out;
+	}
+
 	for (i = 0; i < namelen; i++)
 		var->var.VariableName[i] = dentry->d_name.name[i];
 
@@ -138,7 +145,8 @@
 out:
 	if (err) {
 		kfree(var);
-		iput(inode);
+		if (inode)
+			iput(inode);
 	}
 	return err;
 }
diff --git a/fs/efivarfs/internal.h b/fs/efivarfs/internal.h
index b5ff16a..b450518 100644
--- a/fs/efivarfs/internal.h
+++ b/fs/efivarfs/internal.h
@@ -15,7 +15,8 @@
 extern const struct inode_operations efivarfs_dir_inode_operations;
 extern bool efivarfs_valid_name(const char *str, int len);
 extern struct inode *efivarfs_get_inode(struct super_block *sb,
-			const struct inode *dir, int mode, dev_t dev);
+			const struct inode *dir, int mode, dev_t dev,
+			bool is_removable);
 
 extern struct list_head efivarfs_list;
 
diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c
index b8a564f..dd029d1 100644
--- a/fs/efivarfs/super.c
+++ b/fs/efivarfs/super.c
@@ -118,8 +118,9 @@
 	struct dentry *dentry, *root = sb->s_root;
 	unsigned long size = 0;
 	char *name;
-	int len, i;
+	int len;
 	int err = -ENOMEM;
+	bool is_removable = false;
 
 	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
 	if (!entry)
@@ -128,15 +129,17 @@
 	memcpy(entry->var.VariableName, name16, name_size);
 	memcpy(&(entry->var.VendorGuid), &vendor, sizeof(efi_guid_t));
 
-	len = ucs2_strlen(entry->var.VariableName);
+	len = ucs2_utf8size(entry->var.VariableName);
 
 	/* name, plus '-', plus GUID, plus NUL*/
 	name = kmalloc(len + 1 + EFI_VARIABLE_GUID_LEN + 1, GFP_KERNEL);
 	if (!name)
 		goto fail;
 
-	for (i = 0; i < len; i++)
-		name[i] = entry->var.VariableName[i] & 0xFF;
+	ucs2_as_utf8(name, entry->var.VariableName, len);
+
+	if (efivar_variable_is_removable(entry->var.VendorGuid, name, len))
+		is_removable = true;
 
 	name[len] = '-';
 
@@ -144,7 +147,8 @@
 
 	name[len + EFI_VARIABLE_GUID_LEN+1] = '\0';
 
-	inode = efivarfs_get_inode(sb, d_inode(root), S_IFREG | 0644, 0);
+	inode = efivarfs_get_inode(sb, d_inode(root), S_IFREG | 0644, 0,
+				   is_removable);
 	if (!inode)
 		goto fail_name;
 
@@ -200,7 +204,7 @@
 	sb->s_d_op		= &efivarfs_d_ops;
 	sb->s_time_gran         = 1;
 
-	inode = efivarfs_get_inode(sb, NULL, S_IFDIR | 0755, 0);
+	inode = efivarfs_get_inode(sb, NULL, S_IFDIR | 0755, 0, true);
 	if (!inode)
 		return -ENOMEM;
 	inode->i_op = &efivarfs_dir_inode_operations;
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index ae1dbcf..cde6074 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -94,6 +94,11 @@
 /* Epoll private bits inside the event mask */
 #define EP_PRIVATE_BITS (EPOLLWAKEUP | EPOLLONESHOT | EPOLLET | EPOLLEXCLUSIVE)
 
+#define EPOLLINOUT_BITS (POLLIN | POLLOUT)
+
+#define EPOLLEXCLUSIVE_OK_BITS (EPOLLINOUT_BITS | POLLERR | POLLHUP | \
+				EPOLLWAKEUP | EPOLLET | EPOLLEXCLUSIVE)
+
 /* Maximum number of nesting allowed inside epoll sets */
 #define EP_MAX_NESTS 4
 
@@ -1068,7 +1073,22 @@
 	 * wait list.
 	 */
 	if (waitqueue_active(&ep->wq)) {
-		ewake = 1;
+		if ((epi->event.events & EPOLLEXCLUSIVE) &&
+					!((unsigned long)key & POLLFREE)) {
+			switch ((unsigned long)key & EPOLLINOUT_BITS) {
+			case POLLIN:
+				if (epi->event.events & POLLIN)
+					ewake = 1;
+				break;
+			case POLLOUT:
+				if (epi->event.events & POLLOUT)
+					ewake = 1;
+				break;
+			case 0:
+				ewake = 1;
+				break;
+			}
+		}
 		wake_up_locked(&ep->wq);
 	}
 	if (waitqueue_active(&ep->poll_wait))
@@ -1875,9 +1895,13 @@
 	 * so EPOLLEXCLUSIVE is not allowed for a EPOLL_CTL_MOD operation.
 	 * Also, we do not currently supported nested exclusive wakeups.
 	 */
-	if ((epds.events & EPOLLEXCLUSIVE) && (op == EPOLL_CTL_MOD ||
-		(op == EPOLL_CTL_ADD && is_file_epoll(tf.file))))
-		goto error_tgt_fput;
+	if (epds.events & EPOLLEXCLUSIVE) {
+		if (op == EPOLL_CTL_MOD)
+			goto error_tgt_fput;
+		if (op == EPOLL_CTL_ADD && (is_file_epoll(tf.file) ||
+				(epds.events & ~EPOLLEXCLUSIVE_OK_BITS)))
+			goto error_tgt_fput;
+	}
 
 	/*
 	 * At this point it is safe to assume that the "private_data" contains
@@ -1950,8 +1974,10 @@
 		break;
 	case EPOLL_CTL_MOD:
 		if (epi) {
-			epds.events |= POLLERR | POLLHUP;
-			error = ep_modify(ep, epi, &epds);
+			if (!(epi->event.events & EPOLLEXCLUSIVE)) {
+				epds.events |= POLLERR | POLLHUP;
+				error = ep_modify(ep, epi, &epds);
+			}
 		} else
 			error = -ENOENT;
 		break;
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index 2c88d68..c1400b1 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -80,23 +80,6 @@
 	return ret;
 }
 
-static int ext2_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
-{
-	struct inode *inode = file_inode(vma->vm_file);
-	struct ext2_inode_info *ei = EXT2_I(inode);
-	int ret;
-
-	sb_start_pagefault(inode->i_sb);
-	file_update_time(vma->vm_file);
-	down_read(&ei->dax_sem);
-
-	ret = __dax_mkwrite(vma, vmf, ext2_get_block, NULL);
-
-	up_read(&ei->dax_sem);
-	sb_end_pagefault(inode->i_sb);
-	return ret;
-}
-
 static int ext2_dax_pfn_mkwrite(struct vm_area_struct *vma,
 		struct vm_fault *vmf)
 {
@@ -124,7 +107,7 @@
 static const struct vm_operations_struct ext2_dax_vm_ops = {
 	.fault		= ext2_dax_fault,
 	.pmd_fault	= ext2_dax_pmd_fault,
-	.page_mkwrite	= ext2_dax_mkwrite,
+	.page_mkwrite	= ext2_dax_fault,
 	.pfn_mkwrite	= ext2_dax_pfn_mkwrite,
 };
 
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 338eefd..6bd58e6 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -737,8 +737,10 @@
 		 * so that it's not found by another thread before it's
 		 * initialised
 		 */
-		err = dax_clear_blocks(inode, le32_to_cpu(chain[depth-1].key),
-						1 << inode->i_blkbits);
+		err = dax_clear_sectors(inode->i_sb->s_bdev,
+				le32_to_cpu(chain[depth-1].key) <<
+				(inode->i_blkbits - 9),
+				1 << inode->i_blkbits);
 		if (err) {
 			mutex_unlock(&ei->truncate_mutex);
 			goto cleanup;
@@ -874,6 +876,14 @@
 static int
 ext2_writepages(struct address_space *mapping, struct writeback_control *wbc)
 {
+#ifdef CONFIG_FS_DAX
+	if (dax_mapping(mapping)) {
+		return dax_writeback_mapping_range(mapping,
+						   mapping->host->i_sb->s_bdev,
+						   wbc);
+	}
+#endif
+
 	return mpage_writepages(mapping, wbc, ext2_get_block);
 }
 
@@ -1296,7 +1306,7 @@
 		inode->i_flags |= S_NOATIME;
 	if (flags & EXT2_DIRSYNC_FL)
 		inode->i_flags |= S_DIRSYNC;
-	if (test_opt(inode->i_sb, DAX))
+	if (test_opt(inode->i_sb, DAX) && S_ISREG(inode->i_mode))
 		inode->i_flags |= S_DAX;
 }
 
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index ec0668a..fe1f50f 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -191,7 +191,6 @@
 	/* If checksum is bad mark all blocks used to prevent allocation
 	 * essentially implementing a per-group read-only flag. */
 	if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) {
-		ext4_error(sb, "Checksum bad for group %u", block_group);
 		grp = ext4_get_group_info(sb, block_group);
 		if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
 			percpu_counter_sub(&sbi->s_freeclusters_counter,
@@ -442,14 +441,16 @@
 	}
 	ext4_lock_group(sb, block_group);
 	if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
-
 		err = ext4_init_block_bitmap(sb, bh, block_group, desc);
 		set_bitmap_uptodate(bh);
 		set_buffer_uptodate(bh);
 		ext4_unlock_group(sb, block_group);
 		unlock_buffer(bh);
-		if (err)
+		if (err) {
+			ext4_error(sb, "Failed to init block bitmap for group "
+				   "%u: %d", block_group, err);
 			goto out;
+		}
 		goto verify;
 	}
 	ext4_unlock_group(sb, block_group);
diff --git a/fs/ext4/crypto.c b/fs/ext4/crypto.c
index c802120..38f7562 100644
--- a/fs/ext4/crypto.c
+++ b/fs/ext4/crypto.c
@@ -467,3 +467,59 @@
 		return size;
 	return 0;
 }
+
+/*
+ * Validate dentries for encrypted directories to make sure we aren't
+ * potentially caching stale data after a key has been added or
+ * removed.
+ */
+static int ext4_d_revalidate(struct dentry *dentry, unsigned int flags)
+{
+	struct inode *dir = d_inode(dentry->d_parent);
+	struct ext4_crypt_info *ci = EXT4_I(dir)->i_crypt_info;
+	int dir_has_key, cached_with_key;
+
+	if (!ext4_encrypted_inode(dir))
+		return 0;
+
+	if (ci && ci->ci_keyring_key &&
+	    (ci->ci_keyring_key->flags & ((1 << KEY_FLAG_INVALIDATED) |
+					  (1 << KEY_FLAG_REVOKED) |
+					  (1 << KEY_FLAG_DEAD))))
+		ci = NULL;
+
+	/* this should eventually be an flag in d_flags */
+	cached_with_key = dentry->d_fsdata != NULL;
+	dir_has_key = (ci != NULL);
+
+	/*
+	 * If the dentry was cached without the key, and it is a
+	 * negative dentry, it might be a valid name.  We can't check
+	 * if the key has since been made available due to locking
+	 * reasons, so we fail the validation so ext4_lookup() can do
+	 * this check.
+	 *
+	 * We also fail the validation if the dentry was created with
+	 * the key present, but we no longer have the key, or vice versa.
+	 */
+	if ((!cached_with_key && d_is_negative(dentry)) ||
+	    (!cached_with_key && dir_has_key) ||
+	    (cached_with_key && !dir_has_key)) {
+#if 0				/* Revalidation debug */
+		char buf[80];
+		char *cp = simple_dname(dentry, buf, sizeof(buf));
+
+		if (IS_ERR(cp))
+			cp = (char *) "???";
+		pr_err("revalidate: %s %p %d %d %d\n", cp, dentry->d_fsdata,
+		       cached_with_key, d_is_negative(dentry),
+		       dir_has_key);
+#endif
+		return 0;
+	}
+	return 1;
+}
+
+const struct dentry_operations ext4_encrypted_d_ops = {
+	.d_revalidate = ext4_d_revalidate,
+};
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 1d1bca7..33f5e2a 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -111,6 +111,12 @@
 	int dir_has_error = 0;
 	struct ext4_str fname_crypto_str = {.name = NULL, .len = 0};
 
+	if (ext4_encrypted_inode(inode)) {
+		err = ext4_get_encryption_info(inode);
+		if (err && err != -ENOKEY)
+			return err;
+	}
+
 	if (is_dx_dir(inode)) {
 		err = ext4_dx_readdir(file, ctx);
 		if (err != ERR_BAD_DX_DIR) {
@@ -157,8 +163,11 @@
 					index, 1);
 			file->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT;
 			bh = ext4_bread(NULL, inode, map.m_lblk, 0);
-			if (IS_ERR(bh))
-				return PTR_ERR(bh);
+			if (IS_ERR(bh)) {
+				err = PTR_ERR(bh);
+				bh = NULL;
+				goto errout;
+			}
 		}
 
 		if (!bh) {
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 0662b28..157b458 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2302,6 +2302,7 @@
 int ext4_decrypt(struct page *page);
 int ext4_encrypted_zeroout(struct inode *inode, ext4_lblk_t lblk,
 			   ext4_fsblk_t pblk, ext4_lblk_t len);
+extern const struct dentry_operations ext4_encrypted_d_ops;
 
 #ifdef CONFIG_EXT4_FS_ENCRYPTION
 int ext4_init_crypto(void);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 0ffabaf..3753ceb 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3928,7 +3928,7 @@
 convert_initialized_extent(handle_t *handle, struct inode *inode,
 			   struct ext4_map_blocks *map,
 			   struct ext4_ext_path **ppath, int flags,
-			   unsigned int allocated, ext4_fsblk_t newblock)
+			   unsigned int allocated)
 {
 	struct ext4_ext_path *path = *ppath;
 	struct ext4_extent *ex;
@@ -4347,7 +4347,7 @@
 			    (flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN)) {
 				allocated = convert_initialized_extent(
 						handle, inode, map, &path,
-						flags, allocated, newblock);
+						flags, allocated);
 				goto out2;
 			} else if (!ext4_ext_is_unwritten(ex))
 				goto out;
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 1126436..4cd318f 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -262,23 +262,8 @@
 	return result;
 }
 
-static int ext4_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
-{
-	int err;
-	struct inode *inode = file_inode(vma->vm_file);
-
-	sb_start_pagefault(inode->i_sb);
-	file_update_time(vma->vm_file);
-	down_read(&EXT4_I(inode)->i_mmap_sem);
-	err = __dax_mkwrite(vma, vmf, ext4_dax_mmap_get_block, NULL);
-	up_read(&EXT4_I(inode)->i_mmap_sem);
-	sb_end_pagefault(inode->i_sb);
-
-	return err;
-}
-
 /*
- * Handle write fault for VM_MIXEDMAP mappings. Similarly to ext4_dax_mkwrite()
+ * Handle write fault for VM_MIXEDMAP mappings. Similarly to ext4_dax_fault()
  * handler we check for races agaist truncate. Note that since we cycle through
  * i_mmap_sem, we are sure that also any hole punching that began before we
  * were called is finished by now and so if it included part of the file we
@@ -311,7 +296,7 @@
 static const struct vm_operations_struct ext4_dax_vm_ops = {
 	.fault		= ext4_dax_fault,
 	.pmd_fault	= ext4_dax_pmd_fault,
-	.page_mkwrite	= ext4_dax_mkwrite,
+	.page_mkwrite	= ext4_dax_fault,
 	.pfn_mkwrite	= ext4_dax_pfn_mkwrite,
 };
 #else
@@ -350,6 +335,7 @@
 	struct super_block *sb = inode->i_sb;
 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 	struct vfsmount *mnt = filp->f_path.mnt;
+	struct inode *dir = filp->f_path.dentry->d_parent->d_inode;
 	struct path path;
 	char buf[64], *cp;
 	int ret;
@@ -393,6 +379,14 @@
 		if (ext4_encryption_info(inode) == NULL)
 			return -ENOKEY;
 	}
+	if (ext4_encrypted_inode(dir) &&
+	    !ext4_is_child_context_consistent_with_parent(dir, inode)) {
+		ext4_warning(inode->i_sb,
+			     "Inconsistent encryption contexts: %lu/%lu\n",
+			     (unsigned long) dir->i_ino,
+			     (unsigned long) inode->i_ino);
+		return -EPERM;
+	}
 	/*
 	 * Set up the jbd2_inode if we are opening the inode for
 	 * writing and the journal is present
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 3fcfd50..acc0ad5 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -76,7 +76,6 @@
 	/* If checksum is bad mark all blocks and inodes use to prevent
 	 * allocation, essentially implementing a per-group read-only flag. */
 	if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) {
-		ext4_error(sb, "Checksum bad for group %u", block_group);
 		grp = ext4_get_group_info(sb, block_group);
 		if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
 			percpu_counter_sub(&sbi->s_freeclusters_counter,
@@ -191,8 +190,11 @@
 		set_buffer_verified(bh);
 		ext4_unlock_group(sb, block_group);
 		unlock_buffer(bh);
-		if (err)
+		if (err) {
+			ext4_error(sb, "Failed to init inode bitmap for group "
+				   "%u: %d", block_group, err);
 			goto out;
+		}
 		return bh;
 	}
 	ext4_unlock_group(sb, block_group);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 83bc8bf..aee960b 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -686,6 +686,34 @@
 	return retval;
 }
 
+/*
+ * Update EXT4_MAP_FLAGS in bh->b_state. For buffer heads attached to pages
+ * we have to be careful as someone else may be manipulating b_state as well.
+ */
+static void ext4_update_bh_state(struct buffer_head *bh, unsigned long flags)
+{
+	unsigned long old_state;
+	unsigned long new_state;
+
+	flags &= EXT4_MAP_FLAGS;
+
+	/* Dummy buffer_head? Set non-atomically. */
+	if (!bh->b_page) {
+		bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | flags;
+		return;
+	}
+	/*
+	 * Someone else may be modifying b_state. Be careful! This is ugly but
+	 * once we get rid of using bh as a container for mapping information
+	 * to pass to / from get_block functions, this can go away.
+	 */
+	do {
+		old_state = READ_ONCE(bh->b_state);
+		new_state = (old_state & ~EXT4_MAP_FLAGS) | flags;
+	} while (unlikely(
+		 cmpxchg(&bh->b_state, old_state, new_state) != old_state));
+}
+
 /* Maximum number of blocks we map for direct IO at once. */
 #define DIO_MAX_BLOCKS 4096
 
@@ -722,7 +750,7 @@
 		ext4_io_end_t *io_end = ext4_inode_aio(inode);
 
 		map_bh(bh, inode->i_sb, map.m_pblk);
-		bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags;
+		ext4_update_bh_state(bh, map.m_flags);
 		if (io_end && io_end->flag & EXT4_IO_END_UNWRITTEN)
 			set_buffer_defer_completion(bh);
 		bh->b_size = inode->i_sb->s_blocksize * map.m_len;
@@ -1685,7 +1713,7 @@
 		return ret;
 
 	map_bh(bh, inode->i_sb, map.m_pblk);
-	bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags;
+	ext4_update_bh_state(bh, map.m_flags);
 
 	if (buffer_unwritten(bh)) {
 		/* A delayed write to unwritten bh should be marked
@@ -2450,6 +2478,10 @@
 
 	trace_ext4_writepages(inode, wbc);
 
+	if (dax_mapping(mapping))
+		return dax_writeback_mapping_range(mapping, inode->i_sb->s_bdev,
+						   wbc);
+
 	/*
 	 * No pages to write? This is mainly a kludge to avoid starting
 	 * a transaction for special inodes like journal inode on last iput()
@@ -3253,29 +3285,29 @@
 	 * case, we allocate an io_end structure to hook to the iocb.
 	 */
 	iocb->private = NULL;
-	ext4_inode_aio_set(inode, NULL);
-	if (!is_sync_kiocb(iocb)) {
-		io_end = ext4_init_io_end(inode, GFP_NOFS);
-		if (!io_end) {
-			ret = -ENOMEM;
-			goto retake_lock;
-		}
-		/*
-		 * Grab reference for DIO. Will be dropped in ext4_end_io_dio()
-		 */
-		iocb->private = ext4_get_io_end(io_end);
-		/*
-		 * we save the io structure for current async direct
-		 * IO, so that later ext4_map_blocks() could flag the
-		 * io structure whether there is a unwritten extents
-		 * needs to be converted when IO is completed.
-		 */
-		ext4_inode_aio_set(inode, io_end);
-	}
-
 	if (overwrite) {
 		get_block_func = ext4_get_block_overwrite;
 	} else {
+		ext4_inode_aio_set(inode, NULL);
+		if (!is_sync_kiocb(iocb)) {
+			io_end = ext4_init_io_end(inode, GFP_NOFS);
+			if (!io_end) {
+				ret = -ENOMEM;
+				goto retake_lock;
+			}
+			/*
+			 * Grab reference for DIO. Will be dropped in
+			 * ext4_end_io_dio()
+			 */
+			iocb->private = ext4_get_io_end(io_end);
+			/*
+			 * we save the io structure for current async direct
+			 * IO, so that later ext4_map_blocks() could flag the
+			 * io structure whether there is a unwritten extents
+			 * needs to be converted when IO is completed.
+			 */
+			ext4_inode_aio_set(inode, io_end);
+		}
 		get_block_func = ext4_get_block_write;
 		dio_flags = DIO_LOCKING;
 	}
@@ -4127,7 +4159,7 @@
 		new_fl |= S_NOATIME;
 	if (flags & EXT4_DIRSYNC_FL)
 		new_fl |= S_DIRSYNC;
-	if (test_opt(inode->i_sb, DAX))
+	if (test_opt(inode->i_sb, DAX) && S_ISREG(inode->i_mode))
 		new_fl |= S_DAX;
 	inode_set_flags(inode, new_fl,
 			S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_DAX);
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 0f6c369..eae5917 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -208,7 +208,7 @@
 {
 	struct ext4_inode_info *ei = EXT4_I(inode);
 	handle_t *handle = NULL;
-	int err = EPERM, migrate = 0;
+	int err = -EPERM, migrate = 0;
 	struct ext4_iloc iloc;
 	unsigned int oldflags, mask, i;
 	unsigned int jflag;
@@ -583,6 +583,11 @@
 				 "Online defrag not supported with bigalloc");
 			err = -EOPNOTSUPP;
 			goto mext_out;
+		} else if (IS_DAX(inode)) {
+			ext4_msg(sb, KERN_ERR,
+				 "Online defrag not supported with DAX");
+			err = -EOPNOTSUPP;
+			goto mext_out;
 		}
 
 		err = mnt_want_write_file(filp);
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 61eaf74..4424b7b 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2285,7 +2285,7 @@
 	if (group == 0)
 		seq_puts(seq, "#group: free  frags first ["
 			      " 2^0   2^1   2^2   2^3   2^4   2^5   2^6  "
-			      " 2^7   2^8   2^9   2^10  2^11  2^12  2^13  ]");
+			      " 2^7   2^8   2^9   2^10  2^11  2^12  2^13  ]\n");
 
 	i = (sb->s_blocksize_bits + 2) * sizeof(sg.info.bb_counters[0]) +
 		sizeof(struct ext4_group_info);
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index fb6f117..4098acc 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -265,11 +265,12 @@
 	ext4_lblk_t orig_blk_offset, donor_blk_offset;
 	unsigned long blocksize = orig_inode->i_sb->s_blocksize;
 	unsigned int tmp_data_size, data_size, replaced_size;
-	int err2, jblocks, retries = 0;
+	int i, err2, jblocks, retries = 0;
 	int replaced_count = 0;
 	int from = data_offset_in_page << orig_inode->i_blkbits;
 	int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits;
 	struct super_block *sb = orig_inode->i_sb;
+	struct buffer_head *bh = NULL;
 
 	/*
 	 * It needs twice the amount of ordinary journal buffers because
@@ -380,8 +381,17 @@
 	}
 	/* Perform all necessary steps similar write_begin()/write_end()
 	 * but keeping in mind that i_size will not change */
-	*err = __block_write_begin(pagep[0], from, replaced_size,
-				   ext4_get_block);
+	if (!page_has_buffers(pagep[0]))
+		create_empty_buffers(pagep[0], 1 << orig_inode->i_blkbits, 0);
+	bh = page_buffers(pagep[0]);
+	for (i = 0; i < data_offset_in_page; i++)
+		bh = bh->b_this_page;
+	for (i = 0; i < block_len_in_page; i++) {
+		*err = ext4_get_block(orig_inode, orig_blk_offset + i, bh, 0);
+		if (*err < 0)
+			break;
+		bh = bh->b_this_page;
+	}
 	if (!*err)
 		*err = block_commit_write(pagep[0], from, from + replaced_size);
 
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 06574dd..48e4b89 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1558,6 +1558,24 @@
 	struct ext4_dir_entry_2 *de;
 	struct buffer_head *bh;
 
+       if (ext4_encrypted_inode(dir)) {
+               int res = ext4_get_encryption_info(dir);
+
+		/*
+		 * This should be a properly defined flag for
+		 * dentry->d_flags when we uplift this to the VFS.
+		 * d_fsdata is set to (void *) 1 if if the dentry is
+		 * created while the directory was encrypted and we
+		 * don't have access to the key.
+		 */
+	       dentry->d_fsdata = NULL;
+	       if (ext4_encryption_info(dir))
+		       dentry->d_fsdata = (void *) 1;
+	       d_set_d_op(dentry, &ext4_encrypted_d_ops);
+	       if (res && res != -ENOKEY)
+		       return ERR_PTR(res);
+       }
+
 	if (dentry->d_name.len > EXT4_NAME_LEN)
 		return ERR_PTR(-ENAMETOOLONG);
 
@@ -1585,11 +1603,15 @@
 			return ERR_PTR(-EFSCORRUPTED);
 		}
 		if (!IS_ERR(inode) && ext4_encrypted_inode(dir) &&
-		    (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
-		     S_ISLNK(inode->i_mode)) &&
+		    (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) &&
 		    !ext4_is_child_context_consistent_with_parent(dir,
 								  inode)) {
+			int nokey = ext4_encrypted_inode(inode) &&
+				!ext4_encryption_info(inode);
+
 			iput(inode);
+			if (nokey)
+				return ERR_PTR(-ENOKEY);
 			ext4_warning(inode->i_sb,
 				     "Inconsistent encryption contexts: %lu/%lu\n",
 				     (unsigned long) dir->i_ino,
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index ad62d7a..34038e3 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -198,7 +198,7 @@
 	if (flex_gd == NULL)
 		goto out3;
 
-	if (flexbg_size >= UINT_MAX / sizeof(struct ext4_new_flex_group_data))
+	if (flexbg_size >= UINT_MAX / sizeof(struct ext4_new_group_data))
 		goto out2;
 	flex_gd->count = flexbg_size;
 
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 6915c95..5c46ed9 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -223,6 +223,9 @@
 #define WB_FRN_HIST_MAX_SLOTS	(WB_FRN_HIST_THR_SLOTS / 2 + 1)
 					/* one round can affect upto 5 slots */
 
+static atomic_t isw_nr_in_flight = ATOMIC_INIT(0);
+static struct workqueue_struct *isw_wq;
+
 void __inode_attach_wb(struct inode *inode, struct page *page)
 {
 	struct backing_dev_info *bdi = inode_to_bdi(inode);
@@ -424,6 +427,8 @@
 
 	iput(inode);
 	kfree(isw);
+
+	atomic_dec(&isw_nr_in_flight);
 }
 
 static void inode_switch_wbs_rcu_fn(struct rcu_head *rcu_head)
@@ -433,7 +438,7 @@
 
 	/* needs to grab bh-unsafe locks, bounce to work item */
 	INIT_WORK(&isw->work, inode_switch_wbs_work_fn);
-	schedule_work(&isw->work);
+	queue_work(isw_wq, &isw->work);
 }
 
 /**
@@ -469,7 +474,8 @@
 
 	/* while holding I_WB_SWITCH, no one else can update the association */
 	spin_lock(&inode->i_lock);
-	if (inode->i_state & (I_WB_SWITCH | I_FREEING) ||
+	if (!(inode->i_sb->s_flags & MS_ACTIVE) ||
+	    inode->i_state & (I_WB_SWITCH | I_FREEING) ||
 	    inode_to_wb(inode) == isw->new_wb) {
 		spin_unlock(&inode->i_lock);
 		goto out_free;
@@ -480,6 +486,8 @@
 	ihold(inode);
 	isw->inode = inode;
 
+	atomic_inc(&isw_nr_in_flight);
+
 	/*
 	 * In addition to synchronizing among switchers, I_WB_SWITCH tells
 	 * the RCU protected stat update paths to grab the mapping's
@@ -840,6 +848,33 @@
 		wb_put(last_wb);
 }
 
+/**
+ * cgroup_writeback_umount - flush inode wb switches for umount
+ *
+ * This function is called when a super_block is about to be destroyed and
+ * flushes in-flight inode wb switches.  An inode wb switch goes through
+ * RCU and then workqueue, so the two need to be flushed in order to ensure
+ * that all previously scheduled switches are finished.  As wb switches are
+ * rare occurrences and synchronize_rcu() can take a while, perform
+ * flushing iff wb switches are in flight.
+ */
+void cgroup_writeback_umount(void)
+{
+	if (atomic_read(&isw_nr_in_flight)) {
+		synchronize_rcu();
+		flush_workqueue(isw_wq);
+	}
+}
+
+static int __init cgroup_writeback_init(void)
+{
+	isw_wq = alloc_workqueue("inode_switch_wbs", 0, 0);
+	if (!isw_wq)
+		return -ENOMEM;
+	return 0;
+}
+fs_initcall(cgroup_writeback_init);
+
 #else	/* CONFIG_CGROUP_WRITEBACK */
 
 static struct bdi_writeback *
diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c
index 506765a..bb8d67e 100644
--- a/fs/hpfs/namei.c
+++ b/fs/hpfs/namei.c
@@ -376,12 +376,11 @@
 	struct inode *inode = d_inode(dentry);
 	dnode_secno dno;
 	int r;
-	int rep = 0;
 	int err;
 
 	hpfs_lock(dir->i_sb);
 	hpfs_adjust_length(name, &len);
-again:
+
 	err = -ENOENT;
 	de = map_dirent(dir, hpfs_i(dir)->i_dno, name, len, &dno, &qbh);
 	if (!de)
@@ -401,33 +400,9 @@
 		hpfs_error(dir->i_sb, "there was error when removing dirent");
 		err = -EFSERROR;
 		break;
-	case 2:		/* no space for deleting, try to truncate file */
-
+	case 2:		/* no space for deleting */
 		err = -ENOSPC;
-		if (rep++)
-			break;
-
-		dentry_unhash(dentry);
-		if (!d_unhashed(dentry)) {
-			hpfs_unlock(dir->i_sb);
-			return -ENOSPC;
-		}
-		if (generic_permission(inode, MAY_WRITE) ||
-		    !S_ISREG(inode->i_mode) ||
-		    get_write_access(inode)) {
-			d_rehash(dentry);
-		} else {
-			struct iattr newattrs;
-			/*pr_info("truncating file before delete.\n");*/
-			newattrs.ia_size = 0;
-			newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME;
-			err = notify_change(dentry, &newattrs, NULL);
-			put_write_access(inode);
-			if (!err)
-				goto again;
-		}
-		hpfs_unlock(dir->i_sb);
-		return -ENOSPC;
+		break;
 	default:
 		drop_nlink(inode);
 		err = 0;
diff --git a/fs/inode.c b/fs/inode.c
index 9f62db3..69b8b52 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -154,6 +154,12 @@
 	inode->i_rdev = 0;
 	inode->dirtied_when = 0;
 
+#ifdef CONFIG_CGROUP_WRITEBACK
+	inode->i_wb_frn_winner = 0;
+	inode->i_wb_frn_avg_time = 0;
+	inode->i_wb_frn_history = 0;
+#endif
+
 	if (security_inode_alloc(inode))
 		goto out;
 	spin_lock_init(&inode->i_lock);
diff --git a/fs/jffs2/README.Locking b/fs/jffs2/README.Locking
index 3ea3655..8918ac9 100644
--- a/fs/jffs2/README.Locking
+++ b/fs/jffs2/README.Locking
@@ -2,10 +2,6 @@
 	JFFS2 LOCKING DOCUMENTATION
 	---------------------------
 
-At least theoretically, JFFS2 does not require the Big Kernel Lock
-(BKL), which was always helpfully obtained for it by Linux 2.4 VFS
-code. It has its own locking, as described below.
-
 This document attempts to describe the existing locking rules for
 JFFS2. It is not expected to remain perfectly up to date, but ought to
 be fairly close.
@@ -69,6 +65,7 @@
 	   any f->sem held.
 	2. Never attempt to lock two file mutexes in one thread.
 	   No ordering rules have been made for doing so.
+	3. Never lock a page cache page with f->sem held.
 
 
 	erase_completion_lock spinlock
diff --git a/fs/jffs2/build.c b/fs/jffs2/build.c
index 0ae91ad..b288c8a 100644
--- a/fs/jffs2/build.c
+++ b/fs/jffs2/build.c
@@ -50,7 +50,8 @@
 
 
 static void jffs2_build_inode_pass1(struct jffs2_sb_info *c,
-				    struct jffs2_inode_cache *ic)
+				    struct jffs2_inode_cache *ic,
+				    int *dir_hardlinks)
 {
 	struct jffs2_full_dirent *fd;
 
@@ -69,19 +70,21 @@
 			dbg_fsbuild("child \"%s\" (ino #%u) of dir ino #%u doesn't exist!\n",
 				  fd->name, fd->ino, ic->ino);
 			jffs2_mark_node_obsolete(c, fd->raw);
+			/* Clear the ic/raw union so it doesn't cause problems later. */
+			fd->ic = NULL;
 			continue;
 		}
 
+		/* From this point, fd->raw is no longer used so we can set fd->ic */
+		fd->ic = child_ic;
+		child_ic->pino_nlink++;
+		/* If we appear (at this stage) to have hard-linked directories,
+		 * set a flag to trigger a scan later */
 		if (fd->type == DT_DIR) {
-			if (child_ic->pino_nlink) {
-				JFFS2_ERROR("child dir \"%s\" (ino #%u) of dir ino #%u appears to be a hard link\n",
-					    fd->name, fd->ino, ic->ino);
-				/* TODO: What do we do about it? */
-			} else {
-				child_ic->pino_nlink = ic->ino;
-			}
-		} else
-			child_ic->pino_nlink++;
+			child_ic->flags |= INO_FLAGS_IS_DIR;
+			if (child_ic->pino_nlink > 1)
+				*dir_hardlinks = 1;
+		}
 
 		dbg_fsbuild("increased nlink for child \"%s\" (ino #%u)\n", fd->name, fd->ino);
 		/* Can't free scan_dents so far. We might need them in pass 2 */
@@ -95,8 +98,7 @@
 */
 static int jffs2_build_filesystem(struct jffs2_sb_info *c)
 {
-	int ret;
-	int i;
+	int ret, i, dir_hardlinks = 0;
 	struct jffs2_inode_cache *ic;
 	struct jffs2_full_dirent *fd;
 	struct jffs2_full_dirent *dead_fds = NULL;
@@ -120,7 +122,7 @@
 	/* Now scan the directory tree, increasing nlink according to every dirent found. */
 	for_each_inode(i, c, ic) {
 		if (ic->scan_dents) {
-			jffs2_build_inode_pass1(c, ic);
+			jffs2_build_inode_pass1(c, ic, &dir_hardlinks);
 			cond_resched();
 		}
 	}
@@ -156,6 +158,20 @@
 	}
 
 	dbg_fsbuild("pass 2a complete\n");
+
+	if (dir_hardlinks) {
+		/* If we detected directory hardlinks earlier, *hopefully*
+		 * they are gone now because some of the links were from
+		 * dead directories which still had some old dirents lying
+		 * around and not yet garbage-collected, but which have
+		 * been discarded above. So clear the pino_nlink field
+		 * in each directory, so that the final scan below can
+		 * print appropriate warnings. */
+		for_each_inode(i, c, ic) {
+			if (ic->flags & INO_FLAGS_IS_DIR)
+				ic->pino_nlink = 0;
+		}
+	}
 	dbg_fsbuild("freeing temporary data structures\n");
 
 	/* Finally, we can scan again and free the dirent structs */
@@ -163,6 +179,33 @@
 		while(ic->scan_dents) {
 			fd = ic->scan_dents;
 			ic->scan_dents = fd->next;
+			/* We do use the pino_nlink field to count nlink of
+			 * directories during fs build, so set it to the
+			 * parent ino# now. Now that there's hopefully only
+			 * one. */
+			if (fd->type == DT_DIR) {
+				if (!fd->ic) {
+					/* We'll have complained about it and marked the coresponding
+					   raw node obsolete already. Just skip it. */
+					continue;
+				}
+
+				/* We *have* to have set this in jffs2_build_inode_pass1() */
+				BUG_ON(!(fd->ic->flags & INO_FLAGS_IS_DIR));
+
+				/* We clear ic->pino_nlink ∀ directories' ic *only* if dir_hardlinks
+				 * is set. Otherwise, we know this should never trigger anyway, so
+				 * we don't do the check. And ic->pino_nlink still contains the nlink
+				 * value (which is 1). */
+				if (dir_hardlinks && fd->ic->pino_nlink) {
+					JFFS2_ERROR("child dir \"%s\" (ino #%u) of dir ino #%u is also hard linked from dir ino #%u\n",
+						    fd->name, fd->ino, ic->ino, fd->ic->pino_nlink);
+					/* Should we unlink it from its previous parent? */
+				}
+
+				/* For directories, ic->pino_nlink holds that parent inode # */
+				fd->ic->pino_nlink = ic->ino;
+			}
 			jffs2_free_full_dirent(fd);
 		}
 		ic->scan_dents = NULL;
@@ -241,11 +284,7 @@
 
 			/* Reduce nlink of the child. If it's now zero, stick it on the
 			   dead_fds list to be cleaned up later. Else just free the fd */
-
-			if (fd->type == DT_DIR)
-				child_ic->pino_nlink = 0;
-			else
-				child_ic->pino_nlink--;
+			child_ic->pino_nlink--;
 
 			if (!child_ic->pino_nlink) {
 				dbg_fsbuild("inode #%u (\"%s\") now has no links; adding to dead_fds list.\n",
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index d211b8e..30c4c9e 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -843,9 +843,14 @@
 
 		pr_notice("%s(): Link succeeded, unlink failed (err %d). You now have a hard link\n",
 			  __func__, ret);
-		/* Might as well let the VFS know */
-		d_instantiate(new_dentry, d_inode(old_dentry));
-		ihold(d_inode(old_dentry));
+		/*
+		 * We can't keep the target in dcache after that.
+		 * For one thing, we can't afford dentry aliases for directories.
+		 * For another, if there was a victim, we _can't_ set new inode
+		 * for that sucker and we have to trigger mount eviction - the
+		 * caller won't do it on its own since we are returning an error.
+		 */
+		d_invalidate(new_dentry);
 		new_dir_i->i_mtime = new_dir_i->i_ctime = ITIME(now);
 		return ret;
 	}
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index c5ac594..cad86ba 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -137,39 +137,33 @@
 	struct page *pg;
 	struct inode *inode = mapping->host;
 	struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
-	struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb);
-	struct jffs2_raw_inode ri;
-	uint32_t alloc_len = 0;
 	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
 	uint32_t pageofs = index << PAGE_CACHE_SHIFT;
 	int ret = 0;
 
+	pg = grab_cache_page_write_begin(mapping, index, flags);
+	if (!pg)
+		return -ENOMEM;
+	*pagep = pg;
+
 	jffs2_dbg(1, "%s()\n", __func__);
 
 	if (pageofs > inode->i_size) {
-		ret = jffs2_reserve_space(c, sizeof(ri), &alloc_len,
-					  ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE);
-		if (ret)
-			return ret;
-	}
-
-	mutex_lock(&f->sem);
-	pg = grab_cache_page_write_begin(mapping, index, flags);
-	if (!pg) {
-		if (alloc_len)
-			jffs2_complete_reservation(c);
-		mutex_unlock(&f->sem);
-		return -ENOMEM;
-	}
-	*pagep = pg;
-
-	if (alloc_len) {
 		/* Make new hole frag from old EOF to new page */
+		struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb);
+		struct jffs2_raw_inode ri;
 		struct jffs2_full_dnode *fn;
+		uint32_t alloc_len;
 
 		jffs2_dbg(1, "Writing new hole frag 0x%x-0x%x between current EOF and new page\n",
 			  (unsigned int)inode->i_size, pageofs);
 
+		ret = jffs2_reserve_space(c, sizeof(ri), &alloc_len,
+					  ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE);
+		if (ret)
+			goto out_page;
+
+		mutex_lock(&f->sem);
 		memset(&ri, 0, sizeof(ri));
 
 		ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
@@ -196,6 +190,7 @@
 		if (IS_ERR(fn)) {
 			ret = PTR_ERR(fn);
 			jffs2_complete_reservation(c);
+			mutex_unlock(&f->sem);
 			goto out_page;
 		}
 		ret = jffs2_add_full_dnode_to_inode(c, f, fn);
@@ -210,10 +205,12 @@
 			jffs2_mark_node_obsolete(c, fn->raw);
 			jffs2_free_full_dnode(fn);
 			jffs2_complete_reservation(c);
+			mutex_unlock(&f->sem);
 			goto out_page;
 		}
 		jffs2_complete_reservation(c);
 		inode->i_size = pageofs;
+		mutex_unlock(&f->sem);
 	}
 
 	/*
@@ -222,18 +219,18 @@
 	 * case of a short-copy.
 	 */
 	if (!PageUptodate(pg)) {
+		mutex_lock(&f->sem);
 		ret = jffs2_do_readpage_nolock(inode, pg);
+		mutex_unlock(&f->sem);
 		if (ret)
 			goto out_page;
 	}
-	mutex_unlock(&f->sem);
 	jffs2_dbg(1, "end write_begin(). pg->flags %lx\n", pg->flags);
 	return ret;
 
 out_page:
 	unlock_page(pg);
 	page_cache_release(pg);
-	mutex_unlock(&f->sem);
 	return ret;
 }
 
diff --git a/fs/jffs2/gc.c b/fs/jffs2/gc.c
index 5a2dec2..95d5880 100644
--- a/fs/jffs2/gc.c
+++ b/fs/jffs2/gc.c
@@ -1296,14 +1296,17 @@
 		BUG_ON(start > orig_start);
 	}
 
-	/* First, use readpage() to read the appropriate page into the page cache */
-	/* Q: What happens if we actually try to GC the _same_ page for which commit_write()
-	 *    triggered garbage collection in the first place?
-	 * A: I _think_ it's OK. read_cache_page shouldn't deadlock, we'll write out the
-	 *    page OK. We'll actually write it out again in commit_write, which is a little
-	 *    suboptimal, but at least we're correct.
-	 */
+	/* The rules state that we must obtain the page lock *before* f->sem, so
+	 * drop f->sem temporarily. Since we also hold c->alloc_sem, nothing's
+	 * actually going to *change* so we're safe; we only allow reading.
+	 *
+	 * It is important to note that jffs2_write_begin() will ensure that its
+	 * page is marked Uptodate before allocating space. That means that if we
+	 * end up here trying to GC the *same* page that jffs2_write_begin() is
+	 * trying to write out, read_cache_page() will not deadlock. */
+	mutex_unlock(&f->sem);
 	pg_ptr = jffs2_gc_fetch_page(c, f, start, &pg);
+	mutex_lock(&f->sem);
 
 	if (IS_ERR(pg_ptr)) {
 		pr_warn("read_cache_page() returned error: %ld\n",
diff --git a/fs/jffs2/nodelist.h b/fs/jffs2/nodelist.h
index fa35ff7..0637271 100644
--- a/fs/jffs2/nodelist.h
+++ b/fs/jffs2/nodelist.h
@@ -194,6 +194,7 @@
 #define INO_STATE_CLEARING	6	/* In clear_inode() */
 
 #define INO_FLAGS_XATTR_CHECKED	0x01	/* has no duplicate xattr_ref */
+#define INO_FLAGS_IS_DIR	0x02	/* is a directory */
 
 #define RAWNODE_CLASS_INODE_CACHE	0
 #define RAWNODE_CLASS_XATTR_DATUM	1
@@ -249,7 +250,10 @@
 
 struct jffs2_full_dirent
 {
-	struct jffs2_raw_node_ref *raw;
+	union {
+		struct jffs2_raw_node_ref *raw;
+		struct jffs2_inode_cache *ic; /* Just during part of build */
+	};
 	struct jffs2_full_dirent *next;
 	uint32_t version;
 	uint32_t ino; /* == zero for unlink */
diff --git a/fs/namei.c b/fs/namei.c
index f624d13..9c590e0 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1712,6 +1712,11 @@
 		return 0;
 	if (!follow)
 		return 0;
+	/* make sure that d_is_symlink above matches inode */
+	if (nd->flags & LOOKUP_RCU) {
+		if (read_seqcount_retry(&link->dentry->d_seq, seq))
+			return -ECHILD;
+	}
 	return pick_link(nd, link, inode, seq);
 }
 
@@ -1743,11 +1748,11 @@
 		if (err < 0)
 			return err;
 
-		inode = d_backing_inode(path.dentry);
 		seq = 0;	/* we are already out of RCU mode */
 		err = -ENOENT;
 		if (d_is_negative(path.dentry))
 			goto out_path_put;
+		inode = d_backing_inode(path.dentry);
 	}
 
 	if (flags & WALK_PUT)
@@ -3192,12 +3197,12 @@
 		return error;
 
 	BUG_ON(nd->flags & LOOKUP_RCU);
-	inode = d_backing_inode(path.dentry);
 	seq = 0;	/* out of RCU mode, so the value doesn't matter */
 	if (unlikely(d_is_negative(path.dentry))) {
 		path_to_nameidata(&path, nd);
 		return -ENOENT;
 	}
+	inode = d_backing_inode(path.dentry);
 finish_lookup:
 	if (nd->depth)
 		put_link(nd);
@@ -3206,11 +3211,6 @@
 	if (unlikely(error))
 		return error;
 
-	if (unlikely(d_is_symlink(path.dentry)) && !(open_flag & O_PATH)) {
-		path_to_nameidata(&path, nd);
-		return -ELOOP;
-	}
-
 	if ((nd->flags & LOOKUP_RCU) || nd->path.mnt != path.mnt) {
 		path_to_nameidata(&path, nd);
 	} else {
@@ -3229,6 +3229,10 @@
 		return error;
 	}
 	audit_inode(nd->name, nd->path.dentry, 0);
+	if (unlikely(d_is_symlink(nd->path.dentry)) && !(open_flag & O_PATH)) {
+		error = -ELOOP;
+		goto out;
+	}
 	error = -EISDIR;
 	if ((open_flag & O_CREAT) && d_is_dir(nd->path.dentry))
 		goto out;
@@ -3273,6 +3277,10 @@
 			goto exit_fput;
 	}
 out:
+	if (unlikely(error > 0)) {
+		WARN_ON(1);
+		error = -EINVAL;
+	}
 	if (got_write)
 		mnt_drop_write(nd->path.mnt);
 	path_put(&save_parent);
diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c
index 26c2de2..b7f8eae 100644
--- a/fs/ncpfs/dir.c
+++ b/fs/ncpfs/dir.c
@@ -633,7 +633,7 @@
 				d_rehash(newdent);
 		} else {
 			spin_lock(&dentry->d_lock);
-			NCP_FINFO(inode)->flags &= ~NCPI_DIR_CACHE;
+			NCP_FINFO(dir)->flags &= ~NCPI_DIR_CACHE;
 			spin_unlock(&dentry->d_lock);
 		}
 	} else {
diff --git a/fs/nfs/blocklayout/extent_tree.c b/fs/nfs/blocklayout/extent_tree.c
index c59a59c..35ab51c 100644
--- a/fs/nfs/blocklayout/extent_tree.c
+++ b/fs/nfs/blocklayout/extent_tree.c
@@ -476,6 +476,7 @@
 
 		for (i = 0; i < nr_pages; i++)
 			put_page(arg->layoutupdate_pages[i]);
+		vfree(arg->start_p);
 		kfree(arg->layoutupdate_pages);
 	} else {
 		put_page(arg->layoutupdate_page);
@@ -559,10 +560,15 @@
 
 	if (unlikely(arg->layoutupdate_pages != &arg->layoutupdate_page)) {
 		void *p = start_p, *end = p + arg->layoutupdate_len;
+		struct page *page = NULL;
 		int i = 0;
 
-		for ( ; p < end; p += PAGE_SIZE)
-			arg->layoutupdate_pages[i++] = vmalloc_to_page(p);
+		arg->start_p = start_p;
+		for ( ; p < end; p += PAGE_SIZE) {
+			page = vmalloc_to_page(p);
+			arg->layoutupdate_pages[i++] = page;
+			get_page(page);
+		}
 	}
 
 	dprintk("%s found %zu ranges\n", __func__, count);
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 5bcd92d..0cb1abd 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -1215,7 +1215,7 @@
 					hdr->pgio_mirror_idx + 1,
 					&hdr->pgio_mirror_idx))
 			goto out_eagain;
-		set_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE,
+		set_bit(NFS_LAYOUT_RETURN_REQUESTED,
 			&hdr->lseg->pls_layout->plh_flags);
 		pnfs_read_resend_pnfs(hdr);
 		return task->tk_status;
diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
index 29898a9..eb37046 100644
--- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
+++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
@@ -412,7 +412,7 @@
 					 OP_ILLEGAL, GFP_NOIO);
 		if (!fail_return) {
 			if (ff_layout_has_available_ds(lseg))
-				set_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE,
+				set_bit(NFS_LAYOUT_RETURN_REQUESTED,
 					&lseg->pls_layout->plh_flags);
 			else
 				pnfs_error_mark_layout_for_return(ino, lseg);
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index bd25dc7..dff8346 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -16,29 +16,8 @@
 
 #define NFSDBG_FACILITY NFSDBG_PROC
 
-static int nfs42_set_rw_stateid(nfs4_stateid *dst, struct file *file,
-				fmode_t fmode)
-{
-	struct nfs_open_context *open;
-	struct nfs_lock_context *lock;
-	int ret;
-
-	open = get_nfs_open_context(nfs_file_open_context(file));
-	lock = nfs_get_lock_context(open);
-	if (IS_ERR(lock)) {
-		put_nfs_open_context(open);
-		return PTR_ERR(lock);
-	}
-
-	ret = nfs4_set_rw_stateid(dst, open, lock, fmode);
-
-	nfs_put_lock_context(lock);
-	put_nfs_open_context(open);
-	return ret;
-}
-
 static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep,
-				 loff_t offset, loff_t len)
+		struct nfs_lock_context *lock, loff_t offset, loff_t len)
 {
 	struct inode *inode = file_inode(filep);
 	struct nfs_server *server = NFS_SERVER(inode);
@@ -56,7 +35,8 @@
 	msg->rpc_argp = &args;
 	msg->rpc_resp = &res;
 
-	status = nfs42_set_rw_stateid(&args.falloc_stateid, filep, FMODE_WRITE);
+	status = nfs4_set_rw_stateid(&args.falloc_stateid, lock->open_context,
+			lock, FMODE_WRITE);
 	if (status)
 		return status;
 
@@ -78,15 +58,26 @@
 {
 	struct nfs_server *server = NFS_SERVER(file_inode(filep));
 	struct nfs4_exception exception = { };
+	struct nfs_lock_context *lock;
 	int err;
 
+	lock = nfs_get_lock_context(nfs_file_open_context(filep));
+	if (IS_ERR(lock))
+		return PTR_ERR(lock);
+
+	exception.inode = file_inode(filep);
+	exception.state = lock->open_context->state;
+
 	do {
-		err = _nfs42_proc_fallocate(msg, filep, offset, len);
-		if (err == -ENOTSUPP)
-			return -EOPNOTSUPP;
+		err = _nfs42_proc_fallocate(msg, filep, lock, offset, len);
+		if (err == -ENOTSUPP) {
+			err = -EOPNOTSUPP;
+			break;
+		}
 		err = nfs4_handle_exception(server, err, &exception);
 	} while (exception.retry);
 
+	nfs_put_lock_context(lock);
 	return err;
 }
 
@@ -135,7 +126,8 @@
 	return err;
 }
 
-static loff_t _nfs42_proc_llseek(struct file *filep, loff_t offset, int whence)
+static loff_t _nfs42_proc_llseek(struct file *filep,
+		struct nfs_lock_context *lock, loff_t offset, int whence)
 {
 	struct inode *inode = file_inode(filep);
 	struct nfs42_seek_args args = {
@@ -156,7 +148,8 @@
 	if (!nfs_server_capable(inode, NFS_CAP_SEEK))
 		return -ENOTSUPP;
 
-	status = nfs42_set_rw_stateid(&args.sa_stateid, filep, FMODE_READ);
+	status = nfs4_set_rw_stateid(&args.sa_stateid, lock->open_context,
+			lock, FMODE_READ);
 	if (status)
 		return status;
 
@@ -175,17 +168,28 @@
 {
 	struct nfs_server *server = NFS_SERVER(file_inode(filep));
 	struct nfs4_exception exception = { };
+	struct nfs_lock_context *lock;
 	loff_t err;
 
+	lock = nfs_get_lock_context(nfs_file_open_context(filep));
+	if (IS_ERR(lock))
+		return PTR_ERR(lock);
+
+	exception.inode = file_inode(filep);
+	exception.state = lock->open_context->state;
+
 	do {
-		err = _nfs42_proc_llseek(filep, offset, whence);
+		err = _nfs42_proc_llseek(filep, lock, offset, whence);
 		if (err >= 0)
 			break;
-		if (err == -ENOTSUPP)
-			return -EOPNOTSUPP;
+		if (err == -ENOTSUPP) {
+			err = -EOPNOTSUPP;
+			break;
+		}
 		err = nfs4_handle_exception(server, err, &exception);
 	} while (exception.retry);
 
+	nfs_put_lock_context(lock);
 	return err;
 }
 
@@ -298,8 +302,9 @@
 }
 
 static int _nfs42_proc_clone(struct rpc_message *msg, struct file *src_f,
-			     struct file *dst_f, loff_t src_offset,
-			     loff_t dst_offset, loff_t count)
+		struct file *dst_f, struct nfs_lock_context *src_lock,
+		struct nfs_lock_context *dst_lock, loff_t src_offset,
+		loff_t dst_offset, loff_t count)
 {
 	struct inode *src_inode = file_inode(src_f);
 	struct inode *dst_inode = file_inode(dst_f);
@@ -320,11 +325,13 @@
 	msg->rpc_argp = &args;
 	msg->rpc_resp = &res;
 
-	status = nfs42_set_rw_stateid(&args.src_stateid, src_f, FMODE_READ);
+	status = nfs4_set_rw_stateid(&args.src_stateid, src_lock->open_context,
+			src_lock, FMODE_READ);
 	if (status)
 		return status;
 
-	status = nfs42_set_rw_stateid(&args.dst_stateid, dst_f, FMODE_WRITE);
+	status = nfs4_set_rw_stateid(&args.dst_stateid, dst_lock->open_context,
+			dst_lock, FMODE_WRITE);
 	if (status)
 		return status;
 
@@ -349,22 +356,48 @@
 	};
 	struct inode *inode = file_inode(src_f);
 	struct nfs_server *server = NFS_SERVER(file_inode(src_f));
-	struct nfs4_exception exception = { };
-	int err;
+	struct nfs_lock_context *src_lock;
+	struct nfs_lock_context *dst_lock;
+	struct nfs4_exception src_exception = { };
+	struct nfs4_exception dst_exception = { };
+	int err, err2;
 
 	if (!nfs_server_capable(inode, NFS_CAP_CLONE))
 		return -EOPNOTSUPP;
 
+	src_lock = nfs_get_lock_context(nfs_file_open_context(src_f));
+	if (IS_ERR(src_lock))
+		return PTR_ERR(src_lock);
+
+	src_exception.inode = file_inode(src_f);
+	src_exception.state = src_lock->open_context->state;
+
+	dst_lock = nfs_get_lock_context(nfs_file_open_context(dst_f));
+	if (IS_ERR(dst_lock)) {
+		err = PTR_ERR(dst_lock);
+		goto out_put_src_lock;
+	}
+
+	dst_exception.inode = file_inode(dst_f);
+	dst_exception.state = dst_lock->open_context->state;
+
 	do {
-		err = _nfs42_proc_clone(&msg, src_f, dst_f, src_offset,
-					dst_offset, count);
+		err = _nfs42_proc_clone(&msg, src_f, dst_f, src_lock, dst_lock,
+					src_offset, dst_offset, count);
 		if (err == -ENOTSUPP || err == -EOPNOTSUPP) {
 			NFS_SERVER(inode)->caps &= ~NFS_CAP_CLONE;
-			return -EOPNOTSUPP;
+			err = -EOPNOTSUPP;
+			break;
 		}
-		err = nfs4_handle_exception(server, err, &exception);
-	} while (exception.retry);
 
+		err2 = nfs4_handle_exception(server, err, &src_exception);
+		err = nfs4_handle_exception(server, err, &dst_exception);
+		if (!err)
+			err = err2;
+	} while (src_exception.retry || dst_exception.retry);
+
+	nfs_put_lock_context(dst_lock);
+out_put_src_lock:
+	nfs_put_lock_context(src_lock);
 	return err;
-
 }
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 4bfc33a..1488159 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2466,9 +2466,9 @@
 		dentry = d_add_unique(dentry, igrab(state->inode));
 		if (dentry == NULL) {
 			dentry = opendata->dentry;
-		} else if (dentry != ctx->dentry) {
+		} else {
 			dput(ctx->dentry);
-			ctx->dentry = dget(dentry);
+			ctx->dentry = dentry;
 		}
 		nfs_set_verifier(dentry,
 				nfs_save_change_attribute(d_inode(opendata->dir)));
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index a3592cc..2fa483e 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -52,9 +52,7 @@
  */
 static LIST_HEAD(pnfs_modules_tbl);
 
-static int
-pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, const nfs4_stateid *stateid,
-		       enum pnfs_iomode iomode, bool sync);
+static void pnfs_layoutreturn_before_put_layout_hdr(struct pnfs_layout_hdr *lo);
 
 /* Return the registered pnfs layout driver module matching given id */
 static struct pnfs_layoutdriver_type *
@@ -243,6 +241,8 @@
 {
 	struct inode *inode = lo->plh_inode;
 
+	pnfs_layoutreturn_before_put_layout_hdr(lo);
+
 	if (atomic_dec_and_lock(&lo->plh_refcount, &inode->i_lock)) {
 		if (!list_empty(&lo->plh_segs))
 			WARN_ONCE(1, "NFS: BUG unfreed layout segments.\n");
@@ -252,6 +252,27 @@
 	}
 }
 
+/*
+ * Mark a pnfs_layout_hdr and all associated layout segments as invalid
+ *
+ * In order to continue using the pnfs_layout_hdr, a full recovery
+ * is required.
+ * Note that caller must hold inode->i_lock.
+ */
+static int
+pnfs_mark_layout_stateid_invalid(struct pnfs_layout_hdr *lo,
+		struct list_head *lseg_list)
+{
+	struct pnfs_layout_range range = {
+		.iomode = IOMODE_ANY,
+		.offset = 0,
+		.length = NFS4_MAX_UINT64,
+	};
+
+	set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
+	return pnfs_mark_matching_lsegs_invalid(lo, lseg_list, &range);
+}
+
 static int
 pnfs_iomode_to_fail_bit(u32 iomode)
 {
@@ -345,58 +366,6 @@
 	rpc_wake_up(&NFS_SERVER(inode)->roc_rpcwaitq);
 }
 
-/* Return true if layoutreturn is needed */
-static bool
-pnfs_layout_need_return(struct pnfs_layout_hdr *lo,
-			struct pnfs_layout_segment *lseg)
-{
-	struct pnfs_layout_segment *s;
-
-	if (!test_and_clear_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags))
-		return false;
-
-	list_for_each_entry(s, &lo->plh_segs, pls_list)
-		if (s != lseg && test_bit(NFS_LSEG_LAYOUTRETURN, &s->pls_flags))
-			return false;
-
-	return true;
-}
-
-static bool
-pnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo)
-{
-	if (test_and_set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags))
-		return false;
-	lo->plh_return_iomode = 0;
-	pnfs_get_layout_hdr(lo);
-	clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, &lo->plh_flags);
-	return true;
-}
-
-static void pnfs_layoutreturn_before_put_lseg(struct pnfs_layout_segment *lseg,
-		struct pnfs_layout_hdr *lo, struct inode *inode)
-{
-	lo = lseg->pls_layout;
-	inode = lo->plh_inode;
-
-	spin_lock(&inode->i_lock);
-	if (pnfs_layout_need_return(lo, lseg)) {
-		nfs4_stateid stateid;
-		enum pnfs_iomode iomode;
-		bool send;
-
-		nfs4_stateid_copy(&stateid, &lo->plh_stateid);
-		iomode = lo->plh_return_iomode;
-		send = pnfs_prepare_layoutreturn(lo);
-		spin_unlock(&inode->i_lock);
-		if (send) {
-			/* Send an async layoutreturn so we dont deadlock */
-			pnfs_send_layoutreturn(lo, &stateid, iomode, false);
-		}
-	} else
-		spin_unlock(&inode->i_lock);
-}
-
 void
 pnfs_put_lseg(struct pnfs_layout_segment *lseg)
 {
@@ -410,15 +379,8 @@
 		atomic_read(&lseg->pls_refcount),
 		test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
 
-	/* Handle the case where refcount != 1 */
-	if (atomic_add_unless(&lseg->pls_refcount, -1, 1))
-		return;
-
 	lo = lseg->pls_layout;
 	inode = lo->plh_inode;
-	/* Do we need a layoutreturn? */
-	if (test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags))
-		pnfs_layoutreturn_before_put_lseg(lseg, lo, inode);
 
 	if (atomic_dec_and_lock(&lseg->pls_refcount, &inode->i_lock)) {
 		if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags)) {
@@ -613,9 +575,8 @@
 	spin_lock(&nfsi->vfs_inode.i_lock);
 	lo = nfsi->layout;
 	if (lo) {
-		lo->plh_block_lgets++; /* permanently block new LAYOUTGETs */
-		pnfs_mark_matching_lsegs_invalid(lo, &tmp_list, NULL);
 		pnfs_get_layout_hdr(lo);
+		pnfs_mark_layout_stateid_invalid(lo, &tmp_list);
 		pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RO_FAILED);
 		pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RW_FAILED);
 		spin_unlock(&nfsi->vfs_inode.i_lock);
@@ -676,11 +637,6 @@
 {
 	struct pnfs_layout_hdr *lo;
 	struct inode *inode;
-	struct pnfs_layout_range range = {
-		.iomode = IOMODE_ANY,
-		.offset = 0,
-		.length = NFS4_MAX_UINT64,
-	};
 	LIST_HEAD(lseg_list);
 	int ret = 0;
 
@@ -695,11 +651,11 @@
 
 		spin_lock(&inode->i_lock);
 		list_del_init(&lo->plh_bulk_destroy);
-		lo->plh_block_lgets++; /* permanently block new LAYOUTGETs */
-		if (is_bulk_recall)
-			set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
-		if (pnfs_mark_matching_lsegs_invalid(lo, &lseg_list, &range))
+		if (pnfs_mark_layout_stateid_invalid(lo, &lseg_list)) {
+			if (is_bulk_recall)
+				set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
 			ret = -EAGAIN;
+		}
 		spin_unlock(&inode->i_lock);
 		pnfs_free_lseg_list(&lseg_list);
 		/* Free all lsegs that are attached to commit buckets */
@@ -937,6 +893,17 @@
 	rpc_wake_up(&NFS_SERVER(lo->plh_inode)->roc_rpcwaitq);
 }
 
+static bool
+pnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo)
+{
+	if (test_and_set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags))
+		return false;
+	lo->plh_return_iomode = 0;
+	pnfs_get_layout_hdr(lo);
+	clear_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags);
+	return true;
+}
+
 static int
 pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, const nfs4_stateid *stateid,
 		       enum pnfs_iomode iomode, bool sync)
@@ -971,6 +938,48 @@
 	return status;
 }
 
+/* Return true if layoutreturn is needed */
+static bool
+pnfs_layout_need_return(struct pnfs_layout_hdr *lo)
+{
+	struct pnfs_layout_segment *s;
+
+	if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags))
+		return false;
+
+	/* Defer layoutreturn until all lsegs are done */
+	list_for_each_entry(s, &lo->plh_segs, pls_list) {
+		if (test_bit(NFS_LSEG_LAYOUTRETURN, &s->pls_flags))
+			return false;
+	}
+
+	return true;
+}
+
+static void pnfs_layoutreturn_before_put_layout_hdr(struct pnfs_layout_hdr *lo)
+{
+	struct inode *inode= lo->plh_inode;
+
+	if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags))
+		return;
+	spin_lock(&inode->i_lock);
+	if (pnfs_layout_need_return(lo)) {
+		nfs4_stateid stateid;
+		enum pnfs_iomode iomode;
+		bool send;
+
+		nfs4_stateid_copy(&stateid, &lo->plh_stateid);
+		iomode = lo->plh_return_iomode;
+		send = pnfs_prepare_layoutreturn(lo);
+		spin_unlock(&inode->i_lock);
+		if (send) {
+			/* Send an async layoutreturn so we dont deadlock */
+			pnfs_send_layoutreturn(lo, &stateid, iomode, false);
+		}
+	} else
+		spin_unlock(&inode->i_lock);
+}
+
 /*
  * Initiates a LAYOUTRETURN(FILE), and removes the pnfs_layout_hdr
  * when the layout segment list is empty.
@@ -1091,7 +1100,7 @@
 
 	nfs4_stateid_copy(&stateid, &lo->plh_stateid);
 	/* always send layoutreturn if being marked so */
-	if (test_and_clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE,
+	if (test_and_clear_bit(NFS_LAYOUT_RETURN_REQUESTED,
 				   &lo->plh_flags))
 		layoutreturn = pnfs_prepare_layoutreturn(lo);
 
@@ -1744,8 +1753,19 @@
 	if (lo->plh_return_iomode != 0)
 		iomode = IOMODE_ANY;
 	lo->plh_return_iomode = iomode;
+	set_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags);
 }
 
+/**
+ * pnfs_mark_matching_lsegs_return - Free or return matching layout segments
+ * @lo: pointer to layout header
+ * @tmp_list: list header to be used with pnfs_free_lseg_list()
+ * @return_range: describe layout segment ranges to be returned
+ *
+ * This function is mainly intended for use by layoutrecall. It attempts
+ * to free the layout segment immediately, or else to mark it for return
+ * as soon as its reference count drops to zero.
+ */
 int
 pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo,
 				struct list_head *tmp_list,
@@ -1768,12 +1788,11 @@
 				lseg, lseg->pls_range.iomode,
 				lseg->pls_range.offset,
 				lseg->pls_range.length);
+			if (mark_lseg_invalid(lseg, tmp_list))
+				continue;
+			remaining++;
 			set_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags);
 			pnfs_set_plh_return_iomode(lo, return_range->iomode);
-			if (!mark_lseg_invalid(lseg, tmp_list))
-				remaining++;
-			set_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE,
-					&lo->plh_flags);
 		}
 	return remaining;
 }
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 9f4e2a4..1ac1db5 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -94,8 +94,8 @@
 	NFS_LAYOUT_RO_FAILED = 0,	/* get ro layout failed stop trying */
 	NFS_LAYOUT_RW_FAILED,		/* get rw layout failed stop trying */
 	NFS_LAYOUT_BULK_RECALL,		/* bulk recall affecting layout */
-	NFS_LAYOUT_RETURN,		/* Return this layout ASAP */
-	NFS_LAYOUT_RETURN_BEFORE_CLOSE,	/* Return this layout before close */
+	NFS_LAYOUT_RETURN,		/* layoutreturn in progress */
+	NFS_LAYOUT_RETURN_REQUESTED,	/* Return this layout ASAP */
 	NFS_LAYOUT_INVALID_STID,	/* layout stateid id is invalid */
 	NFS_LAYOUT_FIRST_LAYOUTGET,	/* Serialize first layoutget */
 };
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index cfcbf11..7115c5d 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -91,7 +91,14 @@
 #include <linux/fsnotify_backend.h>
 #include "fsnotify.h"
 
+#define FSNOTIFY_REAPER_DELAY	(1)	/* 1 jiffy */
+
 struct srcu_struct fsnotify_mark_srcu;
+static DEFINE_SPINLOCK(destroy_lock);
+static LIST_HEAD(destroy_list);
+
+static void fsnotify_mark_destroy(struct work_struct *work);
+static DECLARE_DELAYED_WORK(reaper_work, fsnotify_mark_destroy);
 
 void fsnotify_get_mark(struct fsnotify_mark *mark)
 {
@@ -165,19 +172,10 @@
 	atomic_dec(&group->num_marks);
 }
 
-static void
-fsnotify_mark_free_rcu(struct rcu_head *rcu)
-{
-	struct fsnotify_mark	*mark;
-
-	mark = container_of(rcu, struct fsnotify_mark, g_rcu);
-	fsnotify_put_mark(mark);
-}
-
 /*
- * Free fsnotify mark. The freeing is actually happening from a call_srcu
- * callback. Caller must have a reference to the mark or be protected by
- * fsnotify_mark_srcu.
+ * Free fsnotify mark. The freeing is actually happening from a kthread which
+ * first waits for srcu period end. Caller must have a reference to the mark
+ * or be protected by fsnotify_mark_srcu.
  */
 void fsnotify_free_mark(struct fsnotify_mark *mark)
 {
@@ -192,7 +190,11 @@
 	mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE;
 	spin_unlock(&mark->lock);
 
-	call_srcu(&fsnotify_mark_srcu, &mark->g_rcu, fsnotify_mark_free_rcu);
+	spin_lock(&destroy_lock);
+	list_add(&mark->g_list, &destroy_list);
+	spin_unlock(&destroy_lock);
+	queue_delayed_work(system_unbound_wq, &reaper_work,
+				FSNOTIFY_REAPER_DELAY);
 
 	/*
 	 * Some groups like to know that marks are being freed.  This is a
@@ -388,7 +390,12 @@
 
 	spin_unlock(&mark->lock);
 
-	call_srcu(&fsnotify_mark_srcu, &mark->g_rcu, fsnotify_mark_free_rcu);
+	spin_lock(&destroy_lock);
+	list_add(&mark->g_list, &destroy_list);
+	spin_unlock(&destroy_lock);
+	queue_delayed_work(system_unbound_wq, &reaper_work,
+				FSNOTIFY_REAPER_DELAY);
+
 	return ret;
 }
 
@@ -491,3 +498,21 @@
 	atomic_set(&mark->refcnt, 1);
 	mark->free_mark = free_mark;
 }
+
+static void fsnotify_mark_destroy(struct work_struct *work)
+{
+	struct fsnotify_mark *mark, *next;
+	struct list_head private_destroy_list;
+
+	spin_lock(&destroy_lock);
+	/* exchange the list head */
+	list_replace_init(&destroy_list, &private_destroy_list);
+	spin_unlock(&destroy_lock);
+
+	synchronize_srcu(&fsnotify_mark_srcu);
+
+	list_for_each_entry_safe(mark, next, &private_destroy_list, g_list) {
+		list_del_init(&mark->g_list);
+		fsnotify_put_mark(mark);
+	}
+}
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 794fd15..cda0361 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -956,6 +956,7 @@
 		tmp_ret = ocfs2_del_inode_from_orphan(osb, inode, di_bh,
 				update_isize, end);
 		if (tmp_ret < 0) {
+			ocfs2_inode_unlock(inode, 1);
 			ret = tmp_ret;
 			mlog_errno(ret);
 			brelse(di_bh);
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index a3cc6d2..a76b9ea 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -1254,15 +1254,15 @@
 
 void o2hb_exit(void)
 {
-	kfree(o2hb_db_livenodes);
-	kfree(o2hb_db_liveregions);
-	kfree(o2hb_db_quorumregions);
-	kfree(o2hb_db_failedregions);
 	debugfs_remove(o2hb_debug_failedregions);
 	debugfs_remove(o2hb_debug_quorumregions);
 	debugfs_remove(o2hb_debug_liveregions);
 	debugfs_remove(o2hb_debug_livenodes);
 	debugfs_remove(o2hb_debug_dir);
+	kfree(o2hb_db_livenodes);
+	kfree(o2hb_db_liveregions);
+	kfree(o2hb_db_quorumregions);
+	kfree(o2hb_db_failedregions);
 }
 
 static struct dentry *o2hb_debug_create(const char *name, struct dentry *dir,
@@ -1438,13 +1438,15 @@
 
 	kfree(reg->hr_slots);
 
-	kfree(reg->hr_db_regnum);
-	kfree(reg->hr_db_livenodes);
 	debugfs_remove(reg->hr_debug_livenodes);
 	debugfs_remove(reg->hr_debug_regnum);
 	debugfs_remove(reg->hr_debug_elapsed_time);
 	debugfs_remove(reg->hr_debug_pinned);
 	debugfs_remove(reg->hr_debug_dir);
+	kfree(reg->hr_db_livenodes);
+	kfree(reg->hr_db_regnum);
+	kfree(reg->hr_debug_elapsed_time);
+	kfree(reg->hr_debug_pinned);
 
 	spin_lock(&o2hb_live_lock);
 	list_del(&reg->hr_all_item);
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index c5bdf02..b94a425 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -2367,6 +2367,8 @@
 						break;
 					}
 				}
+				dlm_lockres_clear_refmap_bit(dlm, res,
+						dead_node);
 				spin_unlock(&res->spinlock);
 				continue;
 			}
diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c
index 9581d190..77ebc2b 100644
--- a/fs/ocfs2/mmap.c
+++ b/fs/ocfs2/mmap.c
@@ -147,6 +147,10 @@
 	ret = ocfs2_inode_lock(inode, &di_bh, 1);
 	if (ret < 0) {
 		mlog_errno(ret);
+		if (ret == -ENOMEM)
+			ret = VM_FAULT_OOM;
+		else
+			ret = VM_FAULT_SIGBUS;
 		goto out;
 	}
 
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index ed95272..52f6de5 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -618,7 +618,8 @@
 	 * sole user of this dentry.  Too tricky...  Just unhash for
 	 * now.
 	 */
-	d_drop(dentry);
+	if (!err)
+		d_drop(dentry);
 	inode_unlock(dir);
 
 	return err;
@@ -903,6 +904,13 @@
 	if (!overwrite && new_is_dir && !old_opaque && new_opaque)
 		ovl_remove_opaque(newdentry);
 
+	/*
+	 * Old dentry now lives in different location. Dentries in
+	 * lowerstack are stale. We cannot drop them here because
+	 * access to them is lockless. This could be only pure upper
+	 * or opaque directory - numlower is zero. Or upper non-dir
+	 * entry - its pureness is tracked by flag opaque.
+	 */
 	if (old_opaque != new_opaque) {
 		ovl_dentry_set_opaque(old, new_opaque);
 		if (!overwrite)
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index 49e2045..a4ff5d0 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -65,6 +65,8 @@
 
 		inode_lock(upperdentry->d_inode);
 		err = notify_change(upperdentry, attr, NULL);
+		if (!err)
+			ovl_copyattr(upperdentry->d_inode, dentry->d_inode);
 		inode_unlock(upperdentry->d_inode);
 	}
 	ovl_drop_write(dentry);
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 8d826bd..619ad4b 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -76,12 +76,14 @@
 	if (oe->__upperdentry) {
 		type = __OVL_PATH_UPPER;
 
-		if (oe->numlower) {
-			if (S_ISDIR(dentry->d_inode->i_mode))
-				type |= __OVL_PATH_MERGE;
-		} else if (!oe->opaque) {
+		/*
+		 * Non-dir dentry can hold lower dentry from previous
+		 * location. Its purity depends only on opaque flag.
+		 */
+		if (oe->numlower && S_ISDIR(dentry->d_inode->i_mode))
+			type |= __OVL_PATH_MERGE;
+		else if (!oe->opaque)
 			type |= __OVL_PATH_PURE;
-		}
 	} else {
 		if (oe->numlower > 1)
 			type |= __OVL_PATH_MERGE;
@@ -341,6 +343,7 @@
 
 static const struct dentry_operations ovl_reval_dentry_operations = {
 	.d_release = ovl_dentry_release,
+	.d_select_inode = ovl_d_select_inode,
 	.d_revalidate = ovl_dentry_revalidate,
 	.d_weak_revalidate = ovl_dentry_weak_revalidate,
 };
diff --git a/fs/pnode.c b/fs/pnode.c
index 6367e1e..c524fdd 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -202,6 +202,11 @@
 static struct mountpoint *mp;
 static struct hlist_head *list;
 
+static inline bool peers(struct mount *m1, struct mount *m2)
+{
+	return m1->mnt_group_id == m2->mnt_group_id && m1->mnt_group_id;
+}
+
 static int propagate_one(struct mount *m)
 {
 	struct mount *child;
@@ -212,7 +217,7 @@
 	/* skip if mountpoint isn't covered by it */
 	if (!is_subdir(mp->m_dentry, m->mnt.mnt_root))
 		return 0;
-	if (m->mnt_group_id == last_dest->mnt_group_id) {
+	if (peers(m, last_dest)) {
 		type = CL_MAKE_SHARED;
 	} else {
 		struct mount *n, *p;
@@ -223,7 +228,7 @@
 					last_source = last_source->mnt_master;
 					last_dest = last_source->mnt_parent;
 				}
-				if (n->mnt_group_id != last_dest->mnt_group_id) {
+				if (!peers(n, last_dest)) {
 					last_source = last_source->mnt_master;
 					last_dest = last_source->mnt_parent;
 				}
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 85d16c6..fa95ab2 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -259,23 +259,29 @@
 				sizeof(struct proc_maps_private));
 }
 
-static pid_t pid_of_stack(struct proc_maps_private *priv,
-				struct vm_area_struct *vma, bool is_pid)
+/*
+ * Indicate if the VMA is a stack for the given task; for
+ * /proc/PID/maps that is the stack of the main task.
+ */
+static int is_stack(struct proc_maps_private *priv,
+		    struct vm_area_struct *vma, int is_pid)
 {
-	struct inode *inode = priv->inode;
-	struct task_struct *task;
-	pid_t ret = 0;
+	int stack = 0;
 
-	rcu_read_lock();
-	task = pid_task(proc_pid(inode), PIDTYPE_PID);
-	if (task) {
-		task = task_of_stack(task, vma, is_pid);
+	if (is_pid) {
+		stack = vma->vm_start <= vma->vm_mm->start_stack &&
+			vma->vm_end >= vma->vm_mm->start_stack;
+	} else {
+		struct inode *inode = priv->inode;
+		struct task_struct *task;
+
+		rcu_read_lock();
+		task = pid_task(proc_pid(inode), PIDTYPE_PID);
 		if (task)
-			ret = task_pid_nr_ns(task, inode->i_sb->s_fs_info);
+			stack = vma_is_stack_for_task(vma, task);
+		rcu_read_unlock();
 	}
-	rcu_read_unlock();
-
-	return ret;
+	return stack;
 }
 
 static void
@@ -335,8 +341,6 @@
 
 	name = arch_vma_name(vma);
 	if (!name) {
-		pid_t tid;
-
 		if (!mm) {
 			name = "[vdso]";
 			goto done;
@@ -348,21 +352,8 @@
 			goto done;
 		}
 
-		tid = pid_of_stack(priv, vma, is_pid);
-		if (tid != 0) {
-			/*
-			 * Thread stack in /proc/PID/task/TID/maps or
-			 * the main process stack.
-			 */
-			if (!is_pid || (vma->vm_start <= mm->start_stack &&
-			    vma->vm_end >= mm->start_stack)) {
-				name = "[stack]";
-			} else {
-				/* Thread stack in /proc/PID/maps */
-				seq_pad(m, ' ');
-				seq_printf(m, "[stack:%d]", tid);
-			}
-		}
+		if (is_stack(priv, vma, is_pid))
+			name = "[stack]";
 	}
 
 done:
@@ -1552,18 +1543,19 @@
 static int gather_hugetlb_stats(pte_t *pte, unsigned long hmask,
 		unsigned long addr, unsigned long end, struct mm_walk *walk)
 {
+	pte_t huge_pte = huge_ptep_get(pte);
 	struct numa_maps *md;
 	struct page *page;
 
-	if (!pte_present(*pte))
+	if (!pte_present(huge_pte))
 		return 0;
 
-	page = pte_page(*pte);
+	page = pte_page(huge_pte);
 	if (!page)
 		return 0;
 
 	md = walk->private;
-	gather_stats(page, md, pte_dirty(*pte), 1);
+	gather_stats(page, md, pte_dirty(huge_pte), 1);
 	return 0;
 }
 
@@ -1617,19 +1609,8 @@
 		seq_file_path(m, file, "\n\t= ");
 	} else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) {
 		seq_puts(m, " heap");
-	} else {
-		pid_t tid = pid_of_stack(proc_priv, vma, is_pid);
-		if (tid != 0) {
-			/*
-			 * Thread stack in /proc/PID/task/TID/maps or
-			 * the main process stack.
-			 */
-			if (!is_pid || (vma->vm_start <= mm->start_stack &&
-			    vma->vm_end >= mm->start_stack))
-				seq_puts(m, " stack");
-			else
-				seq_printf(m, " stack:%d", tid);
-		}
+	} else if (is_stack(proc_priv, vma, is_pid)) {
+		seq_puts(m, " stack");
 	}
 
 	if (is_vm_hugetlb_page(vma))
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index e0d64c9..faacb0c 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -123,23 +123,26 @@
 	return size;
 }
 
-static pid_t pid_of_stack(struct proc_maps_private *priv,
-				struct vm_area_struct *vma, bool is_pid)
+static int is_stack(struct proc_maps_private *priv,
+		    struct vm_area_struct *vma, int is_pid)
 {
-	struct inode *inode = priv->inode;
-	struct task_struct *task;
-	pid_t ret = 0;
+	struct mm_struct *mm = vma->vm_mm;
+	int stack = 0;
 
-	rcu_read_lock();
-	task = pid_task(proc_pid(inode), PIDTYPE_PID);
-	if (task) {
-		task = task_of_stack(task, vma, is_pid);
+	if (is_pid) {
+		stack = vma->vm_start <= mm->start_stack &&
+			vma->vm_end >= mm->start_stack;
+	} else {
+		struct inode *inode = priv->inode;
+		struct task_struct *task;
+
+		rcu_read_lock();
+		task = pid_task(proc_pid(inode), PIDTYPE_PID);
 		if (task)
-			ret = task_pid_nr_ns(task, inode->i_sb->s_fs_info);
+			stack = vma_is_stack_for_task(vma, task);
+		rcu_read_unlock();
 	}
-	rcu_read_unlock();
-
-	return ret;
+	return stack;
 }
 
 /*
@@ -181,21 +184,9 @@
 	if (file) {
 		seq_pad(m, ' ');
 		seq_file_path(m, file, "");
-	} else if (mm) {
-		pid_t tid = pid_of_stack(priv, vma, is_pid);
-
-		if (tid != 0) {
-			seq_pad(m, ' ');
-			/*
-			 * Thread stack in /proc/PID/task/TID/maps or
-			 * the main process stack.
-			 */
-			if (!is_pid || (vma->vm_start <= mm->start_stack &&
-			    vma->vm_end >= mm->start_stack))
-				seq_printf(m, "[stack]");
-			else
-				seq_printf(m, "[stack:%d]", tid);
-		}
+	} else if (mm && is_stack(priv, vma, is_pid)) {
+		seq_pad(m, ' ');
+		seq_printf(m, "[stack]");
 	}
 
 	seq_putc(m, '\n');
diff --git a/fs/read_write.c b/fs/read_write.c
index 324ec27..dadf24e 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -17,6 +17,7 @@
 #include <linux/splice.h>
 #include <linux/compat.h>
 #include <linux/mount.h>
+#include <linux/fs.h>
 #include "internal.h"
 
 #include <asm/uaccess.h>
@@ -183,7 +184,7 @@
 	switch (whence) {
 	case SEEK_SET: case SEEK_CUR:
 		return generic_file_llseek_size(file, offset, whence,
-						~0ULL, 0);
+						OFFSET_MAX, 0);
 	default:
 		return -EINVAL;
 	}
@@ -1532,10 +1533,12 @@
 
 	if (!(file_in->f_mode & FMODE_READ) ||
 	    !(file_out->f_mode & FMODE_WRITE) ||
-	    (file_out->f_flags & O_APPEND) ||
-	    !file_in->f_op->clone_file_range)
+	    (file_out->f_flags & O_APPEND))
 		return -EBADF;
 
+	if (!file_in->f_op->clone_file_range)
+		return -EOPNOTSUPP;
+
 	ret = clone_verify_area(file_in, pos_in, len, false);
 	if (ret)
 		return ret;
diff --git a/fs/super.c b/fs/super.c
index 1182af8..74914b1 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -415,6 +415,7 @@
 		sb->s_flags &= ~MS_ACTIVE;
 
 		fsnotify_unmount_inodes(sb);
+		cgroup_writeback_umount();
 
 		evict_inodes(sb);
 
diff --git a/fs/timerfd.c b/fs/timerfd.c
index b94fa6c..053818d 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -153,7 +153,7 @@
 	if (isalarm(ctx))
 		remaining = alarm_expires_remaining(&ctx->t.alarm);
 	else
-		remaining = hrtimer_expires_remaining(&ctx->t.tmr);
+		remaining = hrtimer_expires_remaining_adjusted(&ctx->t.tmr);
 
 	return remaining.tv64 < 0 ? ktime_set(0, 0): remaining;
 }
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 5031170..66cdb44 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -287,6 +287,12 @@
 		goto out;
 
 	/*
+	 * We don't do userfault handling for the final child pid update.
+	 */
+	if (current->flags & PF_EXITING)
+		goto out;
+
+	/*
 	 * Check that we can return VM_FAULT_RETRY.
 	 *
 	 * NOTE: it should become possible to return VM_FAULT_RETRY
diff --git a/fs/xattr.c b/fs/xattr.c
index 07d0e47..4861322 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -940,7 +940,7 @@
 	bool trusted = capable(CAP_SYS_ADMIN);
 	struct simple_xattr *xattr;
 	ssize_t remaining_size = size;
-	int err;
+	int err = 0;
 
 #ifdef CONFIG_FS_POSIX_ACL
 	if (inode->i_acl) {
@@ -965,11 +965,11 @@
 
 		err = xattr_list_one(&buffer, &remaining_size, xattr->name);
 		if (err)
-			return err;
+			break;
 	}
 	spin_unlock(&xattrs->lock);
 
-	return size - remaining_size;
+	return err ? err : size - remaining_size;
 }
 
 /*
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 379c089..a9ebabfe 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -55,7 +55,7 @@
 	} while ((bh = bh->b_this_page) != head);
 }
 
-STATIC struct block_device *
+struct block_device *
 xfs_find_bdev_for_inode(
 	struct inode		*inode)
 {
@@ -1208,6 +1208,10 @@
 	struct writeback_control *wbc)
 {
 	xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED);
+	if (dax_mapping(mapping))
+		return dax_writeback_mapping_range(mapping,
+				xfs_find_bdev_for_inode(mapping->host), wbc);
+
 	return generic_writepages(mapping, wbc);
 }
 
diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h
index f6ffc9a..a4343c6 100644
--- a/fs/xfs/xfs_aops.h
+++ b/fs/xfs/xfs_aops.h
@@ -62,5 +62,6 @@
 			         struct buffer_head *map_bh, int create);
 
 extern void xfs_count_page_state(struct page *, int *, int *);
+extern struct block_device *xfs_find_bdev_for_inode(struct inode *);
 
 #endif /* __XFS_AOPS_H__ */
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 45ec9e4..6c87601 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -75,7 +75,8 @@
 	ssize_t		size = XFS_FSB_TO_B(mp, count_fsb);
 
 	if (IS_DAX(VFS_I(ip)))
-		return dax_clear_blocks(VFS_I(ip), block, size);
+		return dax_clear_sectors(xfs_find_bdev_for_inode(VFS_I(ip)),
+				sector, size);
 
 	/*
 	 * let the block layer decide on the fastest method of
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index da37beb..be55688 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1109,27 +1109,10 @@
 	bool			tmp_wrapped;
 
 	/*
-	 * Search backwards through the log looking for the log record header
-	 * block. This wraps all the way back around to the head so something is
-	 * seriously wrong if we can't find it.
-	 */
-	found = xlog_rseek_logrec_hdr(log, *head_blk, *head_blk, 1, bp, rhead_blk,
-				      rhead, wrapped);
-	if (found < 0)
-		return found;
-	if (!found) {
-		xfs_warn(log->l_mp, "%s: couldn't find sync record", __func__);
-		return -EIO;
-	}
-
-	*tail_blk = BLOCK_LSN(be64_to_cpu((*rhead)->h_tail_lsn));
-
-	/*
-	 * Now that we have a tail block, check the head of the log for torn
-	 * writes. Search again until we hit the tail or the maximum number of
-	 * log record I/Os that could have been in flight at one time. Use a
-	 * temporary buffer so we don't trash the rhead/bp pointer from the
-	 * call above.
+	 * Check the head of the log for torn writes. Search backwards from the
+	 * head until we hit the tail or the maximum number of log record I/Os
+	 * that could have been in flight at one time. Use a temporary buffer so
+	 * we don't trash the rhead/bp pointers from the caller.
 	 */
 	tmp_bp = xlog_get_bp(log, 1);
 	if (!tmp_bp)
@@ -1216,6 +1199,115 @@
 }
 
 /*
+ * Check whether the head of the log points to an unmount record. In other
+ * words, determine whether the log is clean. If so, update the in-core state
+ * appropriately.
+ */
+static int
+xlog_check_unmount_rec(
+	struct xlog		*log,
+	xfs_daddr_t		*head_blk,
+	xfs_daddr_t		*tail_blk,
+	struct xlog_rec_header	*rhead,
+	xfs_daddr_t		rhead_blk,
+	struct xfs_buf		*bp,
+	bool			*clean)
+{
+	struct xlog_op_header	*op_head;
+	xfs_daddr_t		umount_data_blk;
+	xfs_daddr_t		after_umount_blk;
+	int			hblks;
+	int			error;
+	char			*offset;
+
+	*clean = false;
+
+	/*
+	 * Look for unmount record. If we find it, then we know there was a
+	 * clean unmount. Since 'i' could be the last block in the physical
+	 * log, we convert to a log block before comparing to the head_blk.
+	 *
+	 * Save the current tail lsn to use to pass to xlog_clear_stale_blocks()
+	 * below. We won't want to clear the unmount record if there is one, so
+	 * we pass the lsn of the unmount record rather than the block after it.
+	 */
+	if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
+		int	h_size = be32_to_cpu(rhead->h_size);
+		int	h_version = be32_to_cpu(rhead->h_version);
+
+		if ((h_version & XLOG_VERSION_2) &&
+		    (h_size > XLOG_HEADER_CYCLE_SIZE)) {
+			hblks = h_size / XLOG_HEADER_CYCLE_SIZE;
+			if (h_size % XLOG_HEADER_CYCLE_SIZE)
+				hblks++;
+		} else {
+			hblks = 1;
+		}
+	} else {
+		hblks = 1;
+	}
+	after_umount_blk = rhead_blk + hblks + BTOBB(be32_to_cpu(rhead->h_len));
+	after_umount_blk = do_mod(after_umount_blk, log->l_logBBsize);
+	if (*head_blk == after_umount_blk &&
+	    be32_to_cpu(rhead->h_num_logops) == 1) {
+		umount_data_blk = rhead_blk + hblks;
+		umount_data_blk = do_mod(umount_data_blk, log->l_logBBsize);
+		error = xlog_bread(log, umount_data_blk, 1, bp, &offset);
+		if (error)
+			return error;
+
+		op_head = (struct xlog_op_header *)offset;
+		if (op_head->oh_flags & XLOG_UNMOUNT_TRANS) {
+			/*
+			 * Set tail and last sync so that newly written log
+			 * records will point recovery to after the current
+			 * unmount record.
+			 */
+			xlog_assign_atomic_lsn(&log->l_tail_lsn,
+					log->l_curr_cycle, after_umount_blk);
+			xlog_assign_atomic_lsn(&log->l_last_sync_lsn,
+					log->l_curr_cycle, after_umount_blk);
+			*tail_blk = after_umount_blk;
+
+			*clean = true;
+		}
+	}
+
+	return 0;
+}
+
+static void
+xlog_set_state(
+	struct xlog		*log,
+	xfs_daddr_t		head_blk,
+	struct xlog_rec_header	*rhead,
+	xfs_daddr_t		rhead_blk,
+	bool			bump_cycle)
+{
+	/*
+	 * Reset log values according to the state of the log when we
+	 * crashed.  In the case where head_blk == 0, we bump curr_cycle
+	 * one because the next write starts a new cycle rather than
+	 * continuing the cycle of the last good log record.  At this
+	 * point we have guaranteed that all partial log records have been
+	 * accounted for.  Therefore, we know that the last good log record
+	 * written was complete and ended exactly on the end boundary
+	 * of the physical log.
+	 */
+	log->l_prev_block = rhead_blk;
+	log->l_curr_block = (int)head_blk;
+	log->l_curr_cycle = be32_to_cpu(rhead->h_cycle);
+	if (bump_cycle)
+		log->l_curr_cycle++;
+	atomic64_set(&log->l_tail_lsn, be64_to_cpu(rhead->h_tail_lsn));
+	atomic64_set(&log->l_last_sync_lsn, be64_to_cpu(rhead->h_lsn));
+	xlog_assign_grant_head(&log->l_reserve_head.grant, log->l_curr_cycle,
+					BBTOB(log->l_curr_block));
+	xlog_assign_grant_head(&log->l_write_head.grant, log->l_curr_cycle,
+					BBTOB(log->l_curr_block));
+}
+
+/*
  * Find the sync block number or the tail of the log.
  *
  * This will be the block number of the last record to have its
@@ -1238,22 +1330,20 @@
 	xfs_daddr_t		*tail_blk)
 {
 	xlog_rec_header_t	*rhead;
-	xlog_op_header_t	*op_head;
 	char			*offset = NULL;
 	xfs_buf_t		*bp;
 	int			error;
-	xfs_daddr_t		umount_data_blk;
-	xfs_daddr_t		after_umount_blk;
 	xfs_daddr_t		rhead_blk;
 	xfs_lsn_t		tail_lsn;
-	int			hblks;
 	bool			wrapped = false;
+	bool			clean = false;
 
 	/*
 	 * Find previous log record
 	 */
 	if ((error = xlog_find_head(log, head_blk)))
 		return error;
+	ASSERT(*head_blk < INT_MAX);
 
 	bp = xlog_get_bp(log, 1);
 	if (!bp)
@@ -1271,100 +1361,75 @@
 	}
 
 	/*
-	 * Trim the head block back to skip over torn records. We can have
-	 * multiple log I/Os in flight at any time, so we assume CRC failures
-	 * back through the previous several records are torn writes and skip
-	 * them.
+	 * Search backwards through the log looking for the log record header
+	 * block. This wraps all the way back around to the head so something is
+	 * seriously wrong if we can't find it.
 	 */
-	ASSERT(*head_blk < INT_MAX);
-	error = xlog_verify_head(log, head_blk, tail_blk, bp, &rhead_blk,
-				 &rhead, &wrapped);
+	error = xlog_rseek_logrec_hdr(log, *head_blk, *head_blk, 1, bp,
+				      &rhead_blk, &rhead, &wrapped);
+	if (error < 0)
+		return error;
+	if (!error) {
+		xfs_warn(log->l_mp, "%s: couldn't find sync record", __func__);
+		return -EIO;
+	}
+	*tail_blk = BLOCK_LSN(be64_to_cpu(rhead->h_tail_lsn));
+
+	/*
+	 * Set the log state based on the current head record.
+	 */
+	xlog_set_state(log, *head_blk, rhead, rhead_blk, wrapped);
+	tail_lsn = atomic64_read(&log->l_tail_lsn);
+
+	/*
+	 * Look for an unmount record at the head of the log. This sets the log
+	 * state to determine whether recovery is necessary.
+	 */
+	error = xlog_check_unmount_rec(log, head_blk, tail_blk, rhead,
+				       rhead_blk, bp, &clean);
 	if (error)
 		goto done;
 
 	/*
-	 * Reset log values according to the state of the log when we
-	 * crashed.  In the case where head_blk == 0, we bump curr_cycle
-	 * one because the next write starts a new cycle rather than
-	 * continuing the cycle of the last good log record.  At this
-	 * point we have guaranteed that all partial log records have been
-	 * accounted for.  Therefore, we know that the last good log record
-	 * written was complete and ended exactly on the end boundary
-	 * of the physical log.
-	 */
-	log->l_prev_block = rhead_blk;
-	log->l_curr_block = (int)*head_blk;
-	log->l_curr_cycle = be32_to_cpu(rhead->h_cycle);
-	if (wrapped)
-		log->l_curr_cycle++;
-	atomic64_set(&log->l_tail_lsn, be64_to_cpu(rhead->h_tail_lsn));
-	atomic64_set(&log->l_last_sync_lsn, be64_to_cpu(rhead->h_lsn));
-	xlog_assign_grant_head(&log->l_reserve_head.grant, log->l_curr_cycle,
-					BBTOB(log->l_curr_block));
-	xlog_assign_grant_head(&log->l_write_head.grant, log->l_curr_cycle,
-					BBTOB(log->l_curr_block));
-
-	/*
-	 * Look for unmount record.  If we find it, then we know there
-	 * was a clean unmount.  Since 'i' could be the last block in
-	 * the physical log, we convert to a log block before comparing
-	 * to the head_blk.
+	 * Verify the log head if the log is not clean (e.g., we have anything
+	 * but an unmount record at the head). This uses CRC verification to
+	 * detect and trim torn writes. If discovered, CRC failures are
+	 * considered torn writes and the log head is trimmed accordingly.
 	 *
-	 * Save the current tail lsn to use to pass to
-	 * xlog_clear_stale_blocks() below.  We won't want to clear the
-	 * unmount record if there is one, so we pass the lsn of the
-	 * unmount record rather than the block after it.
+	 * Note that we can only run CRC verification when the log is dirty
+	 * because there's no guarantee that the log data behind an unmount
+	 * record is compatible with the current architecture.
 	 */
-	if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
-		int	h_size = be32_to_cpu(rhead->h_size);
-		int	h_version = be32_to_cpu(rhead->h_version);
+	if (!clean) {
+		xfs_daddr_t	orig_head = *head_blk;
 
-		if ((h_version & XLOG_VERSION_2) &&
-		    (h_size > XLOG_HEADER_CYCLE_SIZE)) {
-			hblks = h_size / XLOG_HEADER_CYCLE_SIZE;
-			if (h_size % XLOG_HEADER_CYCLE_SIZE)
-				hblks++;
-		} else {
-			hblks = 1;
-		}
-	} else {
-		hblks = 1;
-	}
-	after_umount_blk = rhead_blk + hblks + BTOBB(be32_to_cpu(rhead->h_len));
-	after_umount_blk = do_mod(after_umount_blk, log->l_logBBsize);
-	tail_lsn = atomic64_read(&log->l_tail_lsn);
-	if (*head_blk == after_umount_blk &&
-	    be32_to_cpu(rhead->h_num_logops) == 1) {
-		umount_data_blk = rhead_blk + hblks;
-		umount_data_blk = do_mod(umount_data_blk, log->l_logBBsize);
-		error = xlog_bread(log, umount_data_blk, 1, bp, &offset);
+		error = xlog_verify_head(log, head_blk, tail_blk, bp,
+					 &rhead_blk, &rhead, &wrapped);
 		if (error)
 			goto done;
 
-		op_head = (xlog_op_header_t *)offset;
-		if (op_head->oh_flags & XLOG_UNMOUNT_TRANS) {
-			/*
-			 * Set tail and last sync so that newly written
-			 * log records will point recovery to after the
-			 * current unmount record.
-			 */
-			xlog_assign_atomic_lsn(&log->l_tail_lsn,
-					log->l_curr_cycle, after_umount_blk);
-			xlog_assign_atomic_lsn(&log->l_last_sync_lsn,
-					log->l_curr_cycle, after_umount_blk);
-			*tail_blk = after_umount_blk;
-
-			/*
-			 * Note that the unmount was clean. If the unmount
-			 * was not clean, we need to know this to rebuild the
-			 * superblock counters from the perag headers if we
-			 * have a filesystem using non-persistent counters.
-			 */
-			log->l_mp->m_flags |= XFS_MOUNT_WAS_CLEAN;
+		/* update in-core state again if the head changed */
+		if (*head_blk != orig_head) {
+			xlog_set_state(log, *head_blk, rhead, rhead_blk,
+				       wrapped);
+			tail_lsn = atomic64_read(&log->l_tail_lsn);
+			error = xlog_check_unmount_rec(log, head_blk, tail_blk,
+						       rhead, rhead_blk, bp,
+						       &clean);
+			if (error)
+				goto done;
 		}
 	}
 
 	/*
+	 * Note that the unmount was clean. If the unmount was not clean, we
+	 * need to know this to rebuild the superblock counters from the perag
+	 * headers if we have a filesystem using non-persistent counters.
+	 */
+	if (clean)
+		log->l_mp->m_flags |= XFS_MOUNT_WAS_CLEAN;
+
+	/*
 	 * Make sure that there are no blocks in front of the head
 	 * with the same cycle number as the head.  This can happen
 	 * because we allow multiple outstanding log writes concurrently,
@@ -4491,7 +4556,7 @@
 	 * know precisely what failed.
 	 */
 	if (pass == XLOG_RECOVER_CRCPASS) {
-		if (rhead->h_crc && crc != le32_to_cpu(rhead->h_crc))
+		if (rhead->h_crc && crc != rhead->h_crc)
 			return -EFSBADCRC;
 		return 0;
 	}
@@ -4502,7 +4567,7 @@
 	 * zero CRC check prevents warnings from being emitted when upgrading
 	 * the kernel from one that does not add CRCs by default.
 	 */
-	if (crc != le32_to_cpu(rhead->h_crc)) {
+	if (crc != rhead->h_crc) {
 		if (rhead->h_crc || xfs_sb_version_hascrc(&log->l_mp->m_sb)) {
 			xfs_alert(log->l_mp,
 		"log record CRC mismatch: found 0x%x, expected 0x%x.",
diff --git a/include/acpi/cppc_acpi.h b/include/acpi/cppc_acpi.h
index 717a298..dad8af3 100644
--- a/include/acpi/cppc_acpi.h
+++ b/include/acpi/cppc_acpi.h
@@ -133,6 +133,5 @@
 /* Methods to interact with the PCC mailbox controller. */
 extern struct mbox_chan *
 	pcc_mbox_request_channel(struct mbox_client *, unsigned int);
-extern int mbox_send_message(struct mbox_chan *chan, void *mssg);
 
 #endif /* _CPPC_ACPI_H*/
diff --git a/include/asm-generic/cputime_nsecs.h b/include/asm-generic/cputime_nsecs.h
index 0419485..0f1c6f3 100644
--- a/include/asm-generic/cputime_nsecs.h
+++ b/include/asm-generic/cputime_nsecs.h
@@ -75,7 +75,7 @@
  */
 static inline cputime_t timespec_to_cputime(const struct timespec *val)
 {
-	u64 ret = val->tv_sec * NSEC_PER_SEC + val->tv_nsec;
+	u64 ret = (u64)val->tv_sec * NSEC_PER_SEC + val->tv_nsec;
 	return (__force cputime_t) ret;
 }
 static inline void cputime_to_timespec(const cputime_t ct, struct timespec *val)
@@ -91,7 +91,8 @@
  */
 static inline cputime_t timeval_to_cputime(const struct timeval *val)
 {
-	u64 ret = val->tv_sec * NSEC_PER_SEC + val->tv_usec * NSEC_PER_USEC;
+	u64 ret = (u64)val->tv_sec * NSEC_PER_SEC +
+			val->tv_usec * NSEC_PER_USEC;
 	return (__force cputime_t) ret;
 }
 static inline void cputime_to_timeval(const cputime_t ct, struct timeval *val)
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 0b3c0d3..c370b26 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -239,6 +239,14 @@
 			    pmd_t *pmdp);
 #endif
 
+#ifndef __HAVE_ARCH_PMDP_HUGE_SPLIT_PREPARE
+static inline void pmdp_huge_split_prepare(struct vm_area_struct *vma,
+					   unsigned long address, pmd_t *pmdp)
+{
+
+}
+#endif
+
 #ifndef __HAVE_ARCH_PTE_SAME
 static inline int pte_same(pte_t pte_a, pte_t pte_b)
 {
diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
index 39e1cb2..35a52a8 100644
--- a/include/asm-generic/qspinlock.h
+++ b/include/asm-generic/qspinlock.h
@@ -120,11 +120,6 @@
 #endif
 
 /*
- * Initializier
- */
-#define	__ARCH_SPIN_LOCK_UNLOCKED	{ ATOMIC_INIT(0) }
-
-/*
  * Remapping spinlock architecture specific functions to the corresponding
  * queued spinlock functions.
  */
diff --git a/include/asm-generic/qspinlock_types.h b/include/asm-generic/qspinlock_types.h
index 85f888e..034acd0 100644
--- a/include/asm-generic/qspinlock_types.h
+++ b/include/asm-generic/qspinlock_types.h
@@ -33,6 +33,11 @@
 } arch_spinlock_t;
 
 /*
+ * Initializier
+ */
+#define	__ARCH_SPIN_LOCK_UNLOCKED	{ ATOMIC_INIT(0) }
+
+/*
  * Bitfields in the atomic value:
  *
  * When NR_CPUS < 16K
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index c4bd0e2..772c784 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -256,6 +256,7 @@
 	.rodata           : AT(ADDR(.rodata) - LOAD_OFFSET) {		\
 		VMLINUX_SYMBOL(__start_rodata) = .;			\
 		*(.rodata) *(.rodata.*)					\
+		*(.data..ro_after_init)	/* Read only after init */	\
 		*(__vermagic)		/* Kernel version magic */	\
 		. = ALIGN(8);						\
 		VMLINUX_SYMBOL(__start___tracepoints_ptrs) = .;		\
diff --git a/include/drm/drm_atomic_helper.h b/include/drm/drm_atomic_helper.h
index 89d008d..fe5efad 100644
--- a/include/drm/drm_atomic_helper.h
+++ b/include/drm/drm_atomic_helper.h
@@ -42,6 +42,10 @@
 			     struct drm_atomic_state *state,
 			     bool async);
 
+bool drm_atomic_helper_framebuffer_changed(struct drm_device *dev,
+					   struct drm_atomic_state *old_state,
+					   struct drm_crtc *crtc);
+
 void drm_atomic_helper_wait_for_vblanks(struct drm_device *dev,
 					struct drm_atomic_state *old_state);
 
diff --git a/include/drm/drm_cache.h b/include/drm/drm_cache.h
index 7bfb063..461a055 100644
--- a/include/drm/drm_cache.h
+++ b/include/drm/drm_cache.h
@@ -35,4 +35,13 @@
 
 void drm_clflush_pages(struct page *pages[], unsigned long num_pages);
 
+static inline bool drm_arch_can_wc_memory(void)
+{
+#if defined(CONFIG_PPC) && !defined(CONFIG_NOT_COHERENT_CACHE)
+	return false;
+#else
+	return true;
+#endif
+}
+
 #endif
diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h
index c65a212..c5b4b81 100644
--- a/include/drm/drm_crtc.h
+++ b/include/drm/drm_crtc.h
@@ -1166,6 +1166,7 @@
 	struct drm_mode_object base;
 
 	char *name;
+	int connector_id;
 	int connector_type;
 	int connector_type_id;
 	bool interlace_allowed;
@@ -2047,6 +2048,7 @@
 	struct list_head fb_list;
 
 	int num_connector;
+	struct ida connector_ida;
 	struct list_head connector_list;
 	int num_encoder;
 	struct list_head encoder_list;
@@ -2200,7 +2202,11 @@
 void drm_connector_unregister(struct drm_connector *connector);
 
 extern void drm_connector_cleanup(struct drm_connector *connector);
-extern unsigned int drm_connector_index(struct drm_connector *connector);
+static inline unsigned drm_connector_index(struct drm_connector *connector)
+{
+	return connector->connector_id;
+}
+
 /* helper to unplug all connectors from sysfs for device */
 extern void drm_connector_unplug_all(struct drm_device *dev);
 
diff --git a/include/drm/drm_dp_mst_helper.h b/include/drm/drm_dp_mst_helper.h
index 24ab178..fdb4705 100644
--- a/include/drm/drm_dp_mst_helper.h
+++ b/include/drm/drm_dp_mst_helper.h
@@ -44,8 +44,6 @@
 /**
  * struct drm_dp_mst_port - MST port
  * @kref: reference count for this port.
- * @guid_valid: for DP 1.2 devices if we have validated the GUID.
- * @guid: guid for DP 1.2 device on this port.
  * @port_num: port number
  * @input: if this port is an input port.
  * @mcs: message capability status - DP 1.2 spec.
@@ -70,10 +68,6 @@
 struct drm_dp_mst_port {
 	struct kref kref;
 
-	/* if dpcd 1.2 device is on this port - its GUID info */
-	bool guid_valid;
-	u8 guid[16];
-
 	u8 port_num;
 	bool input;
 	bool mcs;
@@ -110,10 +104,12 @@
  * @tx_slots: transmission slots for this device.
  * @last_seqno: last sequence number used to talk to this.
  * @link_address_sent: if a link address message has been sent to this device yet.
+ * @guid: guid for DP 1.2 branch device. port under this branch can be
+ * identified by port #.
  *
  * This structure represents an MST branch device, there is one
- * primary branch device at the root, along with any others connected
- * to downstream ports
+ * primary branch device at the root, along with any other branches connected
+ * to downstream port of parent branches.
  */
 struct drm_dp_mst_branch {
 	struct kref kref;
@@ -132,6 +128,9 @@
 	struct drm_dp_sideband_msg_tx *tx_slots[2];
 	int last_seqno;
 	bool link_address_sent;
+
+	/* global unique identifier to identify branch devices */
+	u8 guid[16];
 };
 
 
@@ -406,11 +405,9 @@
  * @conn_base_id: DRM connector ID this mgr is connected to.
  * @down_rep_recv: msg receiver state for down replies.
  * @up_req_recv: msg receiver state for up requests.
- * @lock: protects mst state, primary, guid, dpcd.
+ * @lock: protects mst state, primary, dpcd.
  * @mst_state: if this manager is enabled for an MST capable port.
  * @mst_primary: pointer to the primary branch device.
- * @guid_valid: GUID valid for the primary branch device.
- * @guid: GUID for primary port.
  * @dpcd: cache of DPCD for primary port.
  * @pbn_div: PBN to slots divisor.
  *
@@ -432,13 +429,11 @@
 	struct drm_dp_sideband_msg_rx up_req_recv;
 
 	/* pointer to info about the initial MST device */
-	struct mutex lock; /* protects mst_state + primary + guid + dpcd */
+	struct mutex lock; /* protects mst_state + primary + dpcd */
 
 	bool mst_state;
 	struct drm_dp_mst_branch *mst_primary;
-	/* primary MST device GUID */
-	bool guid_valid;
-	u8 guid[16];
+
 	u8 dpcd[DP_RECEIVER_CAP_SIZE];
 	u8 sink_count;
 	int pbn_div;
diff --git a/include/drm/drm_fixed.h b/include/drm/drm_fixed.h
index d639049..553210c 100644
--- a/include/drm/drm_fixed.h
+++ b/include/drm/drm_fixed.h
@@ -73,18 +73,28 @@
 #define DRM_FIXED_ONE		(1ULL << DRM_FIXED_POINT)
 #define DRM_FIXED_DECIMAL_MASK	(DRM_FIXED_ONE - 1)
 #define DRM_FIXED_DIGITS_MASK	(~DRM_FIXED_DECIMAL_MASK)
+#define DRM_FIXED_EPSILON	1LL
+#define DRM_FIXED_ALMOST_ONE	(DRM_FIXED_ONE - DRM_FIXED_EPSILON)
 
 static inline s64 drm_int2fixp(int a)
 {
 	return ((s64)a) << DRM_FIXED_POINT;
 }
 
-static inline int drm_fixp2int(int64_t a)
+static inline int drm_fixp2int(s64 a)
 {
 	return ((s64)a) >> DRM_FIXED_POINT;
 }
 
-static inline unsigned drm_fixp_msbset(int64_t a)
+static inline int drm_fixp2int_ceil(s64 a)
+{
+	if (a > 0)
+		return drm_fixp2int(a + DRM_FIXED_ALMOST_ONE);
+	else
+		return drm_fixp2int(a - DRM_FIXED_ALMOST_ONE);
+}
+
+static inline unsigned drm_fixp_msbset(s64 a)
 {
 	unsigned shift, sign = (a >> 63) & 1;
 
@@ -136,6 +146,45 @@
 	return result;
 }
 
+static inline s64 drm_fixp_from_fraction(s64 a, s64 b)
+{
+	s64 res;
+	bool a_neg = a < 0;
+	bool b_neg = b < 0;
+	u64 a_abs = a_neg ? -a : a;
+	u64 b_abs = b_neg ? -b : b;
+	u64 rem;
+
+	/* determine integer part */
+	u64 res_abs  = div64_u64_rem(a_abs, b_abs, &rem);
+
+	/* determine fractional part */
+	{
+		u32 i = DRM_FIXED_POINT;
+
+		do {
+			rem <<= 1;
+			res_abs <<= 1;
+			if (rem >= b_abs) {
+				res_abs |= 1;
+				rem -= b_abs;
+			}
+		} while (--i != 0);
+	}
+
+	/* round up LSB */
+	{
+		u64 summand = (rem << 1) >= b_abs;
+
+		res_abs += summand;
+	}
+
+	res = (s64) res_abs;
+	if (a_neg ^ b_neg)
+		res = -res;
+	return res;
+}
+
 static inline s64 drm_fixp_exp(s64 x)
 {
 	s64 tolerance = div64_s64(DRM_FIXED_ONE, 1000000);
diff --git a/include/dt-bindings/clock/tegra210-car.h b/include/dt-bindings/clock/tegra210-car.h
index 6f45aea..0a05b0d 100644
--- a/include/dt-bindings/clock/tegra210-car.h
+++ b/include/dt-bindings/clock/tegra210-car.h
@@ -126,7 +126,7 @@
 /* 104 */
 /* 105 */
 #define TEGRA210_CLK_D_AUDIO 106
-/* 107 ( affects abp -> ape) */
+#define TEGRA210_CLK_APB2APE 107
 /* 108 */
 /* 109 */
 /* 110 */
diff --git a/include/linux/ata.h b/include/linux/ata.h
index d2992bf..c1a2f34 100644
--- a/include/linux/ata.h
+++ b/include/linux/ata.h
@@ -487,8 +487,8 @@
 };
 
 enum ata_ioctls {
-	ATA_IOC_GET_IO32	= 0x309,
-	ATA_IOC_SET_IO32	= 0x324,
+	ATA_IOC_GET_IO32	= 0x309, /* HDIO_GET_32BIT */
+	ATA_IOC_SET_IO32	= 0x324, /* HDIO_SET_32BIT */
 };
 
 /* core structures */
diff --git a/include/linux/atomic.h b/include/linux/atomic.h
index 301de78..6c502cb 100644
--- a/include/linux/atomic.h
+++ b/include/linux/atomic.h
@@ -548,6 +548,27 @@
 }
 #endif
 
+/**
+ * fetch_or - perform *ptr |= mask and return old value of *ptr
+ * @ptr: pointer to value
+ * @mask: mask to OR on the value
+ *
+ * cmpxchg based fetch_or, macro so it works for different integer types
+ */
+#ifndef fetch_or
+#define fetch_or(ptr, mask)						\
+({	typeof(*(ptr)) __old, __val = *(ptr);				\
+	for (;;) {							\
+		__old = cmpxchg((ptr), __val, __val | (mask));		\
+		if (__old == __val)					\
+			break;						\
+		__val = __old;						\
+	}								\
+	__old;								\
+})
+#endif
+
+
 #ifdef CONFIG_GENERIC_ATOMIC64
 #include <asm-generic/atomic64.h>
 #endif
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 5349e68..88bc64f 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -310,6 +310,38 @@
 	bio->bi_flags &= ~(1U << bit);
 }
 
+static inline void bio_get_first_bvec(struct bio *bio, struct bio_vec *bv)
+{
+	*bv = bio_iovec(bio);
+}
+
+static inline void bio_get_last_bvec(struct bio *bio, struct bio_vec *bv)
+{
+	struct bvec_iter iter = bio->bi_iter;
+	int idx;
+
+	if (unlikely(!bio_multiple_segments(bio))) {
+		*bv = bio_iovec(bio);
+		return;
+	}
+
+	bio_advance_iter(bio, &iter, iter.bi_size);
+
+	if (!iter.bi_bvec_done)
+		idx = iter.bi_idx - 1;
+	else	/* in the middle of bvec */
+		idx = iter.bi_idx;
+
+	*bv = bio->bi_io_vec[idx];
+
+	/*
+	 * iter.bi_bvec_done records actual length of the last bvec
+	 * if this bio ends in the middle of one io vector
+	 */
+	if (iter.bi_bvec_done)
+		bv->bv_len = iter.bi_bvec_done;
+}
+
 enum bip_flags {
 	BIP_BLOCK_INTEGRITY	= 1 << 0, /* block layer owns integrity data */
 	BIP_MAPPED_INTEGRITY	= 1 << 1, /* ref tag has been remapped */
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 29189ae..413c84f 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -682,9 +682,12 @@
 /*
  * q->prep_rq_fn return values
  */
-#define BLKPREP_OK		0	/* serve it */
-#define BLKPREP_KILL		1	/* fatal error, kill */
-#define BLKPREP_DEFER		2	/* leave on queue */
+enum {
+	BLKPREP_OK,		/* serve it */
+	BLKPREP_KILL,		/* fatal error, kill, return -EIO */
+	BLKPREP_DEFER,		/* leave on queue */
+	BLKPREP_INVALID,	/* invalid command, kill, return -EREMOTEIO */
+};
 
 extern unsigned long blk_max_low_pfn, blk_max_pfn;
 
@@ -892,7 +895,7 @@
 {
 	struct request_queue *q = rq->q;
 
-	if (unlikely(rq->cmd_type == REQ_TYPE_BLOCK_PC))
+	if (unlikely(rq->cmd_type != REQ_TYPE_FS))
 		return q->limits.max_hw_sectors;
 
 	if (!q->limits.chunk_sectors || (rq->cmd_flags & REQ_DISCARD))
@@ -1369,6 +1372,13 @@
 	page_cache_release(p.v);
 }
 
+static inline bool __bvec_gap_to_prev(struct request_queue *q,
+				struct bio_vec *bprv, unsigned int offset)
+{
+	return offset ||
+		((bprv->bv_offset + bprv->bv_len) & queue_virt_boundary(q));
+}
+
 /*
  * Check if adding a bio_vec after bprv with offset would create a gap in
  * the SG list. Most drivers don't care about this, but some do.
@@ -1378,18 +1388,22 @@
 {
 	if (!queue_virt_boundary(q))
 		return false;
-	return offset ||
-		((bprv->bv_offset + bprv->bv_len) & queue_virt_boundary(q));
+	return __bvec_gap_to_prev(q, bprv, offset);
 }
 
 static inline bool bio_will_gap(struct request_queue *q, struct bio *prev,
 			 struct bio *next)
 {
-	if (!bio_has_data(prev))
-		return false;
+	if (bio_has_data(prev) && queue_virt_boundary(q)) {
+		struct bio_vec pb, nb;
 
-	return bvec_gap_to_prev(q, &prev->bi_io_vec[prev->bi_vcnt - 1],
-				next->bi_io_vec[0].bv_offset);
+		bio_get_last_bvec(prev, &pb);
+		bio_get_first_bvec(next, &nb);
+
+		return __bvec_gap_to_prev(q, &pb, nb.bv_offset);
+	}
+
+	return false;
 }
 
 static inline bool req_gap_back_merge(struct request *req, struct bio *bio)
diff --git a/include/linux/cache.h b/include/linux/cache.h
index 17e7e82..1be04f8 100644
--- a/include/linux/cache.h
+++ b/include/linux/cache.h
@@ -12,10 +12,24 @@
 #define SMP_CACHE_BYTES L1_CACHE_BYTES
 #endif
 
+/*
+ * __read_mostly is used to keep rarely changing variables out of frequently
+ * updated cachelines. If an architecture doesn't support it, ignore the
+ * hint.
+ */
 #ifndef __read_mostly
 #define __read_mostly
 #endif
 
+/*
+ * __ro_after_init is used to mark things that are read-only after init (i.e.
+ * after mark_rodata_ro() has been called). These are effectively read-only,
+ * but may get written to during init, so can't live in .rodata (via "const").
+ */
+#ifndef __ro_after_init
+#define __ro_after_init __attribute__((__section__(".data..ro_after_init")))
+#endif
+
 #ifndef ____cacheline_aligned
 #define ____cacheline_aligned __attribute__((__aligned__(SMP_CACHE_BYTES)))
 #endif
diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h
index f89b31d..15151f3 100644
--- a/include/linux/ceph/ceph_features.h
+++ b/include/linux/ceph/ceph_features.h
@@ -63,6 +63,19 @@
 #define CEPH_FEATURE_OSD_MIN_SIZE_RECOVERY (1ULL<<49)
 // duplicated since it was introduced at the same time as MIN_SIZE_RECOVERY
 #define CEPH_FEATURE_OSD_PROXY_FEATURES (1ULL<<49)  /* overlap w/ above */
+#define CEPH_FEATURE_MON_METADATA (1ULL<<50)
+#define CEPH_FEATURE_OSD_BITWISE_HOBJ_SORT (1ULL<<51) /* can sort objs bitwise */
+#define CEPH_FEATURE_OSD_PROXY_WRITE_FEATURES (1ULL<<52)
+#define CEPH_FEATURE_ERASURE_CODE_PLUGINS_V3 (1ULL<<53)
+#define CEPH_FEATURE_OSD_HITSET_GMT (1ULL<<54)
+#define CEPH_FEATURE_HAMMER_0_94_4 (1ULL<<55)
+#define CEPH_FEATURE_NEW_OSDOP_ENCODING   (1ULL<<56) /* New, v7 encoding */
+#define CEPH_FEATURE_MON_STATEFUL_SUB (1ULL<<57) /* stateful mon subscription */
+#define CEPH_FEATURE_MON_ROUTE_OSDMAP (1ULL<<57) /* peon sends osdmaps */
+#define CEPH_FEATURE_CRUSH_TUNABLES5	(1ULL<<58) /* chooseleaf stable mode */
+// duplicated since it was introduced at the same time as CEPH_FEATURE_CRUSH_TUNABLES5
+#define CEPH_FEATURE_NEW_OSDOPREPLY_ENCODING   (1ULL<<58) /* New, v7 encoding */
+#define CEPH_FEATURE_FS_FILE_LAYOUT_V2       (1ULL<<58) /* file_layout_t */
 
 /*
  * The introduction of CEPH_FEATURE_OSD_SNAPMAPPER caused the feature
@@ -108,7 +121,9 @@
 	 CEPH_FEATURE_CRUSH_TUNABLES3 |		\
 	 CEPH_FEATURE_OSD_PRIMARY_AFFINITY |	\
 	 CEPH_FEATURE_MSGR_KEEPALIVE2 |		\
-	 CEPH_FEATURE_CRUSH_V4)
+	 CEPH_FEATURE_CRUSH_V4 |		\
+	 CEPH_FEATURE_CRUSH_TUNABLES5 |		\
+	 CEPH_FEATURE_NEW_OSDOPREPLY_ENCODING)
 
 #define CEPH_FEATURES_REQUIRED_DEFAULT   \
 	(CEPH_FEATURE_NOSRCADDR |	 \
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 7f540f7..789471d 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -127,6 +127,12 @@
 	 */
 	u64 serial_nr;
 
+	/*
+	 * Incremented by online self and children.  Used to guarantee that
+	 * parents are not offlined before their children.
+	 */
+	atomic_t online_cnt;
+
 	/* percpu_ref killing and RCU release */
 	struct rcu_head rcu_head;
 	struct work_struct destroy_work;
diff --git a/include/linux/cleancache.h b/include/linux/cleancache.h
index bda5ec0b4..fccf7f4 100644
--- a/include/linux/cleancache.h
+++ b/include/linux/cleancache.h
@@ -37,7 +37,7 @@
 	void (*invalidate_fs)(int);
 };
 
-extern int cleancache_register_ops(struct cleancache_ops *ops);
+extern int cleancache_register_ops(const struct cleancache_ops *ops);
 extern void __cleancache_init_fs(struct super_block *);
 extern void __cleancache_init_shared_fs(struct super_block *);
 extern int  __cleancache_get_page(struct page *);
@@ -48,14 +48,14 @@
 
 #ifdef CONFIG_CLEANCACHE
 #define cleancache_enabled (1)
-static inline bool cleancache_fs_enabled(struct page *page)
-{
-	return page->mapping->host->i_sb->cleancache_poolid >= 0;
-}
 static inline bool cleancache_fs_enabled_mapping(struct address_space *mapping)
 {
 	return mapping->host->i_sb->cleancache_poolid >= 0;
 }
+static inline bool cleancache_fs_enabled(struct page *page)
+{
+	return cleancache_fs_enabled_mapping(page->mapping);
+}
 #else
 #define cleancache_enabled (0)
 #define cleancache_fs_enabled(_page) (0)
@@ -89,11 +89,9 @@
 
 static inline int cleancache_get_page(struct page *page)
 {
-	int ret = -1;
-
 	if (cleancache_enabled && cleancache_fs_enabled(page))
-		ret = __cleancache_get_page(page);
-	return ret;
+		return __cleancache_get_page(page);
+	return -1;
 }
 
 static inline void cleancache_put_page(struct page *page)
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 00b042c..b5ff988 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -20,12 +20,14 @@
 # define __pmem		__attribute__((noderef, address_space(5)))
 #ifdef CONFIG_SPARSE_RCU_POINTER
 # define __rcu		__attribute__((noderef, address_space(4)))
-#else
+#else /* CONFIG_SPARSE_RCU_POINTER */
 # define __rcu
-#endif
+#endif /* CONFIG_SPARSE_RCU_POINTER */
+# define __private	__attribute__((noderef))
 extern void __chk_user_ptr(const volatile void __user *);
 extern void __chk_io_ptr(const volatile void __iomem *);
-#else
+# define ACCESS_PRIVATE(p, member) (*((typeof((p)->member) __force *) &(p)->member))
+#else /* __CHECKER__ */
 # define __user
 # define __kernel
 # define __safe
@@ -44,7 +46,9 @@
 # define __percpu
 # define __rcu
 # define __pmem
-#endif
+# define __private
+# define ACCESS_PRIVATE(p, member) ((p)->member)
+#endif /* __CHECKER__ */
 
 /* Indirect macros required for expanded argument pasting, eg. __LINE__. */
 #define ___PASTE(a,b) a##b
@@ -144,7 +148,7 @@
  */
 #define if(cond, ...) __trace_if( (cond , ## __VA_ARGS__) )
 #define __trace_if(cond) \
-	if (__builtin_constant_p((cond)) ? !!(cond) :			\
+	if (__builtin_constant_p(!!(cond)) ? !!(cond) :			\
 	({								\
 		int ______r;						\
 		static struct ftrace_branch_data			\
@@ -263,8 +267,9 @@
  * In contrast to ACCESS_ONCE these two macros will also work on aggregate
  * data types like structs or unions. If the size of the accessed data
  * type exceeds the word size of the machine (e.g., 32 bits or 64 bits)
- * READ_ONCE() and WRITE_ONCE()  will fall back to memcpy and print a
- * compile-time warning.
+ * READ_ONCE() and WRITE_ONCE() will fall back to memcpy(). There's at
+ * least two memcpy()s: one for the __builtin_memcpy() and then one for
+ * the macro doing the copy of variable - '__u' allocated on the stack.
  *
  * Their two major use cases are: (1) Mediating communication between
  * process-level code and irq/NMI handlers, all running on the same CPU,
diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index 85a868c..fea160e 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -137,6 +137,8 @@
 	task_unlock(current);
 }
 
+extern void cpuset_post_attach_flush(void);
+
 #else /* !CONFIG_CPUSETS */
 
 static inline bool cpusets_enabled(void) { return false; }
@@ -243,6 +245,10 @@
 	return false;
 }
 
+static inline void cpuset_post_attach_flush(void)
+{
+}
+
 #endif /* !CONFIG_CPUSETS */
 
 #endif /* _LINUX_CPUSET_H */
diff --git a/include/linux/crush/crush.h b/include/linux/crush/crush.h
index 48b4930..be8f12b 100644
--- a/include/linux/crush/crush.h
+++ b/include/linux/crush/crush.h
@@ -59,7 +59,8 @@
 	CRUSH_RULE_SET_CHOOSELEAF_TRIES = 9, /* override chooseleaf_descend_once */
 	CRUSH_RULE_SET_CHOOSE_LOCAL_TRIES = 10,
 	CRUSH_RULE_SET_CHOOSE_LOCAL_FALLBACK_TRIES = 11,
-	CRUSH_RULE_SET_CHOOSELEAF_VARY_R = 12
+	CRUSH_RULE_SET_CHOOSELEAF_VARY_R = 12,
+	CRUSH_RULE_SET_CHOOSELEAF_STABLE = 13
 };
 
 /*
@@ -205,6 +206,11 @@
 	 * mappings line up a bit better with previous mappings. */
 	__u8 chooseleaf_vary_r;
 
+	/* if true, it makes chooseleaf firstn to return stable results (if
+	 * no local retry) so that data migrations would be optimal when some
+	 * device fails. */
+	__u8 chooseleaf_stable;
+
 #ifndef __KERNEL__
 	/*
 	 * version 0 (original) of straw_calc has various flaws.  version 1
diff --git a/include/linux/dax.h b/include/linux/dax.h
index 8204c3d..636dd59 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -7,13 +7,24 @@
 
 ssize_t dax_do_io(struct kiocb *, struct inode *, struct iov_iter *, loff_t,
 		  get_block_t, dio_iodone_t, int flags);
-int dax_clear_blocks(struct inode *, sector_t block, long size);
+int dax_clear_sectors(struct block_device *bdev, sector_t _sector, long _size);
 int dax_zero_page_range(struct inode *, loff_t from, unsigned len, get_block_t);
 int dax_truncate_page(struct inode *, loff_t from, get_block_t);
 int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t,
 		dax_iodone_t);
 int __dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t,
 		dax_iodone_t);
+
+#ifdef CONFIG_FS_DAX
+struct page *read_dax_sector(struct block_device *bdev, sector_t n);
+#else
+static inline struct page *read_dax_sector(struct block_device *bdev,
+		sector_t n)
+{
+	return ERR_PTR(-ENXIO);
+}
+#endif
+
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 int dax_pmd_fault(struct vm_area_struct *, unsigned long addr, pmd_t *,
 				unsigned int flags, get_block_t, dax_iodone_t);
@@ -41,6 +52,8 @@
 {
 	return mapping->host && IS_DAX(mapping->host);
 }
-int dax_writeback_mapping_range(struct address_space *mapping, loff_t start,
-		loff_t end);
+
+struct writeback_control;
+int dax_writeback_mapping_range(struct address_space *mapping,
+		struct block_device *bdev, struct writeback_control *wbc);
 #endif
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 7781ce11..c4b5f4b 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -409,9 +409,7 @@
  */
 static inline unsigned __d_entry_type(const struct dentry *dentry)
 {
-	unsigned type = READ_ONCE(dentry->d_flags);
-	smp_rmb();
-	return type & DCACHE_ENTRY_TYPE;
+	return dentry->d_flags & DCACHE_ENTRY_TYPE;
 }
 
 static inline bool d_is_miss(const struct dentry *dentry)
diff --git a/include/linux/devpts_fs.h b/include/linux/devpts_fs.h
index 251a209..e0ee0b3 100644
--- a/include/linux/devpts_fs.h
+++ b/include/linux/devpts_fs.h
@@ -19,6 +19,8 @@
 
 int devpts_new_index(struct inode *ptmx_inode);
 void devpts_kill_index(struct inode *ptmx_inode, int idx);
+void devpts_add_ref(struct inode *ptmx_inode);
+void devpts_del_ref(struct inode *ptmx_inode);
 /* mknod in devpts */
 struct inode *devpts_pty_new(struct inode *ptmx_inode, dev_t device, int index,
 		void *priv);
@@ -32,6 +34,8 @@
 /* Dummy stubs in the no-pty case */
 static inline int devpts_new_index(struct inode *ptmx_inode) { return -EINVAL; }
 static inline void devpts_kill_index(struct inode *ptmx_inode, int idx) { }
+static inline void devpts_add_ref(struct inode *ptmx_inode) { }
+static inline void devpts_del_ref(struct inode *ptmx_inode) { }
 static inline struct inode *devpts_pty_new(struct inode *ptmx_inode,
 		dev_t device, int index, void *priv)
 {
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 75857cd..5e45cf9 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -386,7 +386,7 @@
 	if (dma_release_from_coherent(dev, get_order(size), cpu_addr))
 		return;
 
-	if (!ops->free)
+	if (!ops->free || !cpu_addr)
 		return;
 
 	debug_dma_free_coherent(dev, size, cpu_addr, dma_handle);
@@ -641,31 +641,40 @@
 }
 #endif /* CONFIG_HAVE_GENERIC_DMA_COHERENT */
 
-static inline void *dma_alloc_writecombine(struct device *dev, size_t size,
-					   dma_addr_t *dma_addr, gfp_t gfp)
+static inline void *dma_alloc_wc(struct device *dev, size_t size,
+				 dma_addr_t *dma_addr, gfp_t gfp)
 {
 	DEFINE_DMA_ATTRS(attrs);
 	dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
 	return dma_alloc_attrs(dev, size, dma_addr, gfp, &attrs);
 }
+#ifndef dma_alloc_writecombine
+#define dma_alloc_writecombine dma_alloc_wc
+#endif
 
-static inline void dma_free_writecombine(struct device *dev, size_t size,
-					 void *cpu_addr, dma_addr_t dma_addr)
+static inline void dma_free_wc(struct device *dev, size_t size,
+			       void *cpu_addr, dma_addr_t dma_addr)
 {
 	DEFINE_DMA_ATTRS(attrs);
 	dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
 	return dma_free_attrs(dev, size, cpu_addr, dma_addr, &attrs);
 }
+#ifndef dma_free_writecombine
+#define dma_free_writecombine dma_free_wc
+#endif
 
-static inline int dma_mmap_writecombine(struct device *dev,
-					struct vm_area_struct *vma,
-					void *cpu_addr, dma_addr_t dma_addr,
-					size_t size)
+static inline int dma_mmap_wc(struct device *dev,
+			      struct vm_area_struct *vma,
+			      void *cpu_addr, dma_addr_t dma_addr,
+			      size_t size)
 {
 	DEFINE_DMA_ATTRS(attrs);
 	dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
 	return dma_mmap_attrs(dev, vma, cpu_addr, dma_addr, size, &attrs);
 }
+#ifndef dma_mmap_writecombine
+#define dma_mmap_writecombine dma_mmap_wc
+#endif
 
 #ifdef CONFIG_NEED_DMA_MAP_STATE
 #define DEFINE_DMA_UNMAP_ADDR(ADDR_NAME)        dma_addr_t ADDR_NAME
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 569b5a8..47be3ad 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -1199,7 +1199,10 @@
 struct efivar_entry *efivar_entry_find(efi_char16_t *name, efi_guid_t guid,
 				       struct list_head *head, bool remove);
 
-bool efivar_validate(efi_char16_t *var_name, u8 *data, unsigned long len);
+bool efivar_validate(efi_guid_t vendor, efi_char16_t *var_name, u8 *data,
+		     unsigned long data_size);
+bool efivar_variable_is_removable(efi_guid_t vendor, const char *name,
+				  size_t len);
 
 extern struct work_struct efivar_work;
 void efivar_run_worker(void);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 1a20462..ae68100 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -484,9 +484,6 @@
 	int			bd_fsfreeze_count;
 	/* Mutex for freeze */
 	struct mutex		bd_fsfreeze_mutex;
-#ifdef CONFIG_FS_DAX
-	int			bd_map_count;
-#endif
 };
 
 /*
@@ -2907,7 +2904,7 @@
 
 static inline bool io_is_direct(struct file *filp)
 {
-	return (filp->f_flags & O_DIRECT) || IS_DAX(file_inode(filp));
+	return (filp->f_flags & O_DIRECT) || IS_DAX(filp->f_mapping->host);
 }
 
 static inline int iocb_flags(struct file *file)
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 6b7e89f..533c440 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -220,10 +220,7 @@
 	/* List of marks by group->i_fsnotify_marks. Also reused for queueing
 	 * mark into destroy_list when it's waiting for the end of SRCU period
 	 * before it can be freed. [group->mark_mutex] */
-	union {
-		struct list_head g_list;
-		struct rcu_head g_rcu;
-	};
+	struct list_head g_list;
 	/* Protects inode / mnt pointers, flags, masks */
 	spinlock_t lock;
 	/* List of marks for inode / vfsmount [obj_lock] */
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 0639dcc..6d9df3f 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -165,7 +165,6 @@
 	ftrace_func_t			saved_func;
 	int __percpu			*disabled;
 #ifdef CONFIG_DYNAMIC_FTRACE
-	int				nr_trampolines;
 	struct ftrace_ops_hash		local_hash;
 	struct ftrace_ops_hash		*func_hash;
 	struct ftrace_ops_hash		old_hash;
@@ -604,6 +603,7 @@
 
 extern int skip_trace(unsigned long ip);
 extern void ftrace_module_init(struct module *mod);
+extern void ftrace_module_enable(struct module *mod);
 extern void ftrace_release_mod(struct module *mod);
 
 extern void ftrace_disable_daemon(void);
@@ -613,8 +613,9 @@
 static inline int ftrace_force_update(void) { return 0; }
 static inline void ftrace_disable_daemon(void) { }
 static inline void ftrace_enable_daemon(void) { }
-static inline void ftrace_release_mod(struct module *mod) {}
-static inline void ftrace_module_init(struct module *mod) {}
+static inline void ftrace_module_init(struct module *mod) { }
+static inline void ftrace_module_enable(struct module *mod) { }
+static inline void ftrace_release_mod(struct module *mod) { }
 static inline __init int register_ftrace_command(struct ftrace_func_command *cmd)
 {
 	return -EINVAL;
@@ -712,6 +713,18 @@
 #define CALLER_ADDR5 ((unsigned long)ftrace_return_address(5))
 #define CALLER_ADDR6 ((unsigned long)ftrace_return_address(6))
 
+static inline unsigned long get_lock_parent_ip(void)
+{
+	unsigned long addr = CALLER_ADDR0;
+
+	if (!in_lock_functions(addr))
+		return addr;
+	addr = CALLER_ADDR1;
+	if (!in_lock_functions(addr))
+		return addr;
+	return CALLER_ADDR2;
+}
+
 #ifdef CONFIG_IRQSOFF_TRACER
   extern void time_hardirqs_on(unsigned long a0, unsigned long a1);
   extern void time_hardirqs_off(unsigned long a0, unsigned long a1);
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 28ad5f6..af1f2b2 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -547,16 +547,16 @@
 }
 #endif /* CONFIG_PM_SLEEP */
 
-#ifdef CONFIG_CMA
-
+#if (defined(CONFIG_MEMORY_ISOLATION) && defined(CONFIG_COMPACTION)) || defined(CONFIG_CMA)
 /* The below functions must be run on a range from a single zone. */
 extern int alloc_contig_range(unsigned long start, unsigned long end,
 			      unsigned migratetype);
 extern void free_contig_range(unsigned long pfn, unsigned nr_pages);
+#endif
 
+#ifdef CONFIG_CMA
 /* CMA stuff */
 extern void init_cma_reserved_pageblock(struct page *page);
-
 #endif
 
 #endif /* __LINUX_GFP_H */
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 76dd4f0..2ead22d 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -87,7 +87,8 @@
  * @function:	timer expiry callback function
  * @base:	pointer to the timer base (per cpu and per clock)
  * @state:	state information (See bit values above)
- * @start_pid: timer statistics field to store the pid of the task which
+ * @is_rel:	Set if the timer was armed relative
+ * @start_pid:  timer statistics field to store the pid of the task which
  *		started the timer
  * @start_site:	timer statistics field to store the site where the timer
  *		was started
@@ -101,7 +102,8 @@
 	ktime_t				_softexpires;
 	enum hrtimer_restart		(*function)(struct hrtimer *);
 	struct hrtimer_clock_base	*base;
-	unsigned long			state;
+	u8				state;
+	u8				is_rel;
 #ifdef CONFIG_TIMER_STATS
 	int				start_pid;
 	void				*start_site;
@@ -321,6 +323,27 @@
 
 #endif
 
+static inline ktime_t
+__hrtimer_expires_remaining_adjusted(const struct hrtimer *timer, ktime_t now)
+{
+	ktime_t rem = ktime_sub(timer->node.expires, now);
+
+	/*
+	 * Adjust relative timers for the extra we added in
+	 * hrtimer_start_range_ns() to prevent short timeouts.
+	 */
+	if (IS_ENABLED(CONFIG_TIME_LOW_RES) && timer->is_rel)
+		rem.tv64 -= hrtimer_resolution;
+	return rem;
+}
+
+static inline ktime_t
+hrtimer_expires_remaining_adjusted(const struct hrtimer *timer)
+{
+	return __hrtimer_expires_remaining_adjusted(timer,
+						    timer->base->get_time());
+}
+
 extern void clock_was_set(void);
 #ifdef CONFIG_TIMERFD
 extern void timerfd_clock_was_set(void);
@@ -390,7 +413,12 @@
 }
 
 /* Query timers: */
-extern ktime_t hrtimer_get_remaining(const struct hrtimer *timer);
+extern ktime_t __hrtimer_get_remaining(const struct hrtimer *timer, bool adjust);
+
+static inline ktime_t hrtimer_get_remaining(const struct hrtimer *timer)
+{
+	return __hrtimer_get_remaining(timer, false);
+}
 
 extern u64 hrtimer_get_next_event(void);
 
diff --git a/include/linux/init.h b/include/linux/init.h
index b449f37..aedb254 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -142,6 +142,10 @@
 void __init load_default_modules(void);
 int __init init_rootfs(void);
 
+#ifdef CONFIG_DEBUG_RODATA
+void mark_rodata_ro(void);
+#endif
+
 extern void (*late_time_init)(void);
 
 extern bool initcall_debug;
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 821273c..2d9b6500 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -235,6 +235,9 @@
 /* low 64 bit */
 #define dma_frcd_page_addr(d) (d & (((u64)-1) << PAGE_SHIFT))
 
+/* PRS_REG */
+#define DMA_PRS_PPR	((u32)1)
+
 #define IOMMU_WAIT_OP(iommu, offset, op, cond, sts)			\
 do {									\
 	cycles_t start_time = get_cycles();				\
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index f28dff3..a5c539f 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -133,8 +133,9 @@
 
 /**
  * struct iommu_ops - iommu ops and capabilities
- * @domain_init: init iommu domain
- * @domain_destroy: destroy iommu domain
+ * @capable: check capability
+ * @domain_alloc: allocate iommu domain
+ * @domain_free: free iommu domain
  * @attach_dev: attach device to an iommu domain
  * @detach_dev: detach device from an iommu domain
  * @map: map a physically contiguous memory region to an iommu domain
@@ -144,8 +145,15 @@
  * @iova_to_phys: translate iova to physical address
  * @add_device: add device to iommu grouping
  * @remove_device: remove device from iommu grouping
+ * @device_group: find iommu group for a particular device
  * @domain_get_attr: Query domain attributes
  * @domain_set_attr: Change domain attributes
+ * @get_dm_regions: Request list of direct mapping requirements for a device
+ * @put_dm_regions: Free list of direct mapping requirements for a device
+ * @domain_window_enable: Configure and enable a particular window for a domain
+ * @domain_window_disable: Disable a particular window for a domain
+ * @domain_set_windows: Set the number of windows for a domain
+ * @domain_get_windows: Return the number of windows for a domain
  * @of_xlate: add OF master IDs to iommu grouping
  * @pgsize_bitmap: bitmap of supported page sizes
  * @priv: per-instance data private to the iommu driver
@@ -182,9 +190,9 @@
 	int (*domain_window_enable)(struct iommu_domain *domain, u32 wnd_nr,
 				    phys_addr_t paddr, u64 size, int prot);
 	void (*domain_window_disable)(struct iommu_domain *domain, u32 wnd_nr);
-	/* Set the numer of window per domain */
+	/* Set the number of windows per domain */
 	int (*domain_set_windows)(struct iommu_domain *domain, u32 w_count);
-	/* Get the numer of window per domain */
+	/* Get the number of windows per domain */
 	u32 (*domain_get_windows)(struct iommu_domain *domain);
 
 #ifdef CONFIG_OF_IOMMU
diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index 24bea08..afb4559 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -20,6 +20,7 @@
 	resource_size_t end;
 	const char *name;
 	unsigned long flags;
+	unsigned long desc;
 	struct resource *parent, *sibling, *child;
 };
 
@@ -49,12 +50,19 @@
 #define IORESOURCE_WINDOW	0x00200000	/* forwarded by bridge */
 #define IORESOURCE_MUXED	0x00400000	/* Resource is software muxed */
 
+#define IORESOURCE_EXT_TYPE_BITS 0x01000000	/* Resource extended types */
+#define IORESOURCE_SYSRAM	0x01000000	/* System RAM (modifier) */
+
 #define IORESOURCE_EXCLUSIVE	0x08000000	/* Userland may not map this resource */
+
 #define IORESOURCE_DISABLED	0x10000000
 #define IORESOURCE_UNSET	0x20000000	/* No address assigned yet */
 #define IORESOURCE_AUTO		0x40000000
 #define IORESOURCE_BUSY		0x80000000	/* Driver has marked this resource busy */
 
+/* I/O resource extended types */
+#define IORESOURCE_SYSTEM_RAM		(IORESOURCE_MEM|IORESOURCE_SYSRAM)
+
 /* PnP IRQ specific bits (IORESOURCE_BITS) */
 #define IORESOURCE_IRQ_HIGHEDGE		(1<<0)
 #define IORESOURCE_IRQ_LOWEDGE		(1<<1)
@@ -105,6 +113,22 @@
 /* PCI control bits.  Shares IORESOURCE_BITS with above PCI ROM.  */
 #define IORESOURCE_PCI_FIXED		(1<<4)	/* Do not move resource */
 
+/*
+ * I/O Resource Descriptors
+ *
+ * Descriptors are used by walk_iomem_res_desc() and region_intersects()
+ * for searching a specific resource range in the iomem table.  Assign
+ * a new descriptor when a resource range supports the search interfaces.
+ * Otherwise, resource.desc must be set to IORES_DESC_NONE (0).
+ */
+enum {
+	IORES_DESC_NONE				= 0,
+	IORES_DESC_CRASH_KERNEL			= 1,
+	IORES_DESC_ACPI_TABLES			= 2,
+	IORES_DESC_ACPI_NV_STORAGE		= 3,
+	IORES_DESC_PERSISTENT_MEMORY		= 4,
+	IORES_DESC_PERSISTENT_MEMORY_LEGACY	= 5,
+};
 
 /* helpers to define resources */
 #define DEFINE_RES_NAMED(_start, _size, _name, _flags)			\
@@ -113,6 +137,7 @@
 		.end = (_start) + (_size) - 1,				\
 		.name = (_name),					\
 		.flags = (_flags),					\
+		.desc = IORES_DESC_NONE,				\
 	}
 
 #define DEFINE_RES_IO_NAMED(_start, _size, _name)			\
@@ -170,6 +195,10 @@
 {
 	return res->flags & IORESOURCE_TYPE_BITS;
 }
+static inline unsigned long resource_ext_type(const struct resource *res)
+{
+	return res->flags & IORESOURCE_EXT_TYPE_BITS;
+}
 /* True iff r1 completely contains r2 */
 static inline bool resource_contains(struct resource *r1, struct resource *r2)
 {
@@ -239,8 +268,8 @@
 walk_system_ram_res(u64 start, u64 end, void *arg,
 		    int (*func)(u64, u64, void *));
 extern int
-walk_iomem_res(char *name, unsigned long flags, u64 start, u64 end, void *arg,
-	       int (*func)(u64, u64, void *));
+walk_iomem_res_desc(unsigned long desc, unsigned long flags, u64 start, u64 end,
+		    void *arg, int (*func)(u64, u64, void *));
 
 /* True if any part of r1 overlaps r2 */
 static inline bool resource_overlaps(struct resource *r1, struct resource *r2)
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 3c1c967..cd14cd4 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -137,7 +137,7 @@
  * @msi_desc:		MSI descriptor
  */
 struct irq_common_data {
-	unsigned int		state_use_accessors;
+	unsigned int		__private state_use_accessors;
 #ifdef CONFIG_NUMA
 	unsigned int		node;
 #endif
@@ -208,7 +208,7 @@
 	IRQD_FORWARDED_TO_VCPU		= (1 << 20),
 };
 
-#define __irqd_to_state(d)		((d)->common->state_use_accessors)
+#define __irqd_to_state(d) ACCESS_PRIVATE((d)->common, state_use_accessors)
 
 static inline bool irqd_is_setaffinity_pending(struct irq_data *d)
 {
@@ -299,6 +299,8 @@
 	__irqd_to_state(d) &= ~IRQD_FORWARDED_TO_VCPU;
 }
 
+#undef __irqd_to_state
+
 static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d)
 {
 	return d->hwirq;
diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index f64622a..04579d9 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -70,6 +70,7 @@
  */
 enum irq_domain_bus_token {
 	DOMAIN_BUS_ANY		= 0,
+	DOMAIN_BUS_WIRED,
 	DOMAIN_BUS_PCI_MSI,
 	DOMAIN_BUS_PLATFORM_MSI,
 	DOMAIN_BUS_NEXUS,
diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index 4b9f85c..0fdc798 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -1,6 +1,7 @@
 #ifndef _LINUX_KASAN_H
 #define _LINUX_KASAN_H
 
+#include <linux/sched.h>
 #include <linux/types.h>
 
 struct kmem_cache;
@@ -13,7 +14,6 @@
 
 #include <asm/kasan.h>
 #include <asm/pgtable.h>
-#include <linux/sched.h>
 
 extern unsigned char kasan_zero_page[PAGE_SIZE];
 extern pte_t kasan_zero_pte[PTRS_PER_PTE];
@@ -43,6 +43,8 @@
 
 void kasan_unpoison_shadow(const void *address, size_t size);
 
+void kasan_unpoison_task_stack(struct task_struct *task);
+
 void kasan_alloc_pages(struct page *page, unsigned int order);
 void kasan_free_pages(struct page *page, unsigned int order);
 
@@ -66,6 +68,8 @@
 
 static inline void kasan_unpoison_shadow(const void *address, size_t size) {}
 
+static inline void kasan_unpoison_task_stack(struct task_struct *task) {}
+
 static inline void kasan_enable_current(void) {}
 static inline void kasan_disable_current(void) {}
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 861f690..5276fe0 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -25,6 +25,7 @@
 #include <linux/irqflags.h>
 #include <linux/context_tracking.h>
 #include <linux/irqbypass.h>
+#include <linux/swait.h>
 #include <asm/signal.h>
 
 #include <linux/kvm.h>
@@ -218,7 +219,7 @@
 	int fpu_active;
 	int guest_fpu_loaded, guest_xcr0_loaded;
 	unsigned char fpu_counter;
-	wait_queue_head_t wq;
+	struct swait_queue_head wq;
 	struct pid *pid;
 	int sigset_active;
 	sigset_t sigset;
@@ -782,7 +783,7 @@
 }
 #endif
 
-static inline wait_queue_head_t *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu)
+static inline struct swait_queue_head *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu)
 {
 #ifdef __KVM_HAVE_ARCH_WQP
 	return vcpu->arch.wqp;
diff --git a/include/linux/latencytop.h b/include/linux/latencytop.h
index e23121f..59ccab2 100644
--- a/include/linux/latencytop.h
+++ b/include/linux/latencytop.h
@@ -37,6 +37,9 @@
 
 void clear_all_latency_tracing(struct task_struct *p);
 
+extern int sysctl_latencytop(struct ctl_table *table, int write,
+			void __user *buffer, size_t *lenp, loff_t *ppos);
+
 #else
 
 static inline void
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 851821b..2c4ebef 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -526,6 +526,7 @@
 enum ata_lpm_hints {
 	ATA_LPM_EMPTY		= (1 << 0), /* port empty/probing */
 	ATA_LPM_HIPM		= (1 << 1), /* may use HIPM */
+	ATA_LPM_WAKE_ONLY	= (1 << 2), /* only wake up link */
 };
 
 /* forward declarations */
@@ -719,7 +720,7 @@
 	union {
 		u16		id[ATA_ID_WORDS]; /* IDENTIFY xxx DEVICE data */
 		u32		gscr[SATA_PMP_GSCR_DWORDS]; /* PMP GSCR block */
-	};
+	} ____cacheline_aligned;
 
 	/* DEVSLP Timing Variables from Identify Device Data Log */
 	u8			devslp_timing[ATA_LOG_DEVSLP_SIZE];
diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h
index bed40df..141ffdd 100644
--- a/include/linux/libnvdimm.h
+++ b/include/linux/libnvdimm.h
@@ -26,9 +26,8 @@
 
 	/* need to set a limit somewhere, but yes, this is likely overkill */
 	ND_IOCTL_MAX_BUFLEN = SZ_4M,
-	ND_CMD_MAX_ELEM = 4,
+	ND_CMD_MAX_ELEM = 5,
 	ND_CMD_MAX_ENVELOPE = 16,
-	ND_CMD_ARS_STATUS_MAX = SZ_4K,
 	ND_MAX_MAPPINGS = 32,
 
 	/* region flag indicating to direct-map persistent memory by default */
diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
index d675011..2190419 100644
--- a/include/linux/lightnvm.h
+++ b/include/linux/lightnvm.h
@@ -135,6 +135,10 @@
 	/* Memory types */
 	NVM_ID_FMTYPE_SLC	= 0,
 	NVM_ID_FMTYPE_MLC	= 1,
+
+	/* Device capabilities */
+	NVM_ID_DCAP_BBLKMGMT	= 0x1,
+	NVM_UD_DCAP_ECC		= 0x2,
 };
 
 struct nvm_id_lp_mlc {
diff --git a/include/linux/list.h b/include/linux/list.h
index 30cf420..5356f4d 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -113,17 +113,6 @@
 extern void list_del(struct list_head *entry);
 #endif
 
-#ifdef CONFIG_DEBUG_LIST
-/*
- * See devm_memremap_pages() which wants DEBUG_LIST=y to assert if one
- * of the pages it allocates is ever passed to list_add()
- */
-extern void list_force_poison(struct list_head *entry);
-#else
-/* fallback to the less strict LIST_POISON* definitions */
-#define list_force_poison list_del
-#endif
-
 /**
  * list_replace - replace old entry by new one
  * @old : the element to be replaced
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index c57e424..d026b19 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -66,7 +66,7 @@
 	/*
 	 * class-hash:
 	 */
-	struct list_head		hash_entry;
+	struct hlist_node		hash_entry;
 
 	/*
 	 * global list of all lock-classes:
@@ -199,7 +199,7 @@
 	u8				irq_context;
 	u8				depth;
 	u16				base;
-	struct list_head		entry;
+	struct hlist_node		entry;
 	u64				chain_key;
 };
 
@@ -261,7 +261,6 @@
 /*
  * Initialization, self-test and debugging-output methods:
  */
-extern void lockdep_init(void);
 extern void lockdep_info(void);
 extern void lockdep_reset(void);
 extern void lockdep_reset_lock(struct lockdep_map *lock);
@@ -392,7 +391,6 @@
 # define lockdep_set_current_reclaim_state(g)	do { } while (0)
 # define lockdep_clear_current_reclaim_state()	do { } while (0)
 # define lockdep_trace_alloc(g)			do { } while (0)
-# define lockdep_init()				do { } while (0)
 # define lockdep_info()				do { } while (0)
 # define lockdep_init_map(lock, name, key, sub) \
 		do { (void)(name); (void)(key); } while (0)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 9ae48d4..792c898 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -51,7 +51,7 @@
 	MEM_CGROUP_STAT_SWAP,		/* # of pages, swapped out */
 	MEM_CGROUP_STAT_NSTATS,
 	/* default hierarchy stats */
-	MEMCG_SOCK,
+	MEMCG_SOCK = MEM_CGROUP_STAT_NSTATS,
 	MEMCG_NR_STAT,
 };
 
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 430a929..a0e8cc8 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -44,6 +44,8 @@
 
 #include <linux/timecounter.h>
 
+#define DEFAULT_UAR_PAGE_SHIFT  12
+
 #define MAX_MSIX_P_PORT		17
 #define MAX_MSIX		64
 #define MIN_MSIX_P_PORT		5
@@ -856,6 +858,7 @@
 	u64			regid_promisc_array[MLX4_MAX_PORTS + 1];
 	u64			regid_allmulti_array[MLX4_MAX_PORTS + 1];
 	struct mlx4_vf_dev     *dev_vfs;
+	u8  uar_page_shift;
 };
 
 struct mlx4_clock_params {
@@ -1528,4 +1531,14 @@
 int mlx4_get_internal_clock_params(struct mlx4_dev *dev,
 				   struct mlx4_clock_params *params);
 
+static inline int mlx4_to_hw_uar_index(struct mlx4_dev *dev, int index)
+{
+	return (index << (PAGE_SHIFT - dev->uar_page_shift));
+}
+
+static inline int mlx4_get_num_reserved_uar(struct mlx4_dev *dev)
+{
+	/* The first 128 UARs are used for EQ doorbells */
+	return (128 >> (PAGE_SHIFT - dev->uar_page_shift));
+}
 #endif /* MLX4_DEVICE_H */
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 231ab6b..58eef02 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -207,15 +207,15 @@
 	u8         outer_dmac[0x1];
 	u8         outer_smac[0x1];
 	u8         outer_ether_type[0x1];
-	u8         reserved_0[0x1];
+	u8         reserved_at_3[0x1];
 	u8         outer_first_prio[0x1];
 	u8         outer_first_cfi[0x1];
 	u8         outer_first_vid[0x1];
-	u8         reserved_1[0x1];
+	u8         reserved_at_7[0x1];
 	u8         outer_second_prio[0x1];
 	u8         outer_second_cfi[0x1];
 	u8         outer_second_vid[0x1];
-	u8         reserved_2[0x1];
+	u8         reserved_at_b[0x1];
 	u8         outer_sip[0x1];
 	u8         outer_dip[0x1];
 	u8         outer_frag[0x1];
@@ -230,21 +230,21 @@
 	u8         outer_gre_protocol[0x1];
 	u8         outer_gre_key[0x1];
 	u8         outer_vxlan_vni[0x1];
-	u8         reserved_3[0x5];
+	u8         reserved_at_1a[0x5];
 	u8         source_eswitch_port[0x1];
 
 	u8         inner_dmac[0x1];
 	u8         inner_smac[0x1];
 	u8         inner_ether_type[0x1];
-	u8         reserved_4[0x1];
+	u8         reserved_at_23[0x1];
 	u8         inner_first_prio[0x1];
 	u8         inner_first_cfi[0x1];
 	u8         inner_first_vid[0x1];
-	u8         reserved_5[0x1];
+	u8         reserved_at_27[0x1];
 	u8         inner_second_prio[0x1];
 	u8         inner_second_cfi[0x1];
 	u8         inner_second_vid[0x1];
-	u8         reserved_6[0x1];
+	u8         reserved_at_2b[0x1];
 	u8         inner_sip[0x1];
 	u8         inner_dip[0x1];
 	u8         inner_frag[0x1];
@@ -256,37 +256,37 @@
 	u8         inner_tcp_sport[0x1];
 	u8         inner_tcp_dport[0x1];
 	u8         inner_tcp_flags[0x1];
-	u8         reserved_7[0x9];
+	u8         reserved_at_37[0x9];
 
-	u8         reserved_8[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_flow_table_prop_layout_bits {
 	u8         ft_support[0x1];
-	u8         reserved_0[0x2];
+	u8         reserved_at_1[0x2];
 	u8	   flow_modify_en[0x1];
 	u8         modify_root[0x1];
 	u8         identified_miss_table_mode[0x1];
 	u8         flow_table_modify[0x1];
-	u8         reserved_1[0x19];
+	u8         reserved_at_7[0x19];
 
-	u8         reserved_2[0x2];
+	u8         reserved_at_20[0x2];
 	u8         log_max_ft_size[0x6];
-	u8         reserved_3[0x10];
+	u8         reserved_at_28[0x10];
 	u8         max_ft_level[0x8];
 
-	u8         reserved_4[0x20];
+	u8         reserved_at_40[0x20];
 
-	u8         reserved_5[0x18];
+	u8         reserved_at_60[0x18];
 	u8         log_max_ft_num[0x8];
 
-	u8         reserved_6[0x18];
+	u8         reserved_at_80[0x18];
 	u8         log_max_destination[0x8];
 
-	u8         reserved_7[0x18];
+	u8         reserved_at_a0[0x18];
 	u8         log_max_flow[0x8];
 
-	u8         reserved_8[0x40];
+	u8         reserved_at_c0[0x40];
 
 	struct mlx5_ifc_flow_table_fields_supported_bits ft_field_support;
 
@@ -298,13 +298,13 @@
 	u8         receive[0x1];
 	u8         write[0x1];
 	u8         read[0x1];
-	u8         reserved_0[0x1];
+	u8         reserved_at_4[0x1];
 	u8         srq_receive[0x1];
-	u8         reserved_1[0x1a];
+	u8         reserved_at_6[0x1a];
 };
 
 struct mlx5_ifc_ipv4_layout_bits {
-	u8         reserved_0[0x60];
+	u8         reserved_at_0[0x60];
 
 	u8         ipv4[0x20];
 };
@@ -316,7 +316,7 @@
 union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits {
 	struct mlx5_ifc_ipv6_layout_bits ipv6_layout;
 	struct mlx5_ifc_ipv4_layout_bits ipv4_layout;
-	u8         reserved_0[0x80];
+	u8         reserved_at_0[0x80];
 };
 
 struct mlx5_ifc_fte_match_set_lyr_2_4_bits {
@@ -336,15 +336,15 @@
 	u8         ip_dscp[0x6];
 	u8         ip_ecn[0x2];
 	u8         vlan_tag[0x1];
-	u8         reserved_0[0x1];
+	u8         reserved_at_91[0x1];
 	u8         frag[0x1];
-	u8         reserved_1[0x4];
+	u8         reserved_at_93[0x4];
 	u8         tcp_flags[0x9];
 
 	u8         tcp_sport[0x10];
 	u8         tcp_dport[0x10];
 
-	u8         reserved_2[0x20];
+	u8         reserved_at_c0[0x20];
 
 	u8         udp_sport[0x10];
 	u8         udp_dport[0x10];
@@ -355,9 +355,9 @@
 };
 
 struct mlx5_ifc_fte_match_set_misc_bits {
-	u8         reserved_0[0x20];
+	u8         reserved_at_0[0x20];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         source_port[0x10];
 
 	u8         outer_second_prio[0x3];
@@ -369,31 +369,31 @@
 
 	u8         outer_second_vlan_tag[0x1];
 	u8         inner_second_vlan_tag[0x1];
-	u8         reserved_2[0xe];
+	u8         reserved_at_62[0xe];
 	u8         gre_protocol[0x10];
 
 	u8         gre_key_h[0x18];
 	u8         gre_key_l[0x8];
 
 	u8         vxlan_vni[0x18];
-	u8         reserved_3[0x8];
+	u8         reserved_at_b8[0x8];
 
-	u8         reserved_4[0x20];
+	u8         reserved_at_c0[0x20];
 
-	u8         reserved_5[0xc];
+	u8         reserved_at_e0[0xc];
 	u8         outer_ipv6_flow_label[0x14];
 
-	u8         reserved_6[0xc];
+	u8         reserved_at_100[0xc];
 	u8         inner_ipv6_flow_label[0x14];
 
-	u8         reserved_7[0xe0];
+	u8         reserved_at_120[0xe0];
 };
 
 struct mlx5_ifc_cmd_pas_bits {
 	u8         pa_h[0x20];
 
 	u8         pa_l[0x14];
-	u8         reserved_0[0xc];
+	u8         reserved_at_34[0xc];
 };
 
 struct mlx5_ifc_uint64_bits {
@@ -418,31 +418,31 @@
 struct mlx5_ifc_ads_bits {
 	u8         fl[0x1];
 	u8         free_ar[0x1];
-	u8         reserved_0[0xe];
+	u8         reserved_at_2[0xe];
 	u8         pkey_index[0x10];
 
-	u8         reserved_1[0x8];
+	u8         reserved_at_20[0x8];
 	u8         grh[0x1];
 	u8         mlid[0x7];
 	u8         rlid[0x10];
 
 	u8         ack_timeout[0x5];
-	u8         reserved_2[0x3];
+	u8         reserved_at_45[0x3];
 	u8         src_addr_index[0x8];
-	u8         reserved_3[0x4];
+	u8         reserved_at_50[0x4];
 	u8         stat_rate[0x4];
 	u8         hop_limit[0x8];
 
-	u8         reserved_4[0x4];
+	u8         reserved_at_60[0x4];
 	u8         tclass[0x8];
 	u8         flow_label[0x14];
 
 	u8         rgid_rip[16][0x8];
 
-	u8         reserved_5[0x4];
+	u8         reserved_at_100[0x4];
 	u8         f_dscp[0x1];
 	u8         f_ecn[0x1];
-	u8         reserved_6[0x1];
+	u8         reserved_at_106[0x1];
 	u8         f_eth_prio[0x1];
 	u8         ecn[0x2];
 	u8         dscp[0x6];
@@ -458,25 +458,25 @@
 };
 
 struct mlx5_ifc_flow_table_nic_cap_bits {
-	u8         reserved_0[0x200];
+	u8         reserved_at_0[0x200];
 
 	struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_receive;
 
-	u8         reserved_1[0x200];
+	u8         reserved_at_400[0x200];
 
 	struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_receive_sniffer;
 
 	struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_transmit;
 
-	u8         reserved_2[0x200];
+	u8         reserved_at_a00[0x200];
 
 	struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_transmit_sniffer;
 
-	u8         reserved_3[0x7200];
+	u8         reserved_at_e00[0x7200];
 };
 
 struct mlx5_ifc_flow_table_eswitch_cap_bits {
-	u8     reserved_0[0x200];
+	u8     reserved_at_0[0x200];
 
 	struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_esw_fdb;
 
@@ -484,7 +484,7 @@
 
 	struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_esw_acl_egress;
 
-	u8      reserved_1[0x7800];
+	u8      reserved_at_800[0x7800];
 };
 
 struct mlx5_ifc_e_switch_cap_bits {
@@ -493,9 +493,9 @@
 	u8         vport_svlan_insert[0x1];
 	u8         vport_cvlan_insert_if_not_exist[0x1];
 	u8         vport_cvlan_insert_overwrite[0x1];
-	u8         reserved_0[0x1b];
+	u8         reserved_at_5[0x1b];
 
-	u8         reserved_1[0x7e0];
+	u8         reserved_at_20[0x7e0];
 };
 
 struct mlx5_ifc_per_protocol_networking_offload_caps_bits {
@@ -504,51 +504,51 @@
 	u8         lro_cap[0x1];
 	u8         lro_psh_flag[0x1];
 	u8         lro_time_stamp[0x1];
-	u8         reserved_0[0x3];
+	u8         reserved_at_5[0x3];
 	u8         self_lb_en_modifiable[0x1];
-	u8         reserved_1[0x2];
+	u8         reserved_at_9[0x2];
 	u8         max_lso_cap[0x5];
-	u8         reserved_2[0x4];
+	u8         reserved_at_10[0x4];
 	u8         rss_ind_tbl_cap[0x4];
-	u8         reserved_3[0x3];
+	u8         reserved_at_18[0x3];
 	u8         tunnel_lso_const_out_ip_id[0x1];
-	u8         reserved_4[0x2];
+	u8         reserved_at_1c[0x2];
 	u8         tunnel_statless_gre[0x1];
 	u8         tunnel_stateless_vxlan[0x1];
 
-	u8         reserved_5[0x20];
+	u8         reserved_at_20[0x20];
 
-	u8         reserved_6[0x10];
+	u8         reserved_at_40[0x10];
 	u8         lro_min_mss_size[0x10];
 
-	u8         reserved_7[0x120];
+	u8         reserved_at_60[0x120];
 
 	u8         lro_timer_supported_periods[4][0x20];
 
-	u8         reserved_8[0x600];
+	u8         reserved_at_200[0x600];
 };
 
 struct mlx5_ifc_roce_cap_bits {
 	u8         roce_apm[0x1];
-	u8         reserved_0[0x1f];
+	u8         reserved_at_1[0x1f];
 
-	u8         reserved_1[0x60];
+	u8         reserved_at_20[0x60];
 
-	u8         reserved_2[0xc];
+	u8         reserved_at_80[0xc];
 	u8         l3_type[0x4];
-	u8         reserved_3[0x8];
+	u8         reserved_at_90[0x8];
 	u8         roce_version[0x8];
 
-	u8         reserved_4[0x10];
+	u8         reserved_at_a0[0x10];
 	u8         r_roce_dest_udp_port[0x10];
 
 	u8         r_roce_max_src_udp_port[0x10];
 	u8         r_roce_min_src_udp_port[0x10];
 
-	u8         reserved_5[0x10];
+	u8         reserved_at_e0[0x10];
 	u8         roce_address_table_size[0x10];
 
-	u8         reserved_6[0x700];
+	u8         reserved_at_100[0x700];
 };
 
 enum {
@@ -576,35 +576,35 @@
 };
 
 struct mlx5_ifc_atomic_caps_bits {
-	u8         reserved_0[0x40];
+	u8         reserved_at_0[0x40];
 
 	u8         atomic_req_8B_endianess_mode[0x2];
-	u8         reserved_1[0x4];
+	u8         reserved_at_42[0x4];
 	u8         supported_atomic_req_8B_endianess_mode_1[0x1];
 
-	u8         reserved_2[0x19];
+	u8         reserved_at_47[0x19];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 
-	u8         reserved_4[0x10];
+	u8         reserved_at_80[0x10];
 	u8         atomic_operations[0x10];
 
-	u8         reserved_5[0x10];
+	u8         reserved_at_a0[0x10];
 	u8         atomic_size_qp[0x10];
 
-	u8         reserved_6[0x10];
+	u8         reserved_at_c0[0x10];
 	u8         atomic_size_dc[0x10];
 
-	u8         reserved_7[0x720];
+	u8         reserved_at_e0[0x720];
 };
 
 struct mlx5_ifc_odp_cap_bits {
-	u8         reserved_0[0x40];
+	u8         reserved_at_0[0x40];
 
 	u8         sig[0x1];
-	u8         reserved_1[0x1f];
+	u8         reserved_at_41[0x1f];
 
-	u8         reserved_2[0x20];
+	u8         reserved_at_60[0x20];
 
 	struct mlx5_ifc_odp_per_transport_service_cap_bits rc_odp_caps;
 
@@ -612,7 +612,7 @@
 
 	struct mlx5_ifc_odp_per_transport_service_cap_bits ud_odp_caps;
 
-	u8         reserved_3[0x720];
+	u8         reserved_at_e0[0x720];
 };
 
 enum {
@@ -660,55 +660,55 @@
 };
 
 struct mlx5_ifc_cmd_hca_cap_bits {
-	u8         reserved_0[0x80];
+	u8         reserved_at_0[0x80];
 
 	u8         log_max_srq_sz[0x8];
 	u8         log_max_qp_sz[0x8];
-	u8         reserved_1[0xb];
+	u8         reserved_at_90[0xb];
 	u8         log_max_qp[0x5];
 
-	u8         reserved_2[0xb];
+	u8         reserved_at_a0[0xb];
 	u8         log_max_srq[0x5];
-	u8         reserved_3[0x10];
+	u8         reserved_at_b0[0x10];
 
-	u8         reserved_4[0x8];
+	u8         reserved_at_c0[0x8];
 	u8         log_max_cq_sz[0x8];
-	u8         reserved_5[0xb];
+	u8         reserved_at_d0[0xb];
 	u8         log_max_cq[0x5];
 
 	u8         log_max_eq_sz[0x8];
-	u8         reserved_6[0x2];
+	u8         reserved_at_e8[0x2];
 	u8         log_max_mkey[0x6];
-	u8         reserved_7[0xc];
+	u8         reserved_at_f0[0xc];
 	u8         log_max_eq[0x4];
 
 	u8         max_indirection[0x8];
-	u8         reserved_8[0x1];
+	u8         reserved_at_108[0x1];
 	u8         log_max_mrw_sz[0x7];
-	u8         reserved_9[0x2];
+	u8         reserved_at_110[0x2];
 	u8         log_max_bsf_list_size[0x6];
-	u8         reserved_10[0x2];
+	u8         reserved_at_118[0x2];
 	u8         log_max_klm_list_size[0x6];
 
-	u8         reserved_11[0xa];
+	u8         reserved_at_120[0xa];
 	u8         log_max_ra_req_dc[0x6];
-	u8         reserved_12[0xa];
+	u8         reserved_at_130[0xa];
 	u8         log_max_ra_res_dc[0x6];
 
-	u8         reserved_13[0xa];
+	u8         reserved_at_140[0xa];
 	u8         log_max_ra_req_qp[0x6];
-	u8         reserved_14[0xa];
+	u8         reserved_at_150[0xa];
 	u8         log_max_ra_res_qp[0x6];
 
 	u8         pad_cap[0x1];
 	u8         cc_query_allowed[0x1];
 	u8         cc_modify_allowed[0x1];
-	u8         reserved_15[0xd];
+	u8         reserved_at_163[0xd];
 	u8         gid_table_size[0x10];
 
 	u8         out_of_seq_cnt[0x1];
 	u8         vport_counters[0x1];
-	u8         reserved_16[0x4];
+	u8         reserved_at_182[0x4];
 	u8         max_qp_cnt[0xa];
 	u8         pkey_table_size[0x10];
 
@@ -716,158 +716,158 @@
 	u8         vhca_group_manager[0x1];
 	u8         ib_virt[0x1];
 	u8         eth_virt[0x1];
-	u8         reserved_17[0x1];
+	u8         reserved_at_1a4[0x1];
 	u8         ets[0x1];
 	u8         nic_flow_table[0x1];
 	u8         eswitch_flow_table[0x1];
 	u8	   early_vf_enable;
-	u8         reserved_18[0x2];
+	u8         reserved_at_1a8[0x2];
 	u8         local_ca_ack_delay[0x5];
-	u8         reserved_19[0x6];
+	u8         reserved_at_1af[0x6];
 	u8         port_type[0x2];
 	u8         num_ports[0x8];
 
-	u8         reserved_20[0x3];
+	u8         reserved_at_1bf[0x3];
 	u8         log_max_msg[0x5];
-	u8         reserved_21[0x18];
+	u8         reserved_at_1c7[0x18];
 
 	u8         stat_rate_support[0x10];
-	u8         reserved_22[0xc];
+	u8         reserved_at_1ef[0xc];
 	u8         cqe_version[0x4];
 
 	u8         compact_address_vector[0x1];
-	u8         reserved_23[0xe];
+	u8         reserved_at_200[0xe];
 	u8         drain_sigerr[0x1];
 	u8         cmdif_checksum[0x2];
 	u8         sigerr_cqe[0x1];
-	u8         reserved_24[0x1];
+	u8         reserved_at_212[0x1];
 	u8         wq_signature[0x1];
 	u8         sctr_data_cqe[0x1];
-	u8         reserved_25[0x1];
+	u8         reserved_at_215[0x1];
 	u8         sho[0x1];
 	u8         tph[0x1];
 	u8         rf[0x1];
 	u8         dct[0x1];
-	u8         reserved_26[0x1];
+	u8         reserved_at_21a[0x1];
 	u8         eth_net_offloads[0x1];
 	u8         roce[0x1];
 	u8         atomic[0x1];
-	u8         reserved_27[0x1];
+	u8         reserved_at_21e[0x1];
 
 	u8         cq_oi[0x1];
 	u8         cq_resize[0x1];
 	u8         cq_moderation[0x1];
-	u8         reserved_28[0x3];
+	u8         reserved_at_222[0x3];
 	u8         cq_eq_remap[0x1];
 	u8         pg[0x1];
 	u8         block_lb_mc[0x1];
-	u8         reserved_29[0x1];
+	u8         reserved_at_228[0x1];
 	u8         scqe_break_moderation[0x1];
-	u8         reserved_30[0x1];
+	u8         reserved_at_22a[0x1];
 	u8         cd[0x1];
-	u8         reserved_31[0x1];
+	u8         reserved_at_22c[0x1];
 	u8         apm[0x1];
-	u8         reserved_32[0x7];
+	u8         reserved_at_22e[0x7];
 	u8         qkv[0x1];
 	u8         pkv[0x1];
-	u8         reserved_33[0x4];
+	u8         reserved_at_237[0x4];
 	u8         xrc[0x1];
 	u8         ud[0x1];
 	u8         uc[0x1];
 	u8         rc[0x1];
 
-	u8         reserved_34[0xa];
+	u8         reserved_at_23f[0xa];
 	u8         uar_sz[0x6];
-	u8         reserved_35[0x8];
+	u8         reserved_at_24f[0x8];
 	u8         log_pg_sz[0x8];
 
 	u8         bf[0x1];
-	u8         reserved_36[0x1];
+	u8         reserved_at_260[0x1];
 	u8         pad_tx_eth_packet[0x1];
-	u8         reserved_37[0x8];
+	u8         reserved_at_262[0x8];
 	u8         log_bf_reg_size[0x5];
-	u8         reserved_38[0x10];
+	u8         reserved_at_26f[0x10];
 
-	u8         reserved_39[0x10];
+	u8         reserved_at_27f[0x10];
 	u8         max_wqe_sz_sq[0x10];
 
-	u8         reserved_40[0x10];
+	u8         reserved_at_29f[0x10];
 	u8         max_wqe_sz_rq[0x10];
 
-	u8         reserved_41[0x10];
+	u8         reserved_at_2bf[0x10];
 	u8         max_wqe_sz_sq_dc[0x10];
 
-	u8         reserved_42[0x7];
+	u8         reserved_at_2df[0x7];
 	u8         max_qp_mcg[0x19];
 
-	u8         reserved_43[0x18];
+	u8         reserved_at_2ff[0x18];
 	u8         log_max_mcg[0x8];
 
-	u8         reserved_44[0x3];
+	u8         reserved_at_31f[0x3];
 	u8         log_max_transport_domain[0x5];
-	u8         reserved_45[0x3];
+	u8         reserved_at_327[0x3];
 	u8         log_max_pd[0x5];
-	u8         reserved_46[0xb];
+	u8         reserved_at_32f[0xb];
 	u8         log_max_xrcd[0x5];
 
-	u8         reserved_47[0x20];
+	u8         reserved_at_33f[0x20];
 
-	u8         reserved_48[0x3];
+	u8         reserved_at_35f[0x3];
 	u8         log_max_rq[0x5];
-	u8         reserved_49[0x3];
+	u8         reserved_at_367[0x3];
 	u8         log_max_sq[0x5];
-	u8         reserved_50[0x3];
+	u8         reserved_at_36f[0x3];
 	u8         log_max_tir[0x5];
-	u8         reserved_51[0x3];
+	u8         reserved_at_377[0x3];
 	u8         log_max_tis[0x5];
 
 	u8         basic_cyclic_rcv_wqe[0x1];
-	u8         reserved_52[0x2];
+	u8         reserved_at_380[0x2];
 	u8         log_max_rmp[0x5];
-	u8         reserved_53[0x3];
+	u8         reserved_at_387[0x3];
 	u8         log_max_rqt[0x5];
-	u8         reserved_54[0x3];
+	u8         reserved_at_38f[0x3];
 	u8         log_max_rqt_size[0x5];
-	u8         reserved_55[0x3];
+	u8         reserved_at_397[0x3];
 	u8         log_max_tis_per_sq[0x5];
 
-	u8         reserved_56[0x3];
+	u8         reserved_at_39f[0x3];
 	u8         log_max_stride_sz_rq[0x5];
-	u8         reserved_57[0x3];
+	u8         reserved_at_3a7[0x3];
 	u8         log_min_stride_sz_rq[0x5];
-	u8         reserved_58[0x3];
+	u8         reserved_at_3af[0x3];
 	u8         log_max_stride_sz_sq[0x5];
-	u8         reserved_59[0x3];
+	u8         reserved_at_3b7[0x3];
 	u8         log_min_stride_sz_sq[0x5];
 
-	u8         reserved_60[0x1b];
+	u8         reserved_at_3bf[0x1b];
 	u8         log_max_wq_sz[0x5];
 
 	u8         nic_vport_change_event[0x1];
-	u8         reserved_61[0xa];
+	u8         reserved_at_3e0[0xa];
 	u8         log_max_vlan_list[0x5];
-	u8         reserved_62[0x3];
+	u8         reserved_at_3ef[0x3];
 	u8         log_max_current_mc_list[0x5];
-	u8         reserved_63[0x3];
+	u8         reserved_at_3f7[0x3];
 	u8         log_max_current_uc_list[0x5];
 
-	u8         reserved_64[0x80];
+	u8         reserved_at_3ff[0x80];
 
-	u8         reserved_65[0x3];
+	u8         reserved_at_47f[0x3];
 	u8         log_max_l2_table[0x5];
-	u8         reserved_66[0x8];
+	u8         reserved_at_487[0x8];
 	u8         log_uar_page_sz[0x10];
 
-	u8         reserved_67[0x20];
+	u8         reserved_at_49f[0x20];
 	u8         device_frequency_mhz[0x20];
 	u8         device_frequency_khz[0x20];
-	u8         reserved_68[0x5f];
+	u8         reserved_at_4ff[0x5f];
 	u8         cqe_zip[0x1];
 
 	u8         cqe_zip_timeout[0x10];
 	u8         cqe_zip_max_num[0x10];
 
-	u8         reserved_69[0x220];
+	u8         reserved_at_57f[0x220];
 };
 
 enum mlx5_flow_destination_type {
@@ -880,7 +880,7 @@
 	u8         destination_type[0x8];
 	u8         destination_id[0x18];
 
-	u8         reserved_0[0x20];
+	u8         reserved_at_20[0x20];
 };
 
 struct mlx5_ifc_fte_match_param_bits {
@@ -890,7 +890,7 @@
 
 	struct mlx5_ifc_fte_match_set_lyr_2_4_bits inner_headers;
 
-	u8         reserved_0[0xa00];
+	u8         reserved_at_600[0xa00];
 };
 
 enum {
@@ -922,18 +922,18 @@
 	u8         wq_signature[0x1];
 	u8         end_padding_mode[0x2];
 	u8         cd_slave[0x1];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         hds_skip_first_sge[0x1];
 	u8         log2_hds_buf_size[0x3];
-	u8         reserved_1[0x7];
+	u8         reserved_at_24[0x7];
 	u8         page_offset[0x5];
 	u8         lwm[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         pd[0x18];
 
-	u8         reserved_3[0x8];
+	u8         reserved_at_60[0x8];
 	u8         uar_page[0x18];
 
 	u8         dbr_addr[0x40];
@@ -942,60 +942,60 @@
 
 	u8         sw_counter[0x20];
 
-	u8         reserved_4[0xc];
+	u8         reserved_at_100[0xc];
 	u8         log_wq_stride[0x4];
-	u8         reserved_5[0x3];
+	u8         reserved_at_110[0x3];
 	u8         log_wq_pg_sz[0x5];
-	u8         reserved_6[0x3];
+	u8         reserved_at_118[0x3];
 	u8         log_wq_sz[0x5];
 
-	u8         reserved_7[0x4e0];
+	u8         reserved_at_120[0x4e0];
 
 	struct mlx5_ifc_cmd_pas_bits pas[0];
 };
 
 struct mlx5_ifc_rq_num_bits {
-	u8         reserved_0[0x8];
+	u8         reserved_at_0[0x8];
 	u8         rq_num[0x18];
 };
 
 struct mlx5_ifc_mac_address_layout_bits {
-	u8         reserved_0[0x10];
+	u8         reserved_at_0[0x10];
 	u8         mac_addr_47_32[0x10];
 
 	u8         mac_addr_31_0[0x20];
 };
 
 struct mlx5_ifc_vlan_layout_bits {
-	u8         reserved_0[0x14];
+	u8         reserved_at_0[0x14];
 	u8         vlan[0x0c];
 
-	u8         reserved_1[0x20];
+	u8         reserved_at_20[0x20];
 };
 
 struct mlx5_ifc_cong_control_r_roce_ecn_np_bits {
-	u8         reserved_0[0xa0];
+	u8         reserved_at_0[0xa0];
 
 	u8         min_time_between_cnps[0x20];
 
-	u8         reserved_1[0x12];
+	u8         reserved_at_c0[0x12];
 	u8         cnp_dscp[0x6];
-	u8         reserved_2[0x5];
+	u8         reserved_at_d8[0x5];
 	u8         cnp_802p_prio[0x3];
 
-	u8         reserved_3[0x720];
+	u8         reserved_at_e0[0x720];
 };
 
 struct mlx5_ifc_cong_control_r_roce_ecn_rp_bits {
-	u8         reserved_0[0x60];
+	u8         reserved_at_0[0x60];
 
-	u8         reserved_1[0x4];
+	u8         reserved_at_60[0x4];
 	u8         clamp_tgt_rate[0x1];
-	u8         reserved_2[0x3];
+	u8         reserved_at_65[0x3];
 	u8         clamp_tgt_rate_after_time_inc[0x1];
-	u8         reserved_3[0x17];
+	u8         reserved_at_69[0x17];
 
-	u8         reserved_4[0x20];
+	u8         reserved_at_80[0x20];
 
 	u8         rpg_time_reset[0x20];
 
@@ -1015,7 +1015,7 @@
 
 	u8         rpg_min_rate[0x20];
 
-	u8         reserved_5[0xe0];
+	u8         reserved_at_1c0[0xe0];
 
 	u8         rate_to_set_on_first_cnp[0x20];
 
@@ -1025,15 +1025,15 @@
 
 	u8         rate_reduce_monitor_period[0x20];
 
-	u8         reserved_6[0x20];
+	u8         reserved_at_320[0x20];
 
 	u8         initial_alpha_value[0x20];
 
-	u8         reserved_7[0x4a0];
+	u8         reserved_at_360[0x4a0];
 };
 
 struct mlx5_ifc_cong_control_802_1qau_rp_bits {
-	u8         reserved_0[0x80];
+	u8         reserved_at_0[0x80];
 
 	u8         rppp_max_rps[0x20];
 
@@ -1055,7 +1055,7 @@
 
 	u8         rpg_min_rate[0x20];
 
-	u8         reserved_1[0x640];
+	u8         reserved_at_1c0[0x640];
 };
 
 enum {
@@ -1205,7 +1205,7 @@
 
 	u8         successful_recovery_events[0x20];
 
-	u8         reserved_0[0x180];
+	u8         reserved_at_640[0x180];
 };
 
 struct mlx5_ifc_eth_per_traffic_grp_data_layout_bits {
@@ -1213,7 +1213,7 @@
 
 	u8         transmit_queue_low[0x20];
 
-	u8         reserved_0[0x780];
+	u8         reserved_at_40[0x780];
 };
 
 struct mlx5_ifc_eth_per_prio_grp_data_layout_bits {
@@ -1221,7 +1221,7 @@
 
 	u8         rx_octets_low[0x20];
 
-	u8         reserved_0[0xc0];
+	u8         reserved_at_40[0xc0];
 
 	u8         rx_frames_high[0x20];
 
@@ -1231,7 +1231,7 @@
 
 	u8         tx_octets_low[0x20];
 
-	u8         reserved_1[0xc0];
+	u8         reserved_at_180[0xc0];
 
 	u8         tx_frames_high[0x20];
 
@@ -1257,7 +1257,7 @@
 
 	u8         rx_pause_transition_low[0x20];
 
-	u8         reserved_2[0x400];
+	u8         reserved_at_3c0[0x400];
 };
 
 struct mlx5_ifc_eth_extended_cntrs_grp_data_layout_bits {
@@ -1265,7 +1265,7 @@
 
 	u8         port_transmit_wait_low[0x20];
 
-	u8         reserved_0[0x780];
+	u8         reserved_at_40[0x780];
 };
 
 struct mlx5_ifc_eth_3635_cntrs_grp_data_layout_bits {
@@ -1333,7 +1333,7 @@
 
 	u8         dot3out_pause_frames_low[0x20];
 
-	u8         reserved_0[0x3c0];
+	u8         reserved_at_400[0x3c0];
 };
 
 struct mlx5_ifc_eth_2819_cntrs_grp_data_layout_bits {
@@ -1421,7 +1421,7 @@
 
 	u8         ether_stats_pkts8192to10239octets_low[0x20];
 
-	u8         reserved_0[0x280];
+	u8         reserved_at_540[0x280];
 };
 
 struct mlx5_ifc_eth_2863_cntrs_grp_data_layout_bits {
@@ -1477,7 +1477,7 @@
 
 	u8         if_out_broadcast_pkts_low[0x20];
 
-	u8         reserved_0[0x480];
+	u8         reserved_at_340[0x480];
 };
 
 struct mlx5_ifc_eth_802_3_cntrs_grp_data_layout_bits {
@@ -1557,54 +1557,54 @@
 
 	u8         a_pause_mac_ctrl_frames_transmitted_low[0x20];
 
-	u8         reserved_0[0x300];
+	u8         reserved_at_4c0[0x300];
 };
 
 struct mlx5_ifc_cmd_inter_comp_event_bits {
 	u8         command_completion_vector[0x20];
 
-	u8         reserved_0[0xc0];
+	u8         reserved_at_20[0xc0];
 };
 
 struct mlx5_ifc_stall_vl_event_bits {
-	u8         reserved_0[0x18];
+	u8         reserved_at_0[0x18];
 	u8         port_num[0x1];
-	u8         reserved_1[0x3];
+	u8         reserved_at_19[0x3];
 	u8         vl[0x4];
 
-	u8         reserved_2[0xa0];
+	u8         reserved_at_20[0xa0];
 };
 
 struct mlx5_ifc_db_bf_congestion_event_bits {
 	u8         event_subtype[0x8];
-	u8         reserved_0[0x8];
+	u8         reserved_at_8[0x8];
 	u8         congestion_level[0x8];
-	u8         reserved_1[0x8];
+	u8         reserved_at_18[0x8];
 
-	u8         reserved_2[0xa0];
+	u8         reserved_at_20[0xa0];
 };
 
 struct mlx5_ifc_gpio_event_bits {
-	u8         reserved_0[0x60];
+	u8         reserved_at_0[0x60];
 
 	u8         gpio_event_hi[0x20];
 
 	u8         gpio_event_lo[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_a0[0x40];
 };
 
 struct mlx5_ifc_port_state_change_event_bits {
-	u8         reserved_0[0x40];
+	u8         reserved_at_0[0x40];
 
 	u8         port_num[0x4];
-	u8         reserved_1[0x1c];
+	u8         reserved_at_44[0x1c];
 
-	u8         reserved_2[0x80];
+	u8         reserved_at_60[0x80];
 };
 
 struct mlx5_ifc_dropped_packet_logged_bits {
-	u8         reserved_0[0xe0];
+	u8         reserved_at_0[0xe0];
 };
 
 enum {
@@ -1613,15 +1613,15 @@
 };
 
 struct mlx5_ifc_cq_error_bits {
-	u8         reserved_0[0x8];
+	u8         reserved_at_0[0x8];
 	u8         cqn[0x18];
 
-	u8         reserved_1[0x20];
+	u8         reserved_at_20[0x20];
 
-	u8         reserved_2[0x18];
+	u8         reserved_at_40[0x18];
 	u8         syndrome[0x8];
 
-	u8         reserved_3[0x80];
+	u8         reserved_at_60[0x80];
 };
 
 struct mlx5_ifc_rdma_page_fault_event_bits {
@@ -1629,14 +1629,14 @@
 
 	u8         r_key[0x20];
 
-	u8         reserved_0[0x10];
+	u8         reserved_at_40[0x10];
 	u8         packet_len[0x10];
 
 	u8         rdma_op_len[0x20];
 
 	u8         rdma_va[0x40];
 
-	u8         reserved_1[0x5];
+	u8         reserved_at_c0[0x5];
 	u8         rdma[0x1];
 	u8         write[0x1];
 	u8         requestor[0x1];
@@ -1646,15 +1646,15 @@
 struct mlx5_ifc_wqe_associated_page_fault_event_bits {
 	u8         bytes_committed[0x20];
 
-	u8         reserved_0[0x10];
+	u8         reserved_at_20[0x10];
 	u8         wqe_index[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_40[0x10];
 	u8         len[0x10];
 
-	u8         reserved_2[0x60];
+	u8         reserved_at_60[0x60];
 
-	u8         reserved_3[0x5];
+	u8         reserved_at_c0[0x5];
 	u8         rdma[0x1];
 	u8         write_read[0x1];
 	u8         requestor[0x1];
@@ -1662,26 +1662,26 @@
 };
 
 struct mlx5_ifc_qp_events_bits {
-	u8         reserved_0[0xa0];
+	u8         reserved_at_0[0xa0];
 
 	u8         type[0x8];
-	u8         reserved_1[0x18];
+	u8         reserved_at_a8[0x18];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_c0[0x8];
 	u8         qpn_rqn_sqn[0x18];
 };
 
 struct mlx5_ifc_dct_events_bits {
-	u8         reserved_0[0xc0];
+	u8         reserved_at_0[0xc0];
 
-	u8         reserved_1[0x8];
+	u8         reserved_at_c0[0x8];
 	u8         dct_number[0x18];
 };
 
 struct mlx5_ifc_comp_event_bits {
-	u8         reserved_0[0xc0];
+	u8         reserved_at_0[0xc0];
 
-	u8         reserved_1[0x8];
+	u8         reserved_at_c0[0x8];
 	u8         cq_number[0x18];
 };
 
@@ -1754,41 +1754,41 @@
 
 struct mlx5_ifc_qpc_bits {
 	u8         state[0x4];
-	u8         reserved_0[0x4];
+	u8         reserved_at_4[0x4];
 	u8         st[0x8];
-	u8         reserved_1[0x3];
+	u8         reserved_at_10[0x3];
 	u8         pm_state[0x2];
-	u8         reserved_2[0x7];
+	u8         reserved_at_15[0x7];
 	u8         end_padding_mode[0x2];
-	u8         reserved_3[0x2];
+	u8         reserved_at_1e[0x2];
 
 	u8         wq_signature[0x1];
 	u8         block_lb_mc[0x1];
 	u8         atomic_like_write_en[0x1];
 	u8         latency_sensitive[0x1];
-	u8         reserved_4[0x1];
+	u8         reserved_at_24[0x1];
 	u8         drain_sigerr[0x1];
-	u8         reserved_5[0x2];
+	u8         reserved_at_26[0x2];
 	u8         pd[0x18];
 
 	u8         mtu[0x3];
 	u8         log_msg_max[0x5];
-	u8         reserved_6[0x1];
+	u8         reserved_at_48[0x1];
 	u8         log_rq_size[0x4];
 	u8         log_rq_stride[0x3];
 	u8         no_sq[0x1];
 	u8         log_sq_size[0x4];
-	u8         reserved_7[0x6];
+	u8         reserved_at_55[0x6];
 	u8         rlky[0x1];
-	u8         reserved_8[0x4];
+	u8         reserved_at_5c[0x4];
 
 	u8         counter_set_id[0x8];
 	u8         uar_page[0x18];
 
-	u8         reserved_9[0x8];
+	u8         reserved_at_80[0x8];
 	u8         user_index[0x18];
 
-	u8         reserved_10[0x3];
+	u8         reserved_at_a0[0x3];
 	u8         log_page_size[0x5];
 	u8         remote_qpn[0x18];
 
@@ -1797,66 +1797,66 @@
 	struct mlx5_ifc_ads_bits secondary_address_path;
 
 	u8         log_ack_req_freq[0x4];
-	u8         reserved_11[0x4];
+	u8         reserved_at_384[0x4];
 	u8         log_sra_max[0x3];
-	u8         reserved_12[0x2];
+	u8         reserved_at_38b[0x2];
 	u8         retry_count[0x3];
 	u8         rnr_retry[0x3];
-	u8         reserved_13[0x1];
+	u8         reserved_at_393[0x1];
 	u8         fre[0x1];
 	u8         cur_rnr_retry[0x3];
 	u8         cur_retry_count[0x3];
-	u8         reserved_14[0x5];
+	u8         reserved_at_39b[0x5];
 
-	u8         reserved_15[0x20];
+	u8         reserved_at_3a0[0x20];
 
-	u8         reserved_16[0x8];
+	u8         reserved_at_3c0[0x8];
 	u8         next_send_psn[0x18];
 
-	u8         reserved_17[0x8];
+	u8         reserved_at_3e0[0x8];
 	u8         cqn_snd[0x18];
 
-	u8         reserved_18[0x40];
+	u8         reserved_at_400[0x40];
 
-	u8         reserved_19[0x8];
+	u8         reserved_at_440[0x8];
 	u8         last_acked_psn[0x18];
 
-	u8         reserved_20[0x8];
+	u8         reserved_at_460[0x8];
 	u8         ssn[0x18];
 
-	u8         reserved_21[0x8];
+	u8         reserved_at_480[0x8];
 	u8         log_rra_max[0x3];
-	u8         reserved_22[0x1];
+	u8         reserved_at_48b[0x1];
 	u8         atomic_mode[0x4];
 	u8         rre[0x1];
 	u8         rwe[0x1];
 	u8         rae[0x1];
-	u8         reserved_23[0x1];
+	u8         reserved_at_493[0x1];
 	u8         page_offset[0x6];
-	u8         reserved_24[0x3];
+	u8         reserved_at_49a[0x3];
 	u8         cd_slave_receive[0x1];
 	u8         cd_slave_send[0x1];
 	u8         cd_master[0x1];
 
-	u8         reserved_25[0x3];
+	u8         reserved_at_4a0[0x3];
 	u8         min_rnr_nak[0x5];
 	u8         next_rcv_psn[0x18];
 
-	u8         reserved_26[0x8];
+	u8         reserved_at_4c0[0x8];
 	u8         xrcd[0x18];
 
-	u8         reserved_27[0x8];
+	u8         reserved_at_4e0[0x8];
 	u8         cqn_rcv[0x18];
 
 	u8         dbr_addr[0x40];
 
 	u8         q_key[0x20];
 
-	u8         reserved_28[0x5];
+	u8         reserved_at_560[0x5];
 	u8         rq_type[0x3];
 	u8         srqn_rmpn[0x18];
 
-	u8         reserved_29[0x8];
+	u8         reserved_at_580[0x8];
 	u8         rmsn[0x18];
 
 	u8         hw_sq_wqebb_counter[0x10];
@@ -1866,33 +1866,33 @@
 
 	u8         sw_rq_counter[0x20];
 
-	u8         reserved_30[0x20];
+	u8         reserved_at_600[0x20];
 
-	u8         reserved_31[0xf];
+	u8         reserved_at_620[0xf];
 	u8         cgs[0x1];
 	u8         cs_req[0x8];
 	u8         cs_res[0x8];
 
 	u8         dc_access_key[0x40];
 
-	u8         reserved_32[0xc0];
+	u8         reserved_at_680[0xc0];
 };
 
 struct mlx5_ifc_roce_addr_layout_bits {
 	u8         source_l3_address[16][0x8];
 
-	u8         reserved_0[0x3];
+	u8         reserved_at_80[0x3];
 	u8         vlan_valid[0x1];
 	u8         vlan_id[0xc];
 	u8         source_mac_47_32[0x10];
 
 	u8         source_mac_31_0[0x20];
 
-	u8         reserved_1[0x14];
+	u8         reserved_at_c0[0x14];
 	u8         roce_l3_type[0x4];
 	u8         roce_version[0x8];
 
-	u8         reserved_2[0x20];
+	u8         reserved_at_e0[0x20];
 };
 
 union mlx5_ifc_hca_cap_union_bits {
@@ -1904,7 +1904,7 @@
 	struct mlx5_ifc_flow_table_nic_cap_bits flow_table_nic_cap;
 	struct mlx5_ifc_flow_table_eswitch_cap_bits flow_table_eswitch_cap;
 	struct mlx5_ifc_e_switch_cap_bits e_switch_cap;
-	u8         reserved_0[0x8000];
+	u8         reserved_at_0[0x8000];
 };
 
 enum {
@@ -1914,24 +1914,24 @@
 };
 
 struct mlx5_ifc_flow_context_bits {
-	u8         reserved_0[0x20];
+	u8         reserved_at_0[0x20];
 
 	u8         group_id[0x20];
 
-	u8         reserved_1[0x8];
+	u8         reserved_at_40[0x8];
 	u8         flow_tag[0x18];
 
-	u8         reserved_2[0x10];
+	u8         reserved_at_60[0x10];
 	u8         action[0x10];
 
-	u8         reserved_3[0x8];
+	u8         reserved_at_80[0x8];
 	u8         destination_list_size[0x18];
 
-	u8         reserved_4[0x160];
+	u8         reserved_at_a0[0x160];
 
 	struct mlx5_ifc_fte_match_param_bits match_value;
 
-	u8         reserved_5[0x600];
+	u8         reserved_at_1200[0x600];
 
 	struct mlx5_ifc_dest_format_struct_bits destination[0];
 };
@@ -1944,43 +1944,43 @@
 struct mlx5_ifc_xrc_srqc_bits {
 	u8         state[0x4];
 	u8         log_xrc_srq_size[0x4];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         wq_signature[0x1];
 	u8         cont_srq[0x1];
-	u8         reserved_1[0x1];
+	u8         reserved_at_22[0x1];
 	u8         rlky[0x1];
 	u8         basic_cyclic_rcv_wqe[0x1];
 	u8         log_rq_stride[0x3];
 	u8         xrcd[0x18];
 
 	u8         page_offset[0x6];
-	u8         reserved_2[0x2];
+	u8         reserved_at_46[0x2];
 	u8         cqn[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 
 	u8         user_index_equal_xrc_srqn[0x1];
-	u8         reserved_4[0x1];
+	u8         reserved_at_81[0x1];
 	u8         log_page_size[0x6];
 	u8         user_index[0x18];
 
-	u8         reserved_5[0x20];
+	u8         reserved_at_a0[0x20];
 
-	u8         reserved_6[0x8];
+	u8         reserved_at_c0[0x8];
 	u8         pd[0x18];
 
 	u8         lwm[0x10];
 	u8         wqe_cnt[0x10];
 
-	u8         reserved_7[0x40];
+	u8         reserved_at_100[0x40];
 
 	u8         db_record_addr_h[0x20];
 
 	u8         db_record_addr_l[0x1e];
-	u8         reserved_8[0x2];
+	u8         reserved_at_17e[0x2];
 
-	u8         reserved_9[0x80];
+	u8         reserved_at_180[0x80];
 };
 
 struct mlx5_ifc_traffic_counter_bits {
@@ -1990,16 +1990,16 @@
 };
 
 struct mlx5_ifc_tisc_bits {
-	u8         reserved_0[0xc];
+	u8         reserved_at_0[0xc];
 	u8         prio[0x4];
-	u8         reserved_1[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_2[0x100];
+	u8         reserved_at_20[0x100];
 
-	u8         reserved_3[0x8];
+	u8         reserved_at_120[0x8];
 	u8         transport_domain[0x18];
 
-	u8         reserved_4[0x3c0];
+	u8         reserved_at_140[0x3c0];
 };
 
 enum {
@@ -2024,31 +2024,31 @@
 };
 
 struct mlx5_ifc_tirc_bits {
-	u8         reserved_0[0x20];
+	u8         reserved_at_0[0x20];
 
 	u8         disp_type[0x4];
-	u8         reserved_1[0x1c];
+	u8         reserved_at_24[0x1c];
 
-	u8         reserved_2[0x40];
+	u8         reserved_at_40[0x40];
 
-	u8         reserved_3[0x4];
+	u8         reserved_at_80[0x4];
 	u8         lro_timeout_period_usecs[0x10];
 	u8         lro_enable_mask[0x4];
 	u8         lro_max_ip_payload_size[0x8];
 
-	u8         reserved_4[0x40];
+	u8         reserved_at_a0[0x40];
 
-	u8         reserved_5[0x8];
+	u8         reserved_at_e0[0x8];
 	u8         inline_rqn[0x18];
 
 	u8         rx_hash_symmetric[0x1];
-	u8         reserved_6[0x1];
+	u8         reserved_at_101[0x1];
 	u8         tunneled_offload_en[0x1];
-	u8         reserved_7[0x5];
+	u8         reserved_at_103[0x5];
 	u8         indirect_table[0x18];
 
 	u8         rx_hash_fn[0x4];
-	u8         reserved_8[0x2];
+	u8         reserved_at_124[0x2];
 	u8         self_lb_block[0x2];
 	u8         transport_domain[0x18];
 
@@ -2058,7 +2058,7 @@
 
 	struct mlx5_ifc_rx_hash_field_select_bits rx_hash_field_selector_inner;
 
-	u8         reserved_9[0x4c0];
+	u8         reserved_at_2c0[0x4c0];
 };
 
 enum {
@@ -2069,39 +2069,39 @@
 struct mlx5_ifc_srqc_bits {
 	u8         state[0x4];
 	u8         log_srq_size[0x4];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         wq_signature[0x1];
 	u8         cont_srq[0x1];
-	u8         reserved_1[0x1];
+	u8         reserved_at_22[0x1];
 	u8         rlky[0x1];
-	u8         reserved_2[0x1];
+	u8         reserved_at_24[0x1];
 	u8         log_rq_stride[0x3];
 	u8         xrcd[0x18];
 
 	u8         page_offset[0x6];
-	u8         reserved_3[0x2];
+	u8         reserved_at_46[0x2];
 	u8         cqn[0x18];
 
-	u8         reserved_4[0x20];
+	u8         reserved_at_60[0x20];
 
-	u8         reserved_5[0x2];
+	u8         reserved_at_80[0x2];
 	u8         log_page_size[0x6];
-	u8         reserved_6[0x18];
+	u8         reserved_at_88[0x18];
 
-	u8         reserved_7[0x20];
+	u8         reserved_at_a0[0x20];
 
-	u8         reserved_8[0x8];
+	u8         reserved_at_c0[0x8];
 	u8         pd[0x18];
 
 	u8         lwm[0x10];
 	u8         wqe_cnt[0x10];
 
-	u8         reserved_9[0x40];
+	u8         reserved_at_100[0x40];
 
 	u8         dbr_addr[0x40];
 
-	u8         reserved_10[0x80];
+	u8         reserved_at_180[0x80];
 };
 
 enum {
@@ -2115,39 +2115,39 @@
 	u8         cd_master[0x1];
 	u8         fre[0x1];
 	u8         flush_in_error_en[0x1];
-	u8         reserved_0[0x4];
+	u8         reserved_at_4[0x4];
 	u8         state[0x4];
-	u8         reserved_1[0x14];
+	u8         reserved_at_c[0x14];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_20[0x8];
 	u8         user_index[0x18];
 
-	u8         reserved_3[0x8];
+	u8         reserved_at_40[0x8];
 	u8         cqn[0x18];
 
-	u8         reserved_4[0xa0];
+	u8         reserved_at_60[0xa0];
 
 	u8         tis_lst_sz[0x10];
-	u8         reserved_5[0x10];
+	u8         reserved_at_110[0x10];
 
-	u8         reserved_6[0x40];
+	u8         reserved_at_120[0x40];
 
-	u8         reserved_7[0x8];
+	u8         reserved_at_160[0x8];
 	u8         tis_num_0[0x18];
 
 	struct mlx5_ifc_wq_bits wq;
 };
 
 struct mlx5_ifc_rqtc_bits {
-	u8         reserved_0[0xa0];
+	u8         reserved_at_0[0xa0];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_a0[0x10];
 	u8         rqt_max_size[0x10];
 
-	u8         reserved_2[0x10];
+	u8         reserved_at_c0[0x10];
 	u8         rqt_actual_size[0x10];
 
-	u8         reserved_3[0x6a0];
+	u8         reserved_at_e0[0x6a0];
 
 	struct mlx5_ifc_rq_num_bits rq_num[0];
 };
@@ -2165,27 +2165,27 @@
 
 struct mlx5_ifc_rqc_bits {
 	u8         rlky[0x1];
-	u8         reserved_0[0x2];
+	u8         reserved_at_1[0x2];
 	u8         vsd[0x1];
 	u8         mem_rq_type[0x4];
 	u8         state[0x4];
-	u8         reserved_1[0x1];
+	u8         reserved_at_c[0x1];
 	u8         flush_in_error_en[0x1];
-	u8         reserved_2[0x12];
+	u8         reserved_at_e[0x12];
 
-	u8         reserved_3[0x8];
+	u8         reserved_at_20[0x8];
 	u8         user_index[0x18];
 
-	u8         reserved_4[0x8];
+	u8         reserved_at_40[0x8];
 	u8         cqn[0x18];
 
 	u8         counter_set_id[0x8];
-	u8         reserved_5[0x18];
+	u8         reserved_at_68[0x18];
 
-	u8         reserved_6[0x8];
+	u8         reserved_at_80[0x8];
 	u8         rmpn[0x18];
 
-	u8         reserved_7[0xe0];
+	u8         reserved_at_a0[0xe0];
 
 	struct mlx5_ifc_wq_bits wq;
 };
@@ -2196,31 +2196,31 @@
 };
 
 struct mlx5_ifc_rmpc_bits {
-	u8         reserved_0[0x8];
+	u8         reserved_at_0[0x8];
 	u8         state[0x4];
-	u8         reserved_1[0x14];
+	u8         reserved_at_c[0x14];
 
 	u8         basic_cyclic_rcv_wqe[0x1];
-	u8         reserved_2[0x1f];
+	u8         reserved_at_21[0x1f];
 
-	u8         reserved_3[0x140];
+	u8         reserved_at_40[0x140];
 
 	struct mlx5_ifc_wq_bits wq;
 };
 
 struct mlx5_ifc_nic_vport_context_bits {
-	u8         reserved_0[0x1f];
+	u8         reserved_at_0[0x1f];
 	u8         roce_en[0x1];
 
 	u8         arm_change_event[0x1];
-	u8         reserved_1[0x1a];
+	u8         reserved_at_21[0x1a];
 	u8         event_on_mtu[0x1];
 	u8         event_on_promisc_change[0x1];
 	u8         event_on_vlan_change[0x1];
 	u8         event_on_mc_address_change[0x1];
 	u8         event_on_uc_address_change[0x1];
 
-	u8         reserved_2[0xf0];
+	u8         reserved_at_40[0xf0];
 
 	u8         mtu[0x10];
 
@@ -2228,21 +2228,21 @@
 	u8         port_guid[0x40];
 	u8         node_guid[0x40];
 
-	u8         reserved_3[0x140];
+	u8         reserved_at_200[0x140];
 	u8         qkey_violation_counter[0x10];
-	u8         reserved_4[0x430];
+	u8         reserved_at_350[0x430];
 
 	u8         promisc_uc[0x1];
 	u8         promisc_mc[0x1];
 	u8         promisc_all[0x1];
-	u8         reserved_5[0x2];
+	u8         reserved_at_783[0x2];
 	u8         allowed_list_type[0x3];
-	u8         reserved_6[0xc];
+	u8         reserved_at_788[0xc];
 	u8         allowed_list_size[0xc];
 
 	struct mlx5_ifc_mac_address_layout_bits permanent_address;
 
-	u8         reserved_7[0x20];
+	u8         reserved_at_7e0[0x20];
 
 	u8         current_uc_mac_address[0][0x40];
 };
@@ -2254,9 +2254,9 @@
 };
 
 struct mlx5_ifc_mkc_bits {
-	u8         reserved_0[0x1];
+	u8         reserved_at_0[0x1];
 	u8         free[0x1];
-	u8         reserved_1[0xd];
+	u8         reserved_at_2[0xd];
 	u8         small_fence_on_rdma_read_response[0x1];
 	u8         umr_en[0x1];
 	u8         a[0x1];
@@ -2265,19 +2265,19 @@
 	u8         lw[0x1];
 	u8         lr[0x1];
 	u8         access_mode[0x2];
-	u8         reserved_2[0x8];
+	u8         reserved_at_18[0x8];
 
 	u8         qpn[0x18];
 	u8         mkey_7_0[0x8];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_40[0x20];
 
 	u8         length64[0x1];
 	u8         bsf_en[0x1];
 	u8         sync_umr[0x1];
-	u8         reserved_4[0x2];
+	u8         reserved_at_63[0x2];
 	u8         expected_sigerr_count[0x1];
-	u8         reserved_5[0x1];
+	u8         reserved_at_66[0x1];
 	u8         en_rinval[0x1];
 	u8         pd[0x18];
 
@@ -2287,18 +2287,18 @@
 
 	u8         bsf_octword_size[0x20];
 
-	u8         reserved_6[0x80];
+	u8         reserved_at_120[0x80];
 
 	u8         translations_octword_size[0x20];
 
-	u8         reserved_7[0x1b];
+	u8         reserved_at_1c0[0x1b];
 	u8         log_page_size[0x5];
 
-	u8         reserved_8[0x20];
+	u8         reserved_at_1e0[0x20];
 };
 
 struct mlx5_ifc_pkey_bits {
-	u8         reserved_0[0x10];
+	u8         reserved_at_0[0x10];
 	u8         pkey[0x10];
 };
 
@@ -2309,19 +2309,19 @@
 struct mlx5_ifc_hca_vport_context_bits {
 	u8         field_select[0x20];
 
-	u8         reserved_0[0xe0];
+	u8         reserved_at_20[0xe0];
 
 	u8         sm_virt_aware[0x1];
 	u8         has_smi[0x1];
 	u8         has_raw[0x1];
 	u8         grh_required[0x1];
-	u8         reserved_1[0xc];
+	u8         reserved_at_104[0xc];
 	u8         port_physical_state[0x4];
 	u8         vport_state_policy[0x4];
 	u8         port_state[0x4];
 	u8         vport_state[0x4];
 
-	u8         reserved_2[0x20];
+	u8         reserved_at_120[0x20];
 
 	u8         system_image_guid[0x40];
 
@@ -2337,33 +2337,33 @@
 
 	u8         cap_mask2_field_select[0x20];
 
-	u8         reserved_3[0x80];
+	u8         reserved_at_280[0x80];
 
 	u8         lid[0x10];
-	u8         reserved_4[0x4];
+	u8         reserved_at_310[0x4];
 	u8         init_type_reply[0x4];
 	u8         lmc[0x3];
 	u8         subnet_timeout[0x5];
 
 	u8         sm_lid[0x10];
 	u8         sm_sl[0x4];
-	u8         reserved_5[0xc];
+	u8         reserved_at_334[0xc];
 
 	u8         qkey_violation_counter[0x10];
 	u8         pkey_violation_counter[0x10];
 
-	u8         reserved_6[0xca0];
+	u8         reserved_at_360[0xca0];
 };
 
 struct mlx5_ifc_esw_vport_context_bits {
-	u8         reserved_0[0x3];
+	u8         reserved_at_0[0x3];
 	u8         vport_svlan_strip[0x1];
 	u8         vport_cvlan_strip[0x1];
 	u8         vport_svlan_insert[0x1];
 	u8         vport_cvlan_insert[0x2];
-	u8         reserved_1[0x18];
+	u8         reserved_at_8[0x18];
 
-	u8         reserved_2[0x20];
+	u8         reserved_at_20[0x20];
 
 	u8         svlan_cfi[0x1];
 	u8         svlan_pcp[0x3];
@@ -2372,7 +2372,7 @@
 	u8         cvlan_pcp[0x3];
 	u8         cvlan_id[0xc];
 
-	u8         reserved_3[0x7a0];
+	u8         reserved_at_60[0x7a0];
 };
 
 enum {
@@ -2387,41 +2387,41 @@
 
 struct mlx5_ifc_eqc_bits {
 	u8         status[0x4];
-	u8         reserved_0[0x9];
+	u8         reserved_at_4[0x9];
 	u8         ec[0x1];
 	u8         oi[0x1];
-	u8         reserved_1[0x5];
+	u8         reserved_at_f[0x5];
 	u8         st[0x4];
-	u8         reserved_2[0x8];
+	u8         reserved_at_18[0x8];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_20[0x20];
 
-	u8         reserved_4[0x14];
+	u8         reserved_at_40[0x14];
 	u8         page_offset[0x6];
-	u8         reserved_5[0x6];
+	u8         reserved_at_5a[0x6];
 
-	u8         reserved_6[0x3];
+	u8         reserved_at_60[0x3];
 	u8         log_eq_size[0x5];
 	u8         uar_page[0x18];
 
-	u8         reserved_7[0x20];
+	u8         reserved_at_80[0x20];
 
-	u8         reserved_8[0x18];
+	u8         reserved_at_a0[0x18];
 	u8         intr[0x8];
 
-	u8         reserved_9[0x3];
+	u8         reserved_at_c0[0x3];
 	u8         log_page_size[0x5];
-	u8         reserved_10[0x18];
+	u8         reserved_at_c8[0x18];
 
-	u8         reserved_11[0x60];
+	u8         reserved_at_e0[0x60];
 
-	u8         reserved_12[0x8];
+	u8         reserved_at_140[0x8];
 	u8         consumer_counter[0x18];
 
-	u8         reserved_13[0x8];
+	u8         reserved_at_160[0x8];
 	u8         producer_counter[0x18];
 
-	u8         reserved_14[0x80];
+	u8         reserved_at_180[0x80];
 };
 
 enum {
@@ -2445,14 +2445,14 @@
 };
 
 struct mlx5_ifc_dctc_bits {
-	u8         reserved_0[0x4];
+	u8         reserved_at_0[0x4];
 	u8         state[0x4];
-	u8         reserved_1[0x18];
+	u8         reserved_at_8[0x18];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_20[0x8];
 	u8         user_index[0x18];
 
-	u8         reserved_3[0x8];
+	u8         reserved_at_40[0x8];
 	u8         cqn[0x18];
 
 	u8         counter_set_id[0x8];
@@ -2464,45 +2464,45 @@
 	u8         latency_sensitive[0x1];
 	u8         rlky[0x1];
 	u8         free_ar[0x1];
-	u8         reserved_4[0xd];
+	u8         reserved_at_73[0xd];
 
-	u8         reserved_5[0x8];
+	u8         reserved_at_80[0x8];
 	u8         cs_res[0x8];
-	u8         reserved_6[0x3];
+	u8         reserved_at_90[0x3];
 	u8         min_rnr_nak[0x5];
-	u8         reserved_7[0x8];
+	u8         reserved_at_98[0x8];
 
-	u8         reserved_8[0x8];
+	u8         reserved_at_a0[0x8];
 	u8         srqn[0x18];
 
-	u8         reserved_9[0x8];
+	u8         reserved_at_c0[0x8];
 	u8         pd[0x18];
 
 	u8         tclass[0x8];
-	u8         reserved_10[0x4];
+	u8         reserved_at_e8[0x4];
 	u8         flow_label[0x14];
 
 	u8         dc_access_key[0x40];
 
-	u8         reserved_11[0x5];
+	u8         reserved_at_140[0x5];
 	u8         mtu[0x3];
 	u8         port[0x8];
 	u8         pkey_index[0x10];
 
-	u8         reserved_12[0x8];
+	u8         reserved_at_160[0x8];
 	u8         my_addr_index[0x8];
-	u8         reserved_13[0x8];
+	u8         reserved_at_170[0x8];
 	u8         hop_limit[0x8];
 
 	u8         dc_access_key_violation_count[0x20];
 
-	u8         reserved_14[0x14];
+	u8         reserved_at_1a0[0x14];
 	u8         dei_cfi[0x1];
 	u8         eth_prio[0x3];
 	u8         ecn[0x2];
 	u8         dscp[0x6];
 
-	u8         reserved_15[0x40];
+	u8         reserved_at_1c0[0x40];
 };
 
 enum {
@@ -2524,54 +2524,54 @@
 
 struct mlx5_ifc_cqc_bits {
 	u8         status[0x4];
-	u8         reserved_0[0x4];
+	u8         reserved_at_4[0x4];
 	u8         cqe_sz[0x3];
 	u8         cc[0x1];
-	u8         reserved_1[0x1];
+	u8         reserved_at_c[0x1];
 	u8         scqe_break_moderation_en[0x1];
 	u8         oi[0x1];
-	u8         reserved_2[0x2];
+	u8         reserved_at_f[0x2];
 	u8         cqe_zip_en[0x1];
 	u8         mini_cqe_res_format[0x2];
 	u8         st[0x4];
-	u8         reserved_3[0x8];
+	u8         reserved_at_18[0x8];
 
-	u8         reserved_4[0x20];
+	u8         reserved_at_20[0x20];
 
-	u8         reserved_5[0x14];
+	u8         reserved_at_40[0x14];
 	u8         page_offset[0x6];
-	u8         reserved_6[0x6];
+	u8         reserved_at_5a[0x6];
 
-	u8         reserved_7[0x3];
+	u8         reserved_at_60[0x3];
 	u8         log_cq_size[0x5];
 	u8         uar_page[0x18];
 
-	u8         reserved_8[0x4];
+	u8         reserved_at_80[0x4];
 	u8         cq_period[0xc];
 	u8         cq_max_count[0x10];
 
-	u8         reserved_9[0x18];
+	u8         reserved_at_a0[0x18];
 	u8         c_eqn[0x8];
 
-	u8         reserved_10[0x3];
+	u8         reserved_at_c0[0x3];
 	u8         log_page_size[0x5];
-	u8         reserved_11[0x18];
+	u8         reserved_at_c8[0x18];
 
-	u8         reserved_12[0x20];
+	u8         reserved_at_e0[0x20];
 
-	u8         reserved_13[0x8];
+	u8         reserved_at_100[0x8];
 	u8         last_notified_index[0x18];
 
-	u8         reserved_14[0x8];
+	u8         reserved_at_120[0x8];
 	u8         last_solicit_index[0x18];
 
-	u8         reserved_15[0x8];
+	u8         reserved_at_140[0x8];
 	u8         consumer_counter[0x18];
 
-	u8         reserved_16[0x8];
+	u8         reserved_at_160[0x8];
 	u8         producer_counter[0x18];
 
-	u8         reserved_17[0x40];
+	u8         reserved_at_180[0x40];
 
 	u8         dbr_addr[0x40];
 };
@@ -2580,16 +2580,16 @@
 	struct mlx5_ifc_cong_control_802_1qau_rp_bits cong_control_802_1qau_rp;
 	struct mlx5_ifc_cong_control_r_roce_ecn_rp_bits cong_control_r_roce_ecn_rp;
 	struct mlx5_ifc_cong_control_r_roce_ecn_np_bits cong_control_r_roce_ecn_np;
-	u8         reserved_0[0x800];
+	u8         reserved_at_0[0x800];
 };
 
 struct mlx5_ifc_query_adapter_param_block_bits {
-	u8         reserved_0[0xc0];
+	u8         reserved_at_0[0xc0];
 
-	u8         reserved_1[0x8];
+	u8         reserved_at_c0[0x8];
 	u8         ieee_vendor_id[0x18];
 
-	u8         reserved_2[0x10];
+	u8         reserved_at_e0[0x10];
 	u8         vsd_vendor_id[0x10];
 
 	u8         vsd[208][0x8];
@@ -2600,14 +2600,14 @@
 union mlx5_ifc_modify_field_select_resize_field_select_auto_bits {
 	struct mlx5_ifc_modify_field_select_bits modify_field_select;
 	struct mlx5_ifc_resize_field_select_bits resize_field_select;
-	u8         reserved_0[0x20];
+	u8         reserved_at_0[0x20];
 };
 
 union mlx5_ifc_field_select_802_1_r_roce_auto_bits {
 	struct mlx5_ifc_field_select_802_1qau_rp_bits field_select_802_1qau_rp;
 	struct mlx5_ifc_field_select_r_roce_rp_bits field_select_r_roce_rp;
 	struct mlx5_ifc_field_select_r_roce_np_bits field_select_r_roce_np;
-	u8         reserved_0[0x20];
+	u8         reserved_at_0[0x20];
 };
 
 union mlx5_ifc_eth_cntrs_grp_data_layout_auto_bits {
@@ -2619,7 +2619,7 @@
 	struct mlx5_ifc_eth_per_prio_grp_data_layout_bits eth_per_prio_grp_data_layout;
 	struct mlx5_ifc_eth_per_traffic_grp_data_layout_bits eth_per_traffic_grp_data_layout;
 	struct mlx5_ifc_phys_layer_cntrs_bits phys_layer_cntrs;
-	u8         reserved_0[0x7c0];
+	u8         reserved_at_0[0x7c0];
 };
 
 union mlx5_ifc_event_auto_bits {
@@ -2635,23 +2635,23 @@
 	struct mlx5_ifc_db_bf_congestion_event_bits db_bf_congestion_event;
 	struct mlx5_ifc_stall_vl_event_bits stall_vl_event;
 	struct mlx5_ifc_cmd_inter_comp_event_bits cmd_inter_comp_event;
-	u8         reserved_0[0xe0];
+	u8         reserved_at_0[0xe0];
 };
 
 struct mlx5_ifc_health_buffer_bits {
-	u8         reserved_0[0x100];
+	u8         reserved_at_0[0x100];
 
 	u8         assert_existptr[0x20];
 
 	u8         assert_callra[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_140[0x40];
 
 	u8         fw_version[0x20];
 
 	u8         hw_id[0x20];
 
-	u8         reserved_2[0x20];
+	u8         reserved_at_1c0[0x20];
 
 	u8         irisc_index[0x8];
 	u8         synd[0x8];
@@ -2660,20 +2660,20 @@
 
 struct mlx5_ifc_register_loopback_control_bits {
 	u8         no_lb[0x1];
-	u8         reserved_0[0x7];
+	u8         reserved_at_1[0x7];
 	u8         port[0x8];
-	u8         reserved_1[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_2[0x60];
+	u8         reserved_at_20[0x60];
 };
 
 struct mlx5_ifc_teardown_hca_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 enum {
@@ -2683,108 +2683,108 @@
 
 struct mlx5_ifc_teardown_hca_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x10];
+	u8         reserved_at_40[0x10];
 	u8         profile[0x10];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_sqerr2rts_qp_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_sqerr2rts_qp_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         qpn[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 
 	u8         opt_param_mask[0x20];
 
-	u8         reserved_4[0x20];
+	u8         reserved_at_a0[0x20];
 
 	struct mlx5_ifc_qpc_bits qpc;
 
-	u8         reserved_5[0x80];
+	u8         reserved_at_800[0x80];
 };
 
 struct mlx5_ifc_sqd2rts_qp_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_sqd2rts_qp_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         qpn[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 
 	u8         opt_param_mask[0x20];
 
-	u8         reserved_4[0x20];
+	u8         reserved_at_a0[0x20];
 
 	struct mlx5_ifc_qpc_bits qpc;
 
-	u8         reserved_5[0x80];
+	u8         reserved_at_800[0x80];
 };
 
 struct mlx5_ifc_set_roce_address_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_set_roce_address_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
 	u8         roce_address_index[0x10];
-	u8         reserved_2[0x10];
+	u8         reserved_at_50[0x10];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 
 	struct mlx5_ifc_roce_addr_layout_bits roce_address;
 };
 
 struct mlx5_ifc_set_mad_demux_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 enum {
@@ -2794,89 +2794,89 @@
 
 struct mlx5_ifc_set_mad_demux_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x20];
+	u8         reserved_at_40[0x20];
 
-	u8         reserved_3[0x6];
+	u8         reserved_at_60[0x6];
 	u8         demux_mode[0x2];
-	u8         reserved_4[0x18];
+	u8         reserved_at_68[0x18];
 };
 
 struct mlx5_ifc_set_l2_table_entry_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_set_l2_table_entry_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x60];
+	u8         reserved_at_40[0x60];
 
-	u8         reserved_3[0x8];
+	u8         reserved_at_a0[0x8];
 	u8         table_index[0x18];
 
-	u8         reserved_4[0x20];
+	u8         reserved_at_c0[0x20];
 
-	u8         reserved_5[0x13];
+	u8         reserved_at_e0[0x13];
 	u8         vlan_valid[0x1];
 	u8         vlan[0xc];
 
 	struct mlx5_ifc_mac_address_layout_bits mac_address;
 
-	u8         reserved_6[0xc0];
+	u8         reserved_at_140[0xc0];
 };
 
 struct mlx5_ifc_set_issi_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_set_issi_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x10];
+	u8         reserved_at_40[0x10];
 	u8         current_issi[0x10];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_set_hca_cap_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_set_hca_cap_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x40];
+	u8         reserved_at_40[0x40];
 
 	union mlx5_ifc_hca_cap_union_bits capability;
 };
@@ -2890,156 +2890,156 @@
 
 struct mlx5_ifc_set_fte_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_set_fte_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x40];
+	u8         reserved_at_40[0x40];
 
 	u8         table_type[0x8];
-	u8         reserved_3[0x18];
+	u8         reserved_at_88[0x18];
 
-	u8         reserved_4[0x8];
+	u8         reserved_at_a0[0x8];
 	u8         table_id[0x18];
 
-	u8         reserved_5[0x18];
+	u8         reserved_at_c0[0x18];
 	u8         modify_enable_mask[0x8];
 
-	u8         reserved_6[0x20];
+	u8         reserved_at_e0[0x20];
 
 	u8         flow_index[0x20];
 
-	u8         reserved_7[0xe0];
+	u8         reserved_at_120[0xe0];
 
 	struct mlx5_ifc_flow_context_bits flow_context;
 };
 
 struct mlx5_ifc_rts2rts_qp_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_rts2rts_qp_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         qpn[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 
 	u8         opt_param_mask[0x20];
 
-	u8         reserved_4[0x20];
+	u8         reserved_at_a0[0x20];
 
 	struct mlx5_ifc_qpc_bits qpc;
 
-	u8         reserved_5[0x80];
+	u8         reserved_at_800[0x80];
 };
 
 struct mlx5_ifc_rtr2rts_qp_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_rtr2rts_qp_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         qpn[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 
 	u8         opt_param_mask[0x20];
 
-	u8         reserved_4[0x20];
+	u8         reserved_at_a0[0x20];
 
 	struct mlx5_ifc_qpc_bits qpc;
 
-	u8         reserved_5[0x80];
+	u8         reserved_at_800[0x80];
 };
 
 struct mlx5_ifc_rst2init_qp_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_rst2init_qp_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         qpn[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 
 	u8         opt_param_mask[0x20];
 
-	u8         reserved_4[0x20];
+	u8         reserved_at_a0[0x20];
 
 	struct mlx5_ifc_qpc_bits qpc;
 
-	u8         reserved_5[0x80];
+	u8         reserved_at_800[0x80];
 };
 
 struct mlx5_ifc_query_xrc_srq_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 
 	struct mlx5_ifc_xrc_srqc_bits xrc_srq_context_entry;
 
-	u8         reserved_2[0x600];
+	u8         reserved_at_280[0x600];
 
 	u8         pas[0][0x40];
 };
 
 struct mlx5_ifc_query_xrc_srq_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         xrc_srqn[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 enum {
@@ -3049,13 +3049,13 @@
 
 struct mlx5_ifc_query_vport_state_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x20];
+	u8         reserved_at_40[0x20];
 
-	u8         reserved_2[0x18];
+	u8         reserved_at_60[0x18];
 	u8         admin_state[0x4];
 	u8         state[0x4];
 };
@@ -3067,25 +3067,25 @@
 
 struct mlx5_ifc_query_vport_state_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
 	u8         other_vport[0x1];
-	u8         reserved_2[0xf];
+	u8         reserved_at_41[0xf];
 	u8         vport_number[0x10];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_query_vport_counter_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 
 	struct mlx5_ifc_traffic_counter_bits received_errors;
 
@@ -3111,7 +3111,7 @@
 
 	struct mlx5_ifc_traffic_counter_bits transmitted_eth_multicast;
 
-	u8         reserved_2[0xa00];
+	u8         reserved_at_680[0xa00];
 };
 
 enum {
@@ -3120,328 +3120,328 @@
 
 struct mlx5_ifc_query_vport_counter_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
 	u8         other_vport[0x1];
-	u8         reserved_2[0xf];
+	u8         reserved_at_41[0xf];
 	u8         vport_number[0x10];
 
-	u8         reserved_3[0x60];
+	u8         reserved_at_60[0x60];
 
 	u8         clear[0x1];
-	u8         reserved_4[0x1f];
+	u8         reserved_at_c1[0x1f];
 
-	u8         reserved_5[0x20];
+	u8         reserved_at_e0[0x20];
 };
 
 struct mlx5_ifc_query_tis_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 
 	struct mlx5_ifc_tisc_bits tis_context;
 };
 
 struct mlx5_ifc_query_tis_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         tisn[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_query_tir_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0xc0];
+	u8         reserved_at_40[0xc0];
 
 	struct mlx5_ifc_tirc_bits tir_context;
 };
 
 struct mlx5_ifc_query_tir_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         tirn[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_query_srq_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 
 	struct mlx5_ifc_srqc_bits srq_context_entry;
 
-	u8         reserved_2[0x600];
+	u8         reserved_at_280[0x600];
 
 	u8         pas[0][0x40];
 };
 
 struct mlx5_ifc_query_srq_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         srqn[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_query_sq_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0xc0];
+	u8         reserved_at_40[0xc0];
 
 	struct mlx5_ifc_sqc_bits sq_context;
 };
 
 struct mlx5_ifc_query_sq_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         sqn[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_query_special_contexts_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x20];
+	u8         reserved_at_40[0x20];
 
 	u8         resd_lkey[0x20];
 };
 
 struct mlx5_ifc_query_special_contexts_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_query_rqt_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0xc0];
+	u8         reserved_at_40[0xc0];
 
 	struct mlx5_ifc_rqtc_bits rqt_context;
 };
 
 struct mlx5_ifc_query_rqt_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         rqtn[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_query_rq_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0xc0];
+	u8         reserved_at_40[0xc0];
 
 	struct mlx5_ifc_rqc_bits rq_context;
 };
 
 struct mlx5_ifc_query_rq_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         rqn[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_query_roce_address_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 
 	struct mlx5_ifc_roce_addr_layout_bits roce_address;
 };
 
 struct mlx5_ifc_query_roce_address_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
 	u8         roce_address_index[0x10];
-	u8         reserved_2[0x10];
+	u8         reserved_at_50[0x10];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_query_rmp_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0xc0];
+	u8         reserved_at_40[0xc0];
 
 	struct mlx5_ifc_rmpc_bits rmp_context;
 };
 
 struct mlx5_ifc_query_rmp_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         rmpn[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_query_qp_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 
 	u8         opt_param_mask[0x20];
 
-	u8         reserved_2[0x20];
+	u8         reserved_at_a0[0x20];
 
 	struct mlx5_ifc_qpc_bits qpc;
 
-	u8         reserved_3[0x80];
+	u8         reserved_at_800[0x80];
 
 	u8         pas[0][0x40];
 };
 
 struct mlx5_ifc_query_qp_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         qpn[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_query_q_counter_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 
 	u8         rx_write_requests[0x20];
 
-	u8         reserved_2[0x20];
+	u8         reserved_at_a0[0x20];
 
 	u8         rx_read_requests[0x20];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_e0[0x20];
 
 	u8         rx_atomic_requests[0x20];
 
-	u8         reserved_4[0x20];
+	u8         reserved_at_120[0x20];
 
 	u8         rx_dct_connect[0x20];
 
-	u8         reserved_5[0x20];
+	u8         reserved_at_160[0x20];
 
 	u8         out_of_buffer[0x20];
 
-	u8         reserved_6[0x20];
+	u8         reserved_at_1a0[0x20];
 
 	u8         out_of_sequence[0x20];
 
-	u8         reserved_7[0x620];
+	u8         reserved_at_1e0[0x620];
 };
 
 struct mlx5_ifc_query_q_counter_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x80];
+	u8         reserved_at_40[0x80];
 
 	u8         clear[0x1];
-	u8         reserved_3[0x1f];
+	u8         reserved_at_c1[0x1f];
 
-	u8         reserved_4[0x18];
+	u8         reserved_at_e0[0x18];
 	u8         counter_set_id[0x8];
 };
 
 struct mlx5_ifc_query_pages_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_40[0x10];
 	u8         function_id[0x10];
 
 	u8         num_pages[0x20];
@@ -3455,55 +3455,55 @@
 
 struct mlx5_ifc_query_pages_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x10];
+	u8         reserved_at_40[0x10];
 	u8         function_id[0x10];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_query_nic_vport_context_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 
 	struct mlx5_ifc_nic_vport_context_bits nic_vport_context;
 };
 
 struct mlx5_ifc_query_nic_vport_context_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
 	u8         other_vport[0x1];
-	u8         reserved_2[0xf];
+	u8         reserved_at_41[0xf];
 	u8         vport_number[0x10];
 
-	u8         reserved_3[0x5];
+	u8         reserved_at_60[0x5];
 	u8         allowed_list_type[0x3];
-	u8         reserved_4[0x18];
+	u8         reserved_at_68[0x18];
 };
 
 struct mlx5_ifc_query_mkey_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 
 	struct mlx5_ifc_mkc_bits memory_key_mkey_entry;
 
-	u8         reserved_2[0x600];
+	u8         reserved_at_280[0x600];
 
 	u8         bsf0_klm0_pas_mtt0_1[16][0x8];
 
@@ -3512,265 +3512,265 @@
 
 struct mlx5_ifc_query_mkey_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         mkey_index[0x18];
 
 	u8         pg_access[0x1];
-	u8         reserved_3[0x1f];
+	u8         reserved_at_61[0x1f];
 };
 
 struct mlx5_ifc_query_mad_demux_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 
 	u8         mad_dumux_parameters_block[0x20];
 };
 
 struct mlx5_ifc_query_mad_demux_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_query_l2_table_entry_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0xa0];
+	u8         reserved_at_40[0xa0];
 
-	u8         reserved_2[0x13];
+	u8         reserved_at_e0[0x13];
 	u8         vlan_valid[0x1];
 	u8         vlan[0xc];
 
 	struct mlx5_ifc_mac_address_layout_bits mac_address;
 
-	u8         reserved_3[0xc0];
+	u8         reserved_at_140[0xc0];
 };
 
 struct mlx5_ifc_query_l2_table_entry_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x60];
+	u8         reserved_at_40[0x60];
 
-	u8         reserved_3[0x8];
+	u8         reserved_at_a0[0x8];
 	u8         table_index[0x18];
 
-	u8         reserved_4[0x140];
+	u8         reserved_at_c0[0x140];
 };
 
 struct mlx5_ifc_query_issi_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_40[0x10];
 	u8         current_issi[0x10];
 
-	u8         reserved_2[0xa0];
+	u8         reserved_at_60[0xa0];
 
-	u8         supported_issi_reserved[76][0x8];
+	u8         reserved_at_100[76][0x8];
 	u8         supported_issi_dw0[0x20];
 };
 
 struct mlx5_ifc_query_issi_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_query_hca_vport_pkey_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 
 	struct mlx5_ifc_pkey_bits pkey[0];
 };
 
 struct mlx5_ifc_query_hca_vport_pkey_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
 	u8         other_vport[0x1];
-	u8         reserved_2[0xb];
+	u8         reserved_at_41[0xb];
 	u8         port_num[0x4];
 	u8         vport_number[0x10];
 
-	u8         reserved_3[0x10];
+	u8         reserved_at_60[0x10];
 	u8         pkey_index[0x10];
 };
 
 struct mlx5_ifc_query_hca_vport_gid_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x20];
+	u8         reserved_at_40[0x20];
 
 	u8         gids_num[0x10];
-	u8         reserved_2[0x10];
+	u8         reserved_at_70[0x10];
 
 	struct mlx5_ifc_array128_auto_bits gid[0];
 };
 
 struct mlx5_ifc_query_hca_vport_gid_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
 	u8         other_vport[0x1];
-	u8         reserved_2[0xb];
+	u8         reserved_at_41[0xb];
 	u8         port_num[0x4];
 	u8         vport_number[0x10];
 
-	u8         reserved_3[0x10];
+	u8         reserved_at_60[0x10];
 	u8         gid_index[0x10];
 };
 
 struct mlx5_ifc_query_hca_vport_context_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 
 	struct mlx5_ifc_hca_vport_context_bits hca_vport_context;
 };
 
 struct mlx5_ifc_query_hca_vport_context_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
 	u8         other_vport[0x1];
-	u8         reserved_2[0xb];
+	u8         reserved_at_41[0xb];
 	u8         port_num[0x4];
 	u8         vport_number[0x10];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_query_hca_cap_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 
 	union mlx5_ifc_hca_cap_union_bits capability;
 };
 
 struct mlx5_ifc_query_hca_cap_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_query_flow_table_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x80];
+	u8         reserved_at_40[0x80];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_c0[0x8];
 	u8         level[0x8];
-	u8         reserved_3[0x8];
+	u8         reserved_at_d0[0x8];
 	u8         log_size[0x8];
 
-	u8         reserved_4[0x120];
+	u8         reserved_at_e0[0x120];
 };
 
 struct mlx5_ifc_query_flow_table_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x40];
+	u8         reserved_at_40[0x40];
 
 	u8         table_type[0x8];
-	u8         reserved_3[0x18];
+	u8         reserved_at_88[0x18];
 
-	u8         reserved_4[0x8];
+	u8         reserved_at_a0[0x8];
 	u8         table_id[0x18];
 
-	u8         reserved_5[0x140];
+	u8         reserved_at_c0[0x140];
 };
 
 struct mlx5_ifc_query_fte_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x1c0];
+	u8         reserved_at_40[0x1c0];
 
 	struct mlx5_ifc_flow_context_bits flow_context;
 };
 
 struct mlx5_ifc_query_fte_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x40];
+	u8         reserved_at_40[0x40];
 
 	u8         table_type[0x8];
-	u8         reserved_3[0x18];
+	u8         reserved_at_88[0x18];
 
-	u8         reserved_4[0x8];
+	u8         reserved_at_a0[0x8];
 	u8         table_id[0x18];
 
-	u8         reserved_5[0x40];
+	u8         reserved_at_c0[0x40];
 
 	u8         flow_index[0x20];
 
-	u8         reserved_6[0xe0];
+	u8         reserved_at_120[0xe0];
 };
 
 enum {
@@ -3781,84 +3781,84 @@
 
 struct mlx5_ifc_query_flow_group_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0xa0];
+	u8         reserved_at_40[0xa0];
 
 	u8         start_flow_index[0x20];
 
-	u8         reserved_2[0x20];
+	u8         reserved_at_100[0x20];
 
 	u8         end_flow_index[0x20];
 
-	u8         reserved_3[0xa0];
+	u8         reserved_at_140[0xa0];
 
-	u8         reserved_4[0x18];
+	u8         reserved_at_1e0[0x18];
 	u8         match_criteria_enable[0x8];
 
 	struct mlx5_ifc_fte_match_param_bits match_criteria;
 
-	u8         reserved_5[0xe00];
+	u8         reserved_at_1200[0xe00];
 };
 
 struct mlx5_ifc_query_flow_group_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x40];
+	u8         reserved_at_40[0x40];
 
 	u8         table_type[0x8];
-	u8         reserved_3[0x18];
+	u8         reserved_at_88[0x18];
 
-	u8         reserved_4[0x8];
+	u8         reserved_at_a0[0x8];
 	u8         table_id[0x18];
 
 	u8         group_id[0x20];
 
-	u8         reserved_5[0x120];
+	u8         reserved_at_e0[0x120];
 };
 
 struct mlx5_ifc_query_esw_vport_context_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 
 	struct mlx5_ifc_esw_vport_context_bits esw_vport_context;
 };
 
 struct mlx5_ifc_query_esw_vport_context_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
 	u8         other_vport[0x1];
-	u8         reserved_2[0xf];
+	u8         reserved_at_41[0xf];
 	u8         vport_number[0x10];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_modify_esw_vport_context_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_esw_vport_context_fields_select_bits {
-	u8         reserved[0x1c];
+	u8         reserved_at_0[0x1c];
 	u8         vport_cvlan_insert[0x1];
 	u8         vport_svlan_insert[0x1];
 	u8         vport_cvlan_strip[0x1];
@@ -3867,13 +3867,13 @@
 
 struct mlx5_ifc_modify_esw_vport_context_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
 	u8         other_vport[0x1];
-	u8         reserved_2[0xf];
+	u8         reserved_at_41[0xf];
 	u8         vport_number[0x10];
 
 	struct mlx5_ifc_esw_vport_context_fields_select_bits field_select;
@@ -3883,124 +3883,124 @@
 
 struct mlx5_ifc_query_eq_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 
 	struct mlx5_ifc_eqc_bits eq_context_entry;
 
-	u8         reserved_2[0x40];
+	u8         reserved_at_280[0x40];
 
 	u8         event_bitmask[0x40];
 
-	u8         reserved_3[0x580];
+	u8         reserved_at_300[0x580];
 
 	u8         pas[0][0x40];
 };
 
 struct mlx5_ifc_query_eq_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x18];
+	u8         reserved_at_40[0x18];
 	u8         eq_number[0x8];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_query_dct_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 
 	struct mlx5_ifc_dctc_bits dct_context_entry;
 
-	u8         reserved_2[0x180];
+	u8         reserved_at_280[0x180];
 };
 
 struct mlx5_ifc_query_dct_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         dctn[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_query_cq_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 
 	struct mlx5_ifc_cqc_bits cq_context;
 
-	u8         reserved_2[0x600];
+	u8         reserved_at_280[0x600];
 
 	u8         pas[0][0x40];
 };
 
 struct mlx5_ifc_query_cq_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         cqn[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_query_cong_status_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x20];
+	u8         reserved_at_40[0x20];
 
 	u8         enable[0x1];
 	u8         tag_enable[0x1];
-	u8         reserved_2[0x1e];
+	u8         reserved_at_62[0x1e];
 };
 
 struct mlx5_ifc_query_cong_status_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x18];
+	u8         reserved_at_40[0x18];
 	u8         priority[0x4];
 	u8         cong_protocol[0x4];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_query_cong_statistics_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 
 	u8         cur_flows[0x20];
 
@@ -4014,7 +4014,7 @@
 
 	u8         cnp_handled_low[0x20];
 
-	u8         reserved_2[0x100];
+	u8         reserved_at_140[0x100];
 
 	u8         time_stamp_high[0x20];
 
@@ -4030,453 +4030,455 @@
 
 	u8         cnps_sent_low[0x20];
 
-	u8         reserved_3[0x560];
+	u8         reserved_at_320[0x560];
 };
 
 struct mlx5_ifc_query_cong_statistics_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
 	u8         clear[0x1];
-	u8         reserved_2[0x1f];
+	u8         reserved_at_41[0x1f];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_query_cong_params_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 
 	union mlx5_ifc_cong_control_roce_ecn_auto_bits congestion_parameters;
 };
 
 struct mlx5_ifc_query_cong_params_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x1c];
+	u8         reserved_at_40[0x1c];
 	u8         cong_protocol[0x4];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_query_adapter_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 
 	struct mlx5_ifc_query_adapter_param_block_bits query_adapter_struct;
 };
 
 struct mlx5_ifc_query_adapter_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_qp_2rst_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_qp_2rst_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         qpn[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_qp_2err_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_qp_2err_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         qpn[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_page_fault_resume_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_page_fault_resume_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
 	u8         error[0x1];
-	u8         reserved_2[0x4];
+	u8         reserved_at_41[0x4];
 	u8         rdma[0x1];
 	u8         read_write[0x1];
 	u8         req_res[0x1];
 	u8         qpn[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_nop_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_nop_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_modify_vport_state_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_modify_vport_state_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
 	u8         other_vport[0x1];
-	u8         reserved_2[0xf];
+	u8         reserved_at_41[0xf];
 	u8         vport_number[0x10];
 
-	u8         reserved_3[0x18];
+	u8         reserved_at_60[0x18];
 	u8         admin_state[0x4];
-	u8         reserved_4[0x4];
+	u8         reserved_at_7c[0x4];
 };
 
 struct mlx5_ifc_modify_tis_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_modify_tis_bitmask_bits {
-	u8         reserved_0[0x20];
+	u8         reserved_at_0[0x20];
 
-	u8         reserved_1[0x1f];
+	u8         reserved_at_20[0x1f];
 	u8         prio[0x1];
 };
 
 struct mlx5_ifc_modify_tis_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         tisn[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 
 	struct mlx5_ifc_modify_tis_bitmask_bits bitmask;
 
-	u8         reserved_4[0x40];
+	u8         reserved_at_c0[0x40];
 
 	struct mlx5_ifc_tisc_bits ctx;
 };
 
 struct mlx5_ifc_modify_tir_bitmask_bits {
-	u8	   reserved_0[0x20];
+	u8	   reserved_at_0[0x20];
 
-	u8         reserved_1[0x1b];
+	u8         reserved_at_20[0x1b];
 	u8         self_lb_en[0x1];
-	u8         reserved_2[0x3];
+	u8         reserved_at_3c[0x1];
+	u8         hash[0x1];
+	u8         reserved_at_3e[0x1];
 	u8         lro[0x1];
 };
 
 struct mlx5_ifc_modify_tir_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_modify_tir_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         tirn[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 
 	struct mlx5_ifc_modify_tir_bitmask_bits bitmask;
 
-	u8         reserved_4[0x40];
+	u8         reserved_at_c0[0x40];
 
 	struct mlx5_ifc_tirc_bits ctx;
 };
 
 struct mlx5_ifc_modify_sq_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_modify_sq_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
 	u8         sq_state[0x4];
-	u8         reserved_2[0x4];
+	u8         reserved_at_44[0x4];
 	u8         sqn[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 
 	u8         modify_bitmask[0x40];
 
-	u8         reserved_4[0x40];
+	u8         reserved_at_c0[0x40];
 
 	struct mlx5_ifc_sqc_bits ctx;
 };
 
 struct mlx5_ifc_modify_rqt_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_rqt_bitmask_bits {
-	u8	   reserved[0x20];
+	u8	   reserved_at_0[0x20];
 
-	u8         reserved1[0x1f];
+	u8         reserved_at_20[0x1f];
 	u8         rqn_list[0x1];
 };
 
 struct mlx5_ifc_modify_rqt_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         rqtn[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 
 	struct mlx5_ifc_rqt_bitmask_bits bitmask;
 
-	u8         reserved_4[0x40];
+	u8         reserved_at_c0[0x40];
 
 	struct mlx5_ifc_rqtc_bits ctx;
 };
 
 struct mlx5_ifc_modify_rq_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_modify_rq_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
 	u8         rq_state[0x4];
-	u8         reserved_2[0x4];
+	u8         reserved_at_44[0x4];
 	u8         rqn[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 
 	u8         modify_bitmask[0x40];
 
-	u8         reserved_4[0x40];
+	u8         reserved_at_c0[0x40];
 
 	struct mlx5_ifc_rqc_bits ctx;
 };
 
 struct mlx5_ifc_modify_rmp_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_rmp_bitmask_bits {
-	u8	   reserved[0x20];
+	u8	   reserved_at_0[0x20];
 
-	u8         reserved1[0x1f];
+	u8         reserved_at_20[0x1f];
 	u8         lwm[0x1];
 };
 
 struct mlx5_ifc_modify_rmp_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
 	u8         rmp_state[0x4];
-	u8         reserved_2[0x4];
+	u8         reserved_at_44[0x4];
 	u8         rmpn[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 
 	struct mlx5_ifc_rmp_bitmask_bits bitmask;
 
-	u8         reserved_4[0x40];
+	u8         reserved_at_c0[0x40];
 
 	struct mlx5_ifc_rmpc_bits ctx;
 };
 
 struct mlx5_ifc_modify_nic_vport_context_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_modify_nic_vport_field_select_bits {
-	u8         reserved_0[0x19];
+	u8         reserved_at_0[0x19];
 	u8         mtu[0x1];
 	u8         change_event[0x1];
 	u8         promisc[0x1];
 	u8         permanent_address[0x1];
 	u8         addresses_list[0x1];
 	u8         roce_en[0x1];
-	u8         reserved_1[0x1];
+	u8         reserved_at_1f[0x1];
 };
 
 struct mlx5_ifc_modify_nic_vport_context_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
 	u8         other_vport[0x1];
-	u8         reserved_2[0xf];
+	u8         reserved_at_41[0xf];
 	u8         vport_number[0x10];
 
 	struct mlx5_ifc_modify_nic_vport_field_select_bits field_select;
 
-	u8         reserved_3[0x780];
+	u8         reserved_at_80[0x780];
 
 	struct mlx5_ifc_nic_vport_context_bits nic_vport_context;
 };
 
 struct mlx5_ifc_modify_hca_vport_context_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_modify_hca_vport_context_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
 	u8         other_vport[0x1];
-	u8         reserved_2[0xb];
+	u8         reserved_at_41[0xb];
 	u8         port_num[0x4];
 	u8         vport_number[0x10];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 
 	struct mlx5_ifc_hca_vport_context_bits hca_vport_context;
 };
 
 struct mlx5_ifc_modify_cq_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 enum {
@@ -4486,83 +4488,83 @@
 
 struct mlx5_ifc_modify_cq_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         cqn[0x18];
 
 	union mlx5_ifc_modify_field_select_resize_field_select_auto_bits modify_field_select_resize_field_select;
 
 	struct mlx5_ifc_cqc_bits cq_context;
 
-	u8         reserved_3[0x600];
+	u8         reserved_at_280[0x600];
 
 	u8         pas[0][0x40];
 };
 
 struct mlx5_ifc_modify_cong_status_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_modify_cong_status_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x18];
+	u8         reserved_at_40[0x18];
 	u8         priority[0x4];
 	u8         cong_protocol[0x4];
 
 	u8         enable[0x1];
 	u8         tag_enable[0x1];
-	u8         reserved_3[0x1e];
+	u8         reserved_at_62[0x1e];
 };
 
 struct mlx5_ifc_modify_cong_params_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_modify_cong_params_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x1c];
+	u8         reserved_at_40[0x1c];
 	u8         cong_protocol[0x4];
 
 	union mlx5_ifc_field_select_802_1_r_roce_auto_bits field_select;
 
-	u8         reserved_3[0x80];
+	u8         reserved_at_80[0x80];
 
 	union mlx5_ifc_cong_control_roce_ecn_auto_bits congestion_parameters;
 };
 
 struct mlx5_ifc_manage_pages_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
 	u8         output_num_entries[0x20];
 
-	u8         reserved_1[0x20];
+	u8         reserved_at_60[0x20];
 
 	u8         pas[0][0x40];
 };
@@ -4575,12 +4577,12 @@
 
 struct mlx5_ifc_manage_pages_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x10];
+	u8         reserved_at_40[0x10];
 	u8         function_id[0x10];
 
 	u8         input_num_entries[0x20];
@@ -4590,117 +4592,117 @@
 
 struct mlx5_ifc_mad_ifc_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 
 	u8         response_mad_packet[256][0x8];
 };
 
 struct mlx5_ifc_mad_ifc_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
 	u8         remote_lid[0x10];
-	u8         reserved_2[0x8];
+	u8         reserved_at_50[0x8];
 	u8         port[0x8];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 
 	u8         mad[256][0x8];
 };
 
 struct mlx5_ifc_init_hca_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_init_hca_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_init2rtr_qp_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_init2rtr_qp_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         qpn[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 
 	u8         opt_param_mask[0x20];
 
-	u8         reserved_4[0x20];
+	u8         reserved_at_a0[0x20];
 
 	struct mlx5_ifc_qpc_bits qpc;
 
-	u8         reserved_5[0x80];
+	u8         reserved_at_800[0x80];
 };
 
 struct mlx5_ifc_init2init_qp_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_init2init_qp_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         qpn[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 
 	u8         opt_param_mask[0x20];
 
-	u8         reserved_4[0x20];
+	u8         reserved_at_a0[0x20];
 
 	struct mlx5_ifc_qpc_bits qpc;
 
-	u8         reserved_5[0x80];
+	u8         reserved_at_800[0x80];
 };
 
 struct mlx5_ifc_get_dropped_packet_log_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 
 	u8         packet_headers_log[128][0x8];
 
@@ -4709,1029 +4711,1029 @@
 
 struct mlx5_ifc_get_dropped_packet_log_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_gen_eqe_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x18];
+	u8         reserved_at_40[0x18];
 	u8         eq_number[0x8];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 
 	u8         eqe[64][0x8];
 };
 
 struct mlx5_ifc_gen_eq_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_enable_hca_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x20];
+	u8         reserved_at_40[0x20];
 };
 
 struct mlx5_ifc_enable_hca_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x10];
+	u8         reserved_at_40[0x10];
 	u8         function_id[0x10];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_drain_dct_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_drain_dct_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         dctn[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_disable_hca_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x20];
+	u8         reserved_at_40[0x20];
 };
 
 struct mlx5_ifc_disable_hca_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x10];
+	u8         reserved_at_40[0x10];
 	u8         function_id[0x10];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_detach_from_mcg_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_detach_from_mcg_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         qpn[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 
 	u8         multicast_gid[16][0x8];
 };
 
 struct mlx5_ifc_destroy_xrc_srq_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_destroy_xrc_srq_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         xrc_srqn[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_destroy_tis_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_destroy_tis_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         tisn[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_destroy_tir_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_destroy_tir_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         tirn[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_destroy_srq_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_destroy_srq_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         srqn[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_destroy_sq_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_destroy_sq_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         sqn[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_destroy_rqt_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_destroy_rqt_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         rqtn[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_destroy_rq_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_destroy_rq_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         rqn[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_destroy_rmp_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_destroy_rmp_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         rmpn[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_destroy_qp_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_destroy_qp_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         qpn[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_destroy_psv_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_destroy_psv_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         psvn[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_destroy_mkey_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_destroy_mkey_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         mkey_index[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_destroy_flow_table_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_destroy_flow_table_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x40];
+	u8         reserved_at_40[0x40];
 
 	u8         table_type[0x8];
-	u8         reserved_3[0x18];
+	u8         reserved_at_88[0x18];
 
-	u8         reserved_4[0x8];
+	u8         reserved_at_a0[0x8];
 	u8         table_id[0x18];
 
-	u8         reserved_5[0x140];
+	u8         reserved_at_c0[0x140];
 };
 
 struct mlx5_ifc_destroy_flow_group_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_destroy_flow_group_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x40];
+	u8         reserved_at_40[0x40];
 
 	u8         table_type[0x8];
-	u8         reserved_3[0x18];
+	u8         reserved_at_88[0x18];
 
-	u8         reserved_4[0x8];
+	u8         reserved_at_a0[0x8];
 	u8         table_id[0x18];
 
 	u8         group_id[0x20];
 
-	u8         reserved_5[0x120];
+	u8         reserved_at_e0[0x120];
 };
 
 struct mlx5_ifc_destroy_eq_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_destroy_eq_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x18];
+	u8         reserved_at_40[0x18];
 	u8         eq_number[0x8];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_destroy_dct_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_destroy_dct_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         dctn[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_destroy_cq_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_destroy_cq_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         cqn[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_delete_vxlan_udp_dport_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_delete_vxlan_udp_dport_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x20];
+	u8         reserved_at_40[0x20];
 
-	u8         reserved_3[0x10];
+	u8         reserved_at_60[0x10];
 	u8         vxlan_udp_port[0x10];
 };
 
 struct mlx5_ifc_delete_l2_table_entry_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_delete_l2_table_entry_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x60];
+	u8         reserved_at_40[0x60];
 
-	u8         reserved_3[0x8];
+	u8         reserved_at_a0[0x8];
 	u8         table_index[0x18];
 
-	u8         reserved_4[0x140];
+	u8         reserved_at_c0[0x140];
 };
 
 struct mlx5_ifc_delete_fte_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_delete_fte_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x40];
+	u8         reserved_at_40[0x40];
 
 	u8         table_type[0x8];
-	u8         reserved_3[0x18];
+	u8         reserved_at_88[0x18];
 
-	u8         reserved_4[0x8];
+	u8         reserved_at_a0[0x8];
 	u8         table_id[0x18];
 
-	u8         reserved_5[0x40];
+	u8         reserved_at_c0[0x40];
 
 	u8         flow_index[0x20];
 
-	u8         reserved_6[0xe0];
+	u8         reserved_at_120[0xe0];
 };
 
 struct mlx5_ifc_dealloc_xrcd_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_dealloc_xrcd_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         xrcd[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_dealloc_uar_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_dealloc_uar_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         uar[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_dealloc_transport_domain_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_dealloc_transport_domain_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         transport_domain[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_dealloc_q_counter_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_dealloc_q_counter_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x18];
+	u8         reserved_at_40[0x18];
 	u8         counter_set_id[0x8];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_dealloc_pd_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_dealloc_pd_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         pd[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_create_xrc_srq_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x8];
+	u8         reserved_at_40[0x8];
 	u8         xrc_srqn[0x18];
 
-	u8         reserved_2[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_create_xrc_srq_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x40];
+	u8         reserved_at_40[0x40];
 
 	struct mlx5_ifc_xrc_srqc_bits xrc_srq_context_entry;
 
-	u8         reserved_3[0x600];
+	u8         reserved_at_280[0x600];
 
 	u8         pas[0][0x40];
 };
 
 struct mlx5_ifc_create_tis_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x8];
+	u8         reserved_at_40[0x8];
 	u8         tisn[0x18];
 
-	u8         reserved_2[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_create_tis_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0xc0];
+	u8         reserved_at_40[0xc0];
 
 	struct mlx5_ifc_tisc_bits ctx;
 };
 
 struct mlx5_ifc_create_tir_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x8];
+	u8         reserved_at_40[0x8];
 	u8         tirn[0x18];
 
-	u8         reserved_2[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_create_tir_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0xc0];
+	u8         reserved_at_40[0xc0];
 
 	struct mlx5_ifc_tirc_bits ctx;
 };
 
 struct mlx5_ifc_create_srq_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x8];
+	u8         reserved_at_40[0x8];
 	u8         srqn[0x18];
 
-	u8         reserved_2[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_create_srq_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x40];
+	u8         reserved_at_40[0x40];
 
 	struct mlx5_ifc_srqc_bits srq_context_entry;
 
-	u8         reserved_3[0x600];
+	u8         reserved_at_280[0x600];
 
 	u8         pas[0][0x40];
 };
 
 struct mlx5_ifc_create_sq_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x8];
+	u8         reserved_at_40[0x8];
 	u8         sqn[0x18];
 
-	u8         reserved_2[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_create_sq_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0xc0];
+	u8         reserved_at_40[0xc0];
 
 	struct mlx5_ifc_sqc_bits ctx;
 };
 
 struct mlx5_ifc_create_rqt_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x8];
+	u8         reserved_at_40[0x8];
 	u8         rqtn[0x18];
 
-	u8         reserved_2[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_create_rqt_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0xc0];
+	u8         reserved_at_40[0xc0];
 
 	struct mlx5_ifc_rqtc_bits rqt_context;
 };
 
 struct mlx5_ifc_create_rq_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x8];
+	u8         reserved_at_40[0x8];
 	u8         rqn[0x18];
 
-	u8         reserved_2[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_create_rq_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0xc0];
+	u8         reserved_at_40[0xc0];
 
 	struct mlx5_ifc_rqc_bits ctx;
 };
 
 struct mlx5_ifc_create_rmp_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x8];
+	u8         reserved_at_40[0x8];
 	u8         rmpn[0x18];
 
-	u8         reserved_2[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_create_rmp_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0xc0];
+	u8         reserved_at_40[0xc0];
 
 	struct mlx5_ifc_rmpc_bits ctx;
 };
 
 struct mlx5_ifc_create_qp_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x8];
+	u8         reserved_at_40[0x8];
 	u8         qpn[0x18];
 
-	u8         reserved_2[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_create_qp_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x40];
+	u8         reserved_at_40[0x40];
 
 	u8         opt_param_mask[0x20];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_a0[0x20];
 
 	struct mlx5_ifc_qpc_bits qpc;
 
-	u8         reserved_4[0x80];
+	u8         reserved_at_800[0x80];
 
 	u8         pas[0][0x40];
 };
 
 struct mlx5_ifc_create_psv_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_80[0x8];
 	u8         psv0_index[0x18];
 
-	u8         reserved_3[0x8];
+	u8         reserved_at_a0[0x8];
 	u8         psv1_index[0x18];
 
-	u8         reserved_4[0x8];
+	u8         reserved_at_c0[0x8];
 	u8         psv2_index[0x18];
 
-	u8         reserved_5[0x8];
+	u8         reserved_at_e0[0x8];
 	u8         psv3_index[0x18];
 };
 
 struct mlx5_ifc_create_psv_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
 	u8         num_psv[0x4];
-	u8         reserved_2[0x4];
+	u8         reserved_at_44[0x4];
 	u8         pd[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_create_mkey_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x8];
+	u8         reserved_at_40[0x8];
 	u8         mkey_index[0x18];
 
-	u8         reserved_2[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_create_mkey_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x20];
+	u8         reserved_at_40[0x20];
 
 	u8         pg_access[0x1];
-	u8         reserved_3[0x1f];
+	u8         reserved_at_61[0x1f];
 
 	struct mlx5_ifc_mkc_bits memory_key_mkey_entry;
 
-	u8         reserved_4[0x80];
+	u8         reserved_at_280[0x80];
 
 	u8         translations_octword_actual_size[0x20];
 
-	u8         reserved_5[0x560];
+	u8         reserved_at_320[0x560];
 
 	u8         klm_pas_mtt[0][0x20];
 };
 
 struct mlx5_ifc_create_flow_table_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x8];
+	u8         reserved_at_40[0x8];
 	u8         table_id[0x18];
 
-	u8         reserved_2[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_create_flow_table_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x40];
+	u8         reserved_at_40[0x40];
 
 	u8         table_type[0x8];
-	u8         reserved_3[0x18];
+	u8         reserved_at_88[0x18];
 
-	u8         reserved_4[0x20];
+	u8         reserved_at_a0[0x20];
 
-	u8         reserved_5[0x4];
+	u8         reserved_at_c0[0x4];
 	u8         table_miss_mode[0x4];
 	u8         level[0x8];
-	u8         reserved_6[0x8];
+	u8         reserved_at_d0[0x8];
 	u8         log_size[0x8];
 
-	u8         reserved_7[0x8];
+	u8         reserved_at_e0[0x8];
 	u8         table_miss_id[0x18];
 
-	u8         reserved_8[0x100];
+	u8         reserved_at_100[0x100];
 };
 
 struct mlx5_ifc_create_flow_group_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x8];
+	u8         reserved_at_40[0x8];
 	u8         group_id[0x18];
 
-	u8         reserved_2[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 enum {
@@ -5742,134 +5744,134 @@
 
 struct mlx5_ifc_create_flow_group_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x40];
+	u8         reserved_at_40[0x40];
 
 	u8         table_type[0x8];
-	u8         reserved_3[0x18];
+	u8         reserved_at_88[0x18];
 
-	u8         reserved_4[0x8];
+	u8         reserved_at_a0[0x8];
 	u8         table_id[0x18];
 
-	u8         reserved_5[0x20];
+	u8         reserved_at_c0[0x20];
 
 	u8         start_flow_index[0x20];
 
-	u8         reserved_6[0x20];
+	u8         reserved_at_100[0x20];
 
 	u8         end_flow_index[0x20];
 
-	u8         reserved_7[0xa0];
+	u8         reserved_at_140[0xa0];
 
-	u8         reserved_8[0x18];
+	u8         reserved_at_1e0[0x18];
 	u8         match_criteria_enable[0x8];
 
 	struct mlx5_ifc_fte_match_param_bits match_criteria;
 
-	u8         reserved_9[0xe00];
+	u8         reserved_at_1200[0xe00];
 };
 
 struct mlx5_ifc_create_eq_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x18];
+	u8         reserved_at_40[0x18];
 	u8         eq_number[0x8];
 
-	u8         reserved_2[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_create_eq_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x40];
+	u8         reserved_at_40[0x40];
 
 	struct mlx5_ifc_eqc_bits eq_context_entry;
 
-	u8         reserved_3[0x40];
+	u8         reserved_at_280[0x40];
 
 	u8         event_bitmask[0x40];
 
-	u8         reserved_4[0x580];
+	u8         reserved_at_300[0x580];
 
 	u8         pas[0][0x40];
 };
 
 struct mlx5_ifc_create_dct_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x8];
+	u8         reserved_at_40[0x8];
 	u8         dctn[0x18];
 
-	u8         reserved_2[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_create_dct_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x40];
+	u8         reserved_at_40[0x40];
 
 	struct mlx5_ifc_dctc_bits dct_context_entry;
 
-	u8         reserved_3[0x180];
+	u8         reserved_at_280[0x180];
 };
 
 struct mlx5_ifc_create_cq_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x8];
+	u8         reserved_at_40[0x8];
 	u8         cqn[0x18];
 
-	u8         reserved_2[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_create_cq_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x40];
+	u8         reserved_at_40[0x40];
 
 	struct mlx5_ifc_cqc_bits cq_context;
 
-	u8         reserved_3[0x600];
+	u8         reserved_at_280[0x600];
 
 	u8         pas[0][0x40];
 };
 
 struct mlx5_ifc_config_int_moderation_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x4];
+	u8         reserved_at_40[0x4];
 	u8         min_delay[0xc];
 	u8         int_vector[0x10];
 
-	u8         reserved_2[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 enum {
@@ -5879,49 +5881,49 @@
 
 struct mlx5_ifc_config_int_moderation_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x4];
+	u8         reserved_at_40[0x4];
 	u8         min_delay[0xc];
 	u8         int_vector[0x10];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_attach_to_mcg_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_attach_to_mcg_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         qpn[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 
 	u8         multicast_gid[16][0x8];
 };
 
 struct mlx5_ifc_arm_xrc_srq_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 enum {
@@ -5930,25 +5932,25 @@
 
 struct mlx5_ifc_arm_xrc_srq_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         xrc_srqn[0x18];
 
-	u8         reserved_3[0x10];
+	u8         reserved_at_60[0x10];
 	u8         lwm[0x10];
 };
 
 struct mlx5_ifc_arm_rq_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 enum {
@@ -5957,179 +5959,179 @@
 
 struct mlx5_ifc_arm_rq_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         srq_number[0x18];
 
-	u8         reserved_3[0x10];
+	u8         reserved_at_60[0x10];
 	u8         lwm[0x10];
 };
 
 struct mlx5_ifc_arm_dct_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_arm_dct_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         dct_number[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_alloc_xrcd_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x8];
+	u8         reserved_at_40[0x8];
 	u8         xrcd[0x18];
 
-	u8         reserved_2[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_alloc_xrcd_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_alloc_uar_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x8];
+	u8         reserved_at_40[0x8];
 	u8         uar[0x18];
 
-	u8         reserved_2[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_alloc_uar_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_alloc_transport_domain_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x8];
+	u8         reserved_at_40[0x8];
 	u8         transport_domain[0x18];
 
-	u8         reserved_2[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_alloc_transport_domain_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_alloc_q_counter_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x18];
+	u8         reserved_at_40[0x18];
 	u8         counter_set_id[0x8];
 
-	u8         reserved_2[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_alloc_q_counter_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_alloc_pd_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x8];
+	u8         reserved_at_40[0x8];
 	u8         pd[0x18];
 
-	u8         reserved_2[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_alloc_pd_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_add_vxlan_udp_dport_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_add_vxlan_udp_dport_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x20];
+	u8         reserved_at_40[0x20];
 
-	u8         reserved_3[0x10];
+	u8         reserved_at_60[0x10];
 	u8         vxlan_udp_port[0x10];
 };
 
 struct mlx5_ifc_access_register_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 
 	u8         register_data[0][0x20];
 };
@@ -6141,12 +6143,12 @@
 
 struct mlx5_ifc_access_register_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x10];
+	u8         reserved_at_40[0x10];
 	u8         register_id[0x10];
 
 	u8         argument[0x20];
@@ -6159,24 +6161,24 @@
 	u8         version[0x4];
 	u8         local_port[0x8];
 	u8         pnat[0x2];
-	u8         reserved_0[0x2];
+	u8         reserved_at_12[0x2];
 	u8         lane[0x4];
-	u8         reserved_1[0x8];
+	u8         reserved_at_18[0x8];
 
-	u8         reserved_2[0x20];
+	u8         reserved_at_20[0x20];
 
-	u8         reserved_3[0x7];
+	u8         reserved_at_40[0x7];
 	u8         polarity[0x1];
 	u8         ob_tap0[0x8];
 	u8         ob_tap1[0x8];
 	u8         ob_tap2[0x8];
 
-	u8         reserved_4[0xc];
+	u8         reserved_at_60[0xc];
 	u8         ob_preemp_mode[0x4];
 	u8         ob_reg[0x8];
 	u8         ob_bias[0x8];
 
-	u8         reserved_5[0x20];
+	u8         reserved_at_80[0x20];
 };
 
 struct mlx5_ifc_slrg_reg_bits {
@@ -6184,36 +6186,36 @@
 	u8         version[0x4];
 	u8         local_port[0x8];
 	u8         pnat[0x2];
-	u8         reserved_0[0x2];
+	u8         reserved_at_12[0x2];
 	u8         lane[0x4];
-	u8         reserved_1[0x8];
+	u8         reserved_at_18[0x8];
 
 	u8         time_to_link_up[0x10];
-	u8         reserved_2[0xc];
+	u8         reserved_at_30[0xc];
 	u8         grade_lane_speed[0x4];
 
 	u8         grade_version[0x8];
 	u8         grade[0x18];
 
-	u8         reserved_3[0x4];
+	u8         reserved_at_60[0x4];
 	u8         height_grade_type[0x4];
 	u8         height_grade[0x18];
 
 	u8         height_dz[0x10];
 	u8         height_dv[0x10];
 
-	u8         reserved_4[0x10];
+	u8         reserved_at_a0[0x10];
 	u8         height_sigma[0x10];
 
-	u8         reserved_5[0x20];
+	u8         reserved_at_c0[0x20];
 
-	u8         reserved_6[0x4];
+	u8         reserved_at_e0[0x4];
 	u8         phase_grade_type[0x4];
 	u8         phase_grade[0x18];
 
-	u8         reserved_7[0x8];
+	u8         reserved_at_100[0x8];
 	u8         phase_eo_pos[0x8];
-	u8         reserved_8[0x8];
+	u8         reserved_at_110[0x8];
 	u8         phase_eo_neg[0x8];
 
 	u8         ffe_set_tested[0x10];
@@ -6221,70 +6223,70 @@
 };
 
 struct mlx5_ifc_pvlc_reg_bits {
-	u8         reserved_0[0x8];
+	u8         reserved_at_0[0x8];
 	u8         local_port[0x8];
-	u8         reserved_1[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_2[0x1c];
+	u8         reserved_at_20[0x1c];
 	u8         vl_hw_cap[0x4];
 
-	u8         reserved_3[0x1c];
+	u8         reserved_at_40[0x1c];
 	u8         vl_admin[0x4];
 
-	u8         reserved_4[0x1c];
+	u8         reserved_at_60[0x1c];
 	u8         vl_operational[0x4];
 };
 
 struct mlx5_ifc_pude_reg_bits {
 	u8         swid[0x8];
 	u8         local_port[0x8];
-	u8         reserved_0[0x4];
+	u8         reserved_at_10[0x4];
 	u8         admin_status[0x4];
-	u8         reserved_1[0x4];
+	u8         reserved_at_18[0x4];
 	u8         oper_status[0x4];
 
-	u8         reserved_2[0x60];
+	u8         reserved_at_20[0x60];
 };
 
 struct mlx5_ifc_ptys_reg_bits {
-	u8         reserved_0[0x8];
+	u8         reserved_at_0[0x8];
 	u8         local_port[0x8];
-	u8         reserved_1[0xd];
+	u8         reserved_at_10[0xd];
 	u8         proto_mask[0x3];
 
-	u8         reserved_2[0x40];
+	u8         reserved_at_20[0x40];
 
 	u8         eth_proto_capability[0x20];
 
 	u8         ib_link_width_capability[0x10];
 	u8         ib_proto_capability[0x10];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_a0[0x20];
 
 	u8         eth_proto_admin[0x20];
 
 	u8         ib_link_width_admin[0x10];
 	u8         ib_proto_admin[0x10];
 
-	u8         reserved_4[0x20];
+	u8         reserved_at_100[0x20];
 
 	u8         eth_proto_oper[0x20];
 
 	u8         ib_link_width_oper[0x10];
 	u8         ib_proto_oper[0x10];
 
-	u8         reserved_5[0x20];
+	u8         reserved_at_160[0x20];
 
 	u8         eth_proto_lp_advertise[0x20];
 
-	u8         reserved_6[0x60];
+	u8         reserved_at_1a0[0x60];
 };
 
 struct mlx5_ifc_ptas_reg_bits {
-	u8         reserved_0[0x20];
+	u8         reserved_at_0[0x20];
 
 	u8         algorithm_options[0x10];
-	u8         reserved_1[0x4];
+	u8         reserved_at_30[0x4];
 	u8         repetitions_mode[0x4];
 	u8         num_of_repetitions[0x8];
 
@@ -6310,13 +6312,13 @@
 	u8         ndeo_error_threshold[0x10];
 
 	u8         mixer_offset_step_size[0x10];
-	u8         reserved_2[0x8];
+	u8         reserved_at_110[0x8];
 	u8         mix90_phase_for_voltage_bath[0x8];
 
 	u8         mixer_offset_start[0x10];
 	u8         mixer_offset_end[0x10];
 
-	u8         reserved_3[0x15];
+	u8         reserved_at_140[0x15];
 	u8         ber_test_time[0xb];
 };
 
@@ -6324,154 +6326,154 @@
 	u8         swid[0x8];
 	u8         local_port[0x8];
 	u8         sub_port[0x8];
-	u8         reserved_0[0x8];
+	u8         reserved_at_18[0x8];
 
-	u8         reserved_1[0x20];
+	u8         reserved_at_20[0x20];
 };
 
 struct mlx5_ifc_pqdr_reg_bits {
-	u8         reserved_0[0x8];
+	u8         reserved_at_0[0x8];
 	u8         local_port[0x8];
-	u8         reserved_1[0x5];
+	u8         reserved_at_10[0x5];
 	u8         prio[0x3];
-	u8         reserved_2[0x6];
+	u8         reserved_at_18[0x6];
 	u8         mode[0x2];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_20[0x20];
 
-	u8         reserved_4[0x10];
+	u8         reserved_at_40[0x10];
 	u8         min_threshold[0x10];
 
-	u8         reserved_5[0x10];
+	u8         reserved_at_60[0x10];
 	u8         max_threshold[0x10];
 
-	u8         reserved_6[0x10];
+	u8         reserved_at_80[0x10];
 	u8         mark_probability_denominator[0x10];
 
-	u8         reserved_7[0x60];
+	u8         reserved_at_a0[0x60];
 };
 
 struct mlx5_ifc_ppsc_reg_bits {
-	u8         reserved_0[0x8];
+	u8         reserved_at_0[0x8];
 	u8         local_port[0x8];
-	u8         reserved_1[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_2[0x60];
+	u8         reserved_at_20[0x60];
 
-	u8         reserved_3[0x1c];
+	u8         reserved_at_80[0x1c];
 	u8         wrps_admin[0x4];
 
-	u8         reserved_4[0x1c];
+	u8         reserved_at_a0[0x1c];
 	u8         wrps_status[0x4];
 
-	u8         reserved_5[0x8];
+	u8         reserved_at_c0[0x8];
 	u8         up_threshold[0x8];
-	u8         reserved_6[0x8];
+	u8         reserved_at_d0[0x8];
 	u8         down_threshold[0x8];
 
-	u8         reserved_7[0x20];
+	u8         reserved_at_e0[0x20];
 
-	u8         reserved_8[0x1c];
+	u8         reserved_at_100[0x1c];
 	u8         srps_admin[0x4];
 
-	u8         reserved_9[0x1c];
+	u8         reserved_at_120[0x1c];
 	u8         srps_status[0x4];
 
-	u8         reserved_10[0x40];
+	u8         reserved_at_140[0x40];
 };
 
 struct mlx5_ifc_pplr_reg_bits {
-	u8         reserved_0[0x8];
+	u8         reserved_at_0[0x8];
 	u8         local_port[0x8];
-	u8         reserved_1[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_20[0x8];
 	u8         lb_cap[0x8];
-	u8         reserved_3[0x8];
+	u8         reserved_at_30[0x8];
 	u8         lb_en[0x8];
 };
 
 struct mlx5_ifc_pplm_reg_bits {
-	u8         reserved_0[0x8];
+	u8         reserved_at_0[0x8];
 	u8         local_port[0x8];
-	u8         reserved_1[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_2[0x20];
+	u8         reserved_at_20[0x20];
 
 	u8         port_profile_mode[0x8];
 	u8         static_port_profile[0x8];
 	u8         active_port_profile[0x8];
-	u8         reserved_3[0x8];
+	u8         reserved_at_58[0x8];
 
 	u8         retransmission_active[0x8];
 	u8         fec_mode_active[0x18];
 
-	u8         reserved_4[0x20];
+	u8         reserved_at_80[0x20];
 };
 
 struct mlx5_ifc_ppcnt_reg_bits {
 	u8         swid[0x8];
 	u8         local_port[0x8];
 	u8         pnat[0x2];
-	u8         reserved_0[0x8];
+	u8         reserved_at_12[0x8];
 	u8         grp[0x6];
 
 	u8         clr[0x1];
-	u8         reserved_1[0x1c];
+	u8         reserved_at_21[0x1c];
 	u8         prio_tc[0x3];
 
 	union mlx5_ifc_eth_cntrs_grp_data_layout_auto_bits counter_set;
 };
 
 struct mlx5_ifc_ppad_reg_bits {
-	u8         reserved_0[0x3];
+	u8         reserved_at_0[0x3];
 	u8         single_mac[0x1];
-	u8         reserved_1[0x4];
+	u8         reserved_at_4[0x4];
 	u8         local_port[0x8];
 	u8         mac_47_32[0x10];
 
 	u8         mac_31_0[0x20];
 
-	u8         reserved_2[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_pmtu_reg_bits {
-	u8         reserved_0[0x8];
+	u8         reserved_at_0[0x8];
 	u8         local_port[0x8];
-	u8         reserved_1[0x10];
+	u8         reserved_at_10[0x10];
 
 	u8         max_mtu[0x10];
-	u8         reserved_2[0x10];
+	u8         reserved_at_30[0x10];
 
 	u8         admin_mtu[0x10];
-	u8         reserved_3[0x10];
+	u8         reserved_at_50[0x10];
 
 	u8         oper_mtu[0x10];
-	u8         reserved_4[0x10];
+	u8         reserved_at_70[0x10];
 };
 
 struct mlx5_ifc_pmpr_reg_bits {
-	u8         reserved_0[0x8];
+	u8         reserved_at_0[0x8];
 	u8         module[0x8];
-	u8         reserved_1[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_2[0x18];
+	u8         reserved_at_20[0x18];
 	u8         attenuation_5g[0x8];
 
-	u8         reserved_3[0x18];
+	u8         reserved_at_40[0x18];
 	u8         attenuation_7g[0x8];
 
-	u8         reserved_4[0x18];
+	u8         reserved_at_60[0x18];
 	u8         attenuation_12g[0x8];
 };
 
 struct mlx5_ifc_pmpe_reg_bits {
-	u8         reserved_0[0x8];
+	u8         reserved_at_0[0x8];
 	u8         module[0x8];
-	u8         reserved_1[0xc];
+	u8         reserved_at_10[0xc];
 	u8         module_status[0x4];
 
-	u8         reserved_2[0x60];
+	u8         reserved_at_20[0x60];
 };
 
 struct mlx5_ifc_pmpc_reg_bits {
@@ -6479,20 +6481,20 @@
 };
 
 struct mlx5_ifc_pmlpn_reg_bits {
-	u8         reserved_0[0x4];
+	u8         reserved_at_0[0x4];
 	u8         mlpn_status[0x4];
 	u8         local_port[0x8];
-	u8         reserved_1[0x10];
+	u8         reserved_at_10[0x10];
 
 	u8         e[0x1];
-	u8         reserved_2[0x1f];
+	u8         reserved_at_21[0x1f];
 };
 
 struct mlx5_ifc_pmlp_reg_bits {
 	u8         rxtx[0x1];
-	u8         reserved_0[0x7];
+	u8         reserved_at_1[0x7];
 	u8         local_port[0x8];
-	u8         reserved_1[0x8];
+	u8         reserved_at_10[0x8];
 	u8         width[0x8];
 
 	u8         lane0_module_mapping[0x20];
@@ -6503,36 +6505,36 @@
 
 	u8         lane3_module_mapping[0x20];
 
-	u8         reserved_2[0x160];
+	u8         reserved_at_a0[0x160];
 };
 
 struct mlx5_ifc_pmaos_reg_bits {
-	u8         reserved_0[0x8];
+	u8         reserved_at_0[0x8];
 	u8         module[0x8];
-	u8         reserved_1[0x4];
+	u8         reserved_at_10[0x4];
 	u8         admin_status[0x4];
-	u8         reserved_2[0x4];
+	u8         reserved_at_18[0x4];
 	u8         oper_status[0x4];
 
 	u8         ase[0x1];
 	u8         ee[0x1];
-	u8         reserved_3[0x1c];
+	u8         reserved_at_22[0x1c];
 	u8         e[0x2];
 
-	u8         reserved_4[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_plpc_reg_bits {
-	u8         reserved_0[0x4];
+	u8         reserved_at_0[0x4];
 	u8         profile_id[0xc];
-	u8         reserved_1[0x4];
+	u8         reserved_at_10[0x4];
 	u8         proto_mask[0x4];
-	u8         reserved_2[0x8];
+	u8         reserved_at_18[0x8];
 
-	u8         reserved_3[0x10];
+	u8         reserved_at_20[0x10];
 	u8         lane_speed[0x10];
 
-	u8         reserved_4[0x17];
+	u8         reserved_at_40[0x17];
 	u8         lpbf[0x1];
 	u8         fec_mode_policy[0x8];
 
@@ -6545,44 +6547,44 @@
 	u8         retransmission_request_admin[0x8];
 	u8         fec_mode_request_admin[0x18];
 
-	u8         reserved_5[0x80];
+	u8         reserved_at_c0[0x80];
 };
 
 struct mlx5_ifc_plib_reg_bits {
-	u8         reserved_0[0x8];
+	u8         reserved_at_0[0x8];
 	u8         local_port[0x8];
-	u8         reserved_1[0x8];
+	u8         reserved_at_10[0x8];
 	u8         ib_port[0x8];
 
-	u8         reserved_2[0x60];
+	u8         reserved_at_20[0x60];
 };
 
 struct mlx5_ifc_plbf_reg_bits {
-	u8         reserved_0[0x8];
+	u8         reserved_at_0[0x8];
 	u8         local_port[0x8];
-	u8         reserved_1[0xd];
+	u8         reserved_at_10[0xd];
 	u8         lbf_mode[0x3];
 
-	u8         reserved_2[0x20];
+	u8         reserved_at_20[0x20];
 };
 
 struct mlx5_ifc_pipg_reg_bits {
-	u8         reserved_0[0x8];
+	u8         reserved_at_0[0x8];
 	u8         local_port[0x8];
-	u8         reserved_1[0x10];
+	u8         reserved_at_10[0x10];
 
 	u8         dic[0x1];
-	u8         reserved_2[0x19];
+	u8         reserved_at_21[0x19];
 	u8         ipg[0x4];
-	u8         reserved_3[0x2];
+	u8         reserved_at_3e[0x2];
 };
 
 struct mlx5_ifc_pifr_reg_bits {
-	u8         reserved_0[0x8];
+	u8         reserved_at_0[0x8];
 	u8         local_port[0x8];
-	u8         reserved_1[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_2[0xe0];
+	u8         reserved_at_20[0xe0];
 
 	u8         port_filter[8][0x20];
 
@@ -6590,36 +6592,36 @@
 };
 
 struct mlx5_ifc_pfcc_reg_bits {
-	u8         reserved_0[0x8];
+	u8         reserved_at_0[0x8];
 	u8         local_port[0x8];
-	u8         reserved_1[0x10];
+	u8         reserved_at_10[0x10];
 
 	u8         ppan[0x4];
-	u8         reserved_2[0x4];
+	u8         reserved_at_24[0x4];
 	u8         prio_mask_tx[0x8];
-	u8         reserved_3[0x8];
+	u8         reserved_at_30[0x8];
 	u8         prio_mask_rx[0x8];
 
 	u8         pptx[0x1];
 	u8         aptx[0x1];
-	u8         reserved_4[0x6];
+	u8         reserved_at_42[0x6];
 	u8         pfctx[0x8];
-	u8         reserved_5[0x10];
+	u8         reserved_at_50[0x10];
 
 	u8         pprx[0x1];
 	u8         aprx[0x1];
-	u8         reserved_6[0x6];
+	u8         reserved_at_62[0x6];
 	u8         pfcrx[0x8];
-	u8         reserved_7[0x10];
+	u8         reserved_at_70[0x10];
 
-	u8         reserved_8[0x80];
+	u8         reserved_at_80[0x80];
 };
 
 struct mlx5_ifc_pelc_reg_bits {
 	u8         op[0x4];
-	u8         reserved_0[0x4];
+	u8         reserved_at_4[0x4];
 	u8         local_port[0x8];
-	u8         reserved_1[0x10];
+	u8         reserved_at_10[0x10];
 
 	u8         op_admin[0x8];
 	u8         op_capability[0x8];
@@ -6634,28 +6636,28 @@
 
 	u8         active[0x40];
 
-	u8         reserved_2[0x80];
+	u8         reserved_at_140[0x80];
 };
 
 struct mlx5_ifc_peir_reg_bits {
-	u8         reserved_0[0x8];
+	u8         reserved_at_0[0x8];
 	u8         local_port[0x8];
-	u8         reserved_1[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_2[0xc];
+	u8         reserved_at_20[0xc];
 	u8         error_count[0x4];
-	u8         reserved_3[0x10];
+	u8         reserved_at_30[0x10];
 
-	u8         reserved_4[0xc];
+	u8         reserved_at_40[0xc];
 	u8         lane[0x4];
-	u8         reserved_5[0x8];
+	u8         reserved_at_50[0x8];
 	u8         error_type[0x8];
 };
 
 struct mlx5_ifc_pcap_reg_bits {
-	u8         reserved_0[0x8];
+	u8         reserved_at_0[0x8];
 	u8         local_port[0x8];
-	u8         reserved_1[0x10];
+	u8         reserved_at_10[0x10];
 
 	u8         port_capability_mask[4][0x20];
 };
@@ -6663,46 +6665,46 @@
 struct mlx5_ifc_paos_reg_bits {
 	u8         swid[0x8];
 	u8         local_port[0x8];
-	u8         reserved_0[0x4];
+	u8         reserved_at_10[0x4];
 	u8         admin_status[0x4];
-	u8         reserved_1[0x4];
+	u8         reserved_at_18[0x4];
 	u8         oper_status[0x4];
 
 	u8         ase[0x1];
 	u8         ee[0x1];
-	u8         reserved_2[0x1c];
+	u8         reserved_at_22[0x1c];
 	u8         e[0x2];
 
-	u8         reserved_3[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_pamp_reg_bits {
-	u8         reserved_0[0x8];
+	u8         reserved_at_0[0x8];
 	u8         opamp_group[0x8];
-	u8         reserved_1[0xc];
+	u8         reserved_at_10[0xc];
 	u8         opamp_group_type[0x4];
 
 	u8         start_index[0x10];
-	u8         reserved_2[0x4];
+	u8         reserved_at_30[0x4];
 	u8         num_of_indices[0xc];
 
 	u8         index_data[18][0x10];
 };
 
 struct mlx5_ifc_lane_2_module_mapping_bits {
-	u8         reserved_0[0x6];
+	u8         reserved_at_0[0x6];
 	u8         rx_lane[0x2];
-	u8         reserved_1[0x6];
+	u8         reserved_at_8[0x6];
 	u8         tx_lane[0x2];
-	u8         reserved_2[0x8];
+	u8         reserved_at_10[0x8];
 	u8         module[0x8];
 };
 
 struct mlx5_ifc_bufferx_reg_bits {
-	u8         reserved_0[0x6];
+	u8         reserved_at_0[0x6];
 	u8         lossy[0x1];
 	u8         epsb[0x1];
-	u8         reserved_1[0xc];
+	u8         reserved_at_8[0xc];
 	u8         size[0xc];
 
 	u8         xoff_threshold[0x10];
@@ -6714,21 +6716,21 @@
 };
 
 struct mlx5_ifc_register_power_settings_bits {
-	u8         reserved_0[0x18];
+	u8         reserved_at_0[0x18];
 	u8         power_settings_level[0x8];
 
-	u8         reserved_1[0x60];
+	u8         reserved_at_20[0x60];
 };
 
 struct mlx5_ifc_register_host_endianness_bits {
 	u8         he[0x1];
-	u8         reserved_0[0x1f];
+	u8         reserved_at_1[0x1f];
 
-	u8         reserved_1[0x60];
+	u8         reserved_at_20[0x60];
 };
 
 struct mlx5_ifc_umr_pointer_desc_argument_bits {
-	u8         reserved_0[0x20];
+	u8         reserved_at_0[0x20];
 
 	u8         mkey[0x20];
 
@@ -6741,7 +6743,7 @@
 	u8         dc_key[0x40];
 
 	u8         ext[0x1];
-	u8         reserved_0[0x7];
+	u8         reserved_at_41[0x7];
 	u8         destination_qp_dct[0x18];
 
 	u8         static_rate[0x4];
@@ -6750,7 +6752,7 @@
 	u8         mlid[0x7];
 	u8         rlid_udp_sport[0x10];
 
-	u8         reserved_1[0x20];
+	u8         reserved_at_80[0x20];
 
 	u8         rmac_47_16[0x20];
 
@@ -6758,9 +6760,9 @@
 	u8         tclass[0x8];
 	u8         hop_limit[0x8];
 
-	u8         reserved_2[0x1];
+	u8         reserved_at_e0[0x1];
 	u8         grh[0x1];
-	u8         reserved_3[0x2];
+	u8         reserved_at_e2[0x2];
 	u8         src_addr_index[0x8];
 	u8         flow_label[0x14];
 
@@ -6768,27 +6770,27 @@
 };
 
 struct mlx5_ifc_pages_req_event_bits {
-	u8         reserved_0[0x10];
+	u8         reserved_at_0[0x10];
 	u8         function_id[0x10];
 
 	u8         num_pages[0x20];
 
-	u8         reserved_1[0xa0];
+	u8         reserved_at_40[0xa0];
 };
 
 struct mlx5_ifc_eqe_bits {
-	u8         reserved_0[0x8];
+	u8         reserved_at_0[0x8];
 	u8         event_type[0x8];
-	u8         reserved_1[0x8];
+	u8         reserved_at_10[0x8];
 	u8         event_sub_type[0x8];
 
-	u8         reserved_2[0xe0];
+	u8         reserved_at_20[0xe0];
 
 	union mlx5_ifc_event_auto_bits event_data;
 
-	u8         reserved_3[0x10];
+	u8         reserved_at_1e0[0x10];
 	u8         signature[0x8];
-	u8         reserved_4[0x7];
+	u8         reserved_at_1f8[0x7];
 	u8         owner[0x1];
 };
 
@@ -6798,14 +6800,14 @@
 
 struct mlx5_ifc_cmd_queue_entry_bits {
 	u8         type[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         input_length[0x20];
 
 	u8         input_mailbox_pointer_63_32[0x20];
 
 	u8         input_mailbox_pointer_31_9[0x17];
-	u8         reserved_1[0x9];
+	u8         reserved_at_77[0x9];
 
 	u8         command_input_inline_data[16][0x8];
 
@@ -6814,20 +6816,20 @@
 	u8         output_mailbox_pointer_63_32[0x20];
 
 	u8         output_mailbox_pointer_31_9[0x17];
-	u8         reserved_2[0x9];
+	u8         reserved_at_1b7[0x9];
 
 	u8         output_length[0x20];
 
 	u8         token[0x8];
 	u8         signature[0x8];
-	u8         reserved_3[0x8];
+	u8         reserved_at_1f0[0x8];
 	u8         status[0x7];
 	u8         ownership[0x1];
 };
 
 struct mlx5_ifc_cmd_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
@@ -6836,9 +6838,9 @@
 
 struct mlx5_ifc_cmd_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
 	u8         command[0][0x20];
@@ -6847,16 +6849,16 @@
 struct mlx5_ifc_cmd_if_box_bits {
 	u8         mailbox_data[512][0x8];
 
-	u8         reserved_0[0x180];
+	u8         reserved_at_1000[0x180];
 
 	u8         next_pointer_63_32[0x20];
 
 	u8         next_pointer_31_10[0x16];
-	u8         reserved_1[0xa];
+	u8         reserved_at_11b6[0xa];
 
 	u8         block_number[0x20];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_11e0[0x8];
 	u8         token[0x8];
 	u8         ctrl_signature[0x8];
 	u8         signature[0x8];
@@ -6866,7 +6868,7 @@
 	u8         ptag_63_32[0x20];
 
 	u8         ptag_31_8[0x18];
-	u8         reserved_0[0x6];
+	u8         reserved_at_38[0x6];
 	u8         wr_en[0x1];
 	u8         rd_en[0x1];
 };
@@ -6904,38 +6906,38 @@
 	u8         cmd_interface_rev[0x10];
 	u8         fw_rev_subminor[0x10];
 
-	u8         reserved_0[0x40];
+	u8         reserved_at_40[0x40];
 
 	u8         cmdq_phy_addr_63_32[0x20];
 
 	u8         cmdq_phy_addr_31_12[0x14];
-	u8         reserved_1[0x2];
+	u8         reserved_at_b4[0x2];
 	u8         nic_interface[0x2];
 	u8         log_cmdq_size[0x4];
 	u8         log_cmdq_stride[0x4];
 
 	u8         command_doorbell_vector[0x20];
 
-	u8         reserved_2[0xf00];
+	u8         reserved_at_e0[0xf00];
 
 	u8         initializing[0x1];
-	u8         reserved_3[0x4];
+	u8         reserved_at_fe1[0x4];
 	u8         nic_interface_supported[0x3];
-	u8         reserved_4[0x18];
+	u8         reserved_at_fe8[0x18];
 
 	struct mlx5_ifc_health_buffer_bits health_buffer;
 
 	u8         no_dram_nic_offset[0x20];
 
-	u8         reserved_5[0x6e40];
+	u8         reserved_at_1220[0x6e40];
 
-	u8         reserved_6[0x1f];
+	u8         reserved_at_8060[0x1f];
 	u8         clear_int[0x1];
 
 	u8         health_syndrome[0x8];
 	u8         health_counter[0x18];
 
-	u8         reserved_7[0x17fc0];
+	u8         reserved_at_80a0[0x17fc0];
 };
 
 union mlx5_ifc_ports_control_registers_document_bits {
@@ -6980,44 +6982,44 @@
 	struct mlx5_ifc_pvlc_reg_bits pvlc_reg;
 	struct mlx5_ifc_slrg_reg_bits slrg_reg;
 	struct mlx5_ifc_sltp_reg_bits sltp_reg;
-	u8         reserved_0[0x60e0];
+	u8         reserved_at_0[0x60e0];
 };
 
 union mlx5_ifc_debug_enhancements_document_bits {
 	struct mlx5_ifc_health_buffer_bits health_buffer;
-	u8         reserved_0[0x200];
+	u8         reserved_at_0[0x200];
 };
 
 union mlx5_ifc_uplink_pci_interface_document_bits {
 	struct mlx5_ifc_initial_seg_bits initial_seg;
-	u8         reserved_0[0x20060];
+	u8         reserved_at_0[0x20060];
 };
 
 struct mlx5_ifc_set_flow_table_root_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_set_flow_table_root_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x40];
+	u8         reserved_at_40[0x40];
 
 	u8         table_type[0x8];
-	u8         reserved_3[0x18];
+	u8         reserved_at_88[0x18];
 
-	u8         reserved_4[0x8];
+	u8         reserved_at_a0[0x8];
 	u8         table_id[0x18];
 
-	u8         reserved_5[0x140];
+	u8         reserved_at_c0[0x140];
 };
 
 enum {
@@ -7026,39 +7028,39 @@
 
 struct mlx5_ifc_modify_flow_table_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_modify_flow_table_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x20];
+	u8         reserved_at_40[0x20];
 
-	u8         reserved_3[0x10];
+	u8         reserved_at_60[0x10];
 	u8         modify_field_select[0x10];
 
 	u8         table_type[0x8];
-	u8         reserved_4[0x18];
+	u8         reserved_at_88[0x18];
 
-	u8         reserved_5[0x8];
+	u8         reserved_at_a0[0x8];
 	u8         table_id[0x18];
 
-	u8         reserved_6[0x4];
+	u8         reserved_at_c0[0x4];
 	u8         table_miss_mode[0x4];
-	u8         reserved_7[0x18];
+	u8         reserved_at_c8[0x18];
 
-	u8         reserved_8[0x8];
+	u8         reserved_at_e0[0x8];
 	u8         table_miss_id[0x18];
 
-	u8         reserved_9[0x100];
+	u8         reserved_at_100[0x100];
 };
 
 #endif /* MLX5_IFC_H */
diff --git a/include/linux/mm.h b/include/linux/mm.h
index f1cd22f..3579d1e 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -201,11 +201,13 @@
 #endif
 
 #ifdef CONFIG_STACK_GROWSUP
-#define VM_STACK_FLAGS	(VM_GROWSUP | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT)
+#define VM_STACK	VM_GROWSUP
 #else
-#define VM_STACK_FLAGS	(VM_GROWSDOWN | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT)
+#define VM_STACK	VM_GROWSDOWN
 #endif
 
+#define VM_STACK_FLAGS	(VM_STACK | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT)
+
 /*
  * Special vmas that are non-mergable, non-mlock()able.
  * Note: mm/huge_memory.c VM_NO_THP depends on this definition.
@@ -385,7 +387,8 @@
 	REGION_MIXED,
 };
 
-int region_intersects(resource_size_t offset, size_t size, const char *type);
+int region_intersects(resource_size_t offset, size_t size, unsigned long flags,
+		      unsigned long desc);
 
 /* Support for virtually mapped pages */
 struct page *vmalloc_to_page(const void *addr);
@@ -1341,8 +1344,7 @@
 		!vma_growsup(vma->vm_next, addr);
 }
 
-extern struct task_struct *task_of_stack(struct task_struct *task,
-				struct vm_area_struct *vma, bool in_group);
+int vma_is_stack_for_task(struct vm_area_struct *vma, struct task_struct *t);
 
 extern unsigned long move_page_tables(struct vm_area_struct *vma,
 		unsigned long old_addr, struct vm_area_struct *new_vma,
@@ -2137,6 +2139,8 @@
 int vm_insert_page(struct vm_area_struct *, unsigned long addr, struct page *);
 int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
 			unsigned long pfn);
+int vm_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr,
+			unsigned long pfn, pgprot_t pgprot);
 int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
 			pfn_t pfn);
 int vm_iomap_memory(struct vm_area_struct *vma, phys_addr_t start, unsigned long len);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index d3ebb9d..944b2b3 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -424,9 +424,9 @@
 	unsigned long total_vm;		/* Total pages mapped */
 	unsigned long locked_vm;	/* Pages that have PG_mlocked set */
 	unsigned long pinned_vm;	/* Refcount permanently increased */
-	unsigned long data_vm;		/* VM_WRITE & ~VM_SHARED/GROWSDOWN */
-	unsigned long exec_vm;		/* VM_EXEC & ~VM_WRITE */
-	unsigned long stack_vm;		/* VM_GROWSUP/DOWN */
+	unsigned long data_vm;		/* VM_WRITE & ~VM_SHARED & ~VM_STACK */
+	unsigned long exec_vm;		/* VM_EXEC & ~VM_WRITE & ~VM_STACK */
+	unsigned long stack_vm;		/* VM_STACK */
 	unsigned long def_flags;
 	unsigned long start_code, end_code, start_data, end_data;
 	unsigned long start_brk, brk, start_stack;
@@ -566,10 +566,26 @@
 }
 #endif
 
-struct vm_special_mapping
-{
-	const char *name;
+struct vm_fault;
+
+struct vm_special_mapping {
+	const char *name;	/* The name, e.g. "[vdso]". */
+
+	/*
+	 * If .fault is not provided, this points to a
+	 * NULL-terminated array of pages that back the special mapping.
+	 *
+	 * This must not be NULL unless .fault is provided.
+	 */
 	struct page **pages;
+
+	/*
+	 * If non-NULL, then this is called to resolve page faults
+	 * on the special mapping.  If used, .pages is not checked.
+	 */
+	int (*fault)(const struct vm_special_mapping *sm,
+		     struct vm_area_struct *vma,
+		     struct vm_fault *vmf);
 };
 
 enum tlb_flush_reason {
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 33bb1b1..7b6c2cf 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -682,6 +682,12 @@
 	 */
 	unsigned long first_deferred_pfn;
 #endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	spinlock_t split_queue_lock;
+	struct list_head split_queue;
+	unsigned long split_queue_len;
+#endif
 } pg_data_t;
 
 #define node_present_pages(nid)	(NODE_DATA(nid)->node_present_pages)
diff --git a/include/linux/module.h b/include/linux/module.h
index 4560d8f..2bb0c30 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -324,6 +324,12 @@
 #define __module_layout_align
 #endif
 
+struct mod_kallsyms {
+	Elf_Sym *symtab;
+	unsigned int num_symtab;
+	char *strtab;
+};
+
 struct module {
 	enum module_state state;
 
@@ -405,15 +411,10 @@
 #endif
 
 #ifdef CONFIG_KALLSYMS
-	/*
-	 * We keep the symbol and string tables for kallsyms.
-	 * The core_* fields below are temporary, loader-only (they
-	 * could really be discarded after module init).
-	 */
-	Elf_Sym *symtab, *core_symtab;
-	unsigned int num_symtab, core_num_syms;
-	char *strtab, *core_strtab;
-
+	/* Protected by RCU and/or module_mutex: use rcu_dereference() */
+	struct mod_kallsyms *kallsyms;
+	struct mod_kallsyms core_kallsyms;
+	
 	/* Section attributes */
 	struct module_sect_attrs *sect_attrs;
 
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 5ac140d..5440b7b 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -512,7 +512,6 @@
 	clear_bit(NAPI_STATE_NPSVC, &n->state);
 }
 
-#ifdef CONFIG_SMP
 /**
  *	napi_synchronize - wait until NAPI is not running
  *	@n: napi context
@@ -523,12 +522,12 @@
  */
 static inline void napi_synchronize(const struct napi_struct *n)
 {
-	while (test_bit(NAPI_STATE_SCHED, &n->state))
-		msleep(1);
+	if (IS_ENABLED(CONFIG_SMP))
+		while (test_bit(NAPI_STATE_SCHED, &n->state))
+			msleep(1);
+	else
+		barrier();
 }
-#else
-# define napi_synchronize(n)	barrier()
-#endif
 
 enum netdev_queue_state_t {
 	__QUEUE_STATE_DRV_XOFF,
@@ -3719,7 +3718,7 @@
 void *netdev_lower_get_next(struct net_device *dev,
 				struct list_head **iter);
 #define netdev_for_each_lower_dev(dev, ldev, iter) \
-	for (iter = &(dev)->adj_list.lower, \
+	for (iter = (dev)->adj_list.lower.next, \
 	     ldev = netdev_lower_get_next(dev, &(iter)); \
 	     ldev; \
 	     ldev = netdev_lower_get_next(dev, &(iter)))
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 48e0320..67300f8 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -550,9 +550,7 @@
 
 static inline loff_t nfs_size_to_loff_t(__u64 size)
 {
-	if (size > (__u64) OFFSET_MAX - 1)
-		return OFFSET_MAX - 1;
-	return (loff_t) size;
+	return min_t(u64, size, OFFSET_MAX);
 }
 
 static inline ino_t
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 791098a..d320906 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -275,6 +275,7 @@
 	size_t layoutupdate_len;
 	struct page *layoutupdate_page;
 	struct page **layoutupdate_pages;
+	__be32 *start_p;
 };
 
 struct nfs4_layoutcommit_res {
diff --git a/include/linux/of.h b/include/linux/of.h
index dd10626..dc6e396 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -929,7 +929,7 @@
 	return num;
 }
 
-#ifdef CONFIG_OF
+#if defined(CONFIG_OF) && !defined(MODULE)
 #define _OF_DECLARE(table, name, compat, fn, fn_type)			\
 	static const struct of_device_id __of_table_##name		\
 		__used __section(__##table##_of_table)			\
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 27df4a6..2771625 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -988,23 +988,6 @@
 	return pdev->is_managed;
 }
 
-static inline void pci_set_managed_irq(struct pci_dev *pdev, unsigned int irq)
-{
-	pdev->irq = irq;
-	pdev->irq_managed = 1;
-}
-
-static inline void pci_reset_managed_irq(struct pci_dev *pdev)
-{
-	pdev->irq = 0;
-	pdev->irq_managed = 0;
-}
-
-static inline bool pci_has_managed_irq(struct pci_dev *pdev)
-{
-	return pdev->irq_managed && pdev->irq > 0;
-}
-
 void pci_disable_device(struct pci_dev *dev);
 
 extern unsigned int pcibios_max_latency;
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index f9828a4..79ec7bb 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -397,6 +397,7 @@
  * enum perf_event_active_state - the states of a event
  */
 enum perf_event_active_state {
+	PERF_EVENT_STATE_DEAD		= -4,
 	PERF_EVENT_STATE_EXIT		= -3,
 	PERF_EVENT_STATE_ERROR		= -2,
 	PERF_EVENT_STATE_OFF		= -1,
@@ -467,6 +468,7 @@
 	int				group_flags;
 	struct perf_event		*group_leader;
 	struct pmu			*pmu;
+	void				*pmu_private;
 
 	enum perf_event_active_state	state;
 	unsigned int			attach_state;
@@ -634,9 +636,6 @@
 	int				nr_cgroups;	 /* cgroup evts */
 	void				*task_ctx_data; /* pmu specific data */
 	struct rcu_head			rcu_head;
-
-	struct delayed_work		orphans_remove;
-	bool				orphans_remove_sched;
 };
 
 /*
@@ -729,7 +728,7 @@
 extern void perf_event_exit_task(struct task_struct *child);
 extern void perf_event_free_task(struct task_struct *task);
 extern void perf_event_delayed_put(struct task_struct *task);
-extern struct perf_event *perf_event_get(unsigned int fd);
+extern struct file *perf_event_get(unsigned int fd);
 extern const struct perf_event_attr *perf_event_attrs(struct perf_event *event);
 extern void perf_event_print_debug(void);
 extern void perf_pmu_disable(struct pmu *pmu);
@@ -908,7 +907,7 @@
 	}
 }
 
-extern struct static_key_deferred perf_sched_events;
+extern struct static_key_false perf_sched_events;
 
 static __always_inline bool
 perf_sw_migrate_enabled(void)
@@ -927,7 +926,7 @@
 static inline void perf_event_task_sched_in(struct task_struct *prev,
 					    struct task_struct *task)
 {
-	if (static_key_false(&perf_sched_events.key))
+	if (static_branch_unlikely(&perf_sched_events))
 		__perf_event_task_sched_in(prev, task);
 
 	if (perf_sw_migrate_enabled() && task->sched_migrated) {
@@ -944,7 +943,7 @@
 {
 	perf_sw_event_sched(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 0);
 
-	if (static_key_false(&perf_sched_events.key))
+	if (static_branch_unlikely(&perf_sched_events))
 		__perf_event_task_sched_out(prev, next);
 }
 
@@ -1044,7 +1043,7 @@
 extern u64 perf_swevent_set_period(struct perf_event *event);
 extern void perf_event_enable(struct perf_event *event);
 extern void perf_event_disable(struct perf_event *event);
-extern int __perf_event_disable(void *info);
+extern void perf_event_disable_local(struct perf_event *event);
 extern void perf_event_task_tick(void);
 #else /* !CONFIG_PERF_EVENTS: */
 static inline void *
@@ -1070,7 +1069,7 @@
 static inline void perf_event_exit_task(struct task_struct *child)	{ }
 static inline void perf_event_free_task(struct task_struct *task)	{ }
 static inline void perf_event_delayed_put(struct task_struct *task)	{ }
-static inline struct perf_event *perf_event_get(unsigned int fd)	{ return ERR_PTR(-EINVAL); }
+static inline struct file *perf_event_get(unsigned int fd)	{ return ERR_PTR(-EINVAL); }
 static inline const struct perf_event_attr *perf_event_attrs(struct perf_event *event)
 {
 	return ERR_PTR(-EINVAL);
@@ -1111,12 +1110,6 @@
 static inline int perf_event_release_kernel(struct perf_event *event)	{ return 0; }
 #endif
 
-#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_NO_HZ_FULL)
-extern bool perf_event_can_stop_tick(void);
-#else
-static inline bool perf_event_can_stop_tick(void)			{ return true; }
-#endif
-
 #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL)
 extern void perf_restore_debug_store(void);
 #else
diff --git a/include/linux/pfn.h b/include/linux/pfn.h
index 2d8e497..1132953 100644
--- a/include/linux/pfn.h
+++ b/include/linux/pfn.h
@@ -10,7 +10,7 @@
  * backing is indicated by flags in the high bits of the value.
  */
 typedef struct {
-	unsigned long val;
+	u64 val;
 } pfn_t;
 #endif
 
diff --git a/include/linux/pfn_t.h b/include/linux/pfn_t.h
index 0703b53..9499481 100644
--- a/include/linux/pfn_t.h
+++ b/include/linux/pfn_t.h
@@ -9,14 +9,13 @@
  * PFN_DEV - pfn is not covered by system memmap by default
  * PFN_MAP - pfn has a dynamic page mapping established by a device driver
  */
-#define PFN_FLAGS_MASK (((unsigned long) ~PAGE_MASK) \
-		<< (BITS_PER_LONG - PAGE_SHIFT))
-#define PFN_SG_CHAIN (1UL << (BITS_PER_LONG - 1))
-#define PFN_SG_LAST (1UL << (BITS_PER_LONG - 2))
-#define PFN_DEV (1UL << (BITS_PER_LONG - 3))
-#define PFN_MAP (1UL << (BITS_PER_LONG - 4))
+#define PFN_FLAGS_MASK (((u64) ~PAGE_MASK) << (BITS_PER_LONG_LONG - PAGE_SHIFT))
+#define PFN_SG_CHAIN (1ULL << (BITS_PER_LONG_LONG - 1))
+#define PFN_SG_LAST (1ULL << (BITS_PER_LONG_LONG - 2))
+#define PFN_DEV (1ULL << (BITS_PER_LONG_LONG - 3))
+#define PFN_MAP (1ULL << (BITS_PER_LONG_LONG - 4))
 
-static inline pfn_t __pfn_to_pfn_t(unsigned long pfn, unsigned long flags)
+static inline pfn_t __pfn_to_pfn_t(unsigned long pfn, u64 flags)
 {
 	pfn_t pfn_t = { .val = pfn | (flags & PFN_FLAGS_MASK), };
 
@@ -29,7 +28,7 @@
 	return __pfn_to_pfn_t(pfn, 0);
 }
 
-extern pfn_t phys_to_pfn_t(dma_addr_t addr, unsigned long flags);
+extern pfn_t phys_to_pfn_t(phys_addr_t addr, u64 flags);
 
 static inline bool pfn_t_has_page(pfn_t pfn)
 {
@@ -48,7 +47,7 @@
 	return NULL;
 }
 
-static inline dma_addr_t pfn_t_to_phys(pfn_t pfn)
+static inline phys_addr_t pfn_t_to_phys(pfn_t pfn)
 {
 	return PFN_PHYS(pfn_t_to_pfn(pfn));
 }
@@ -87,7 +86,7 @@
 #ifdef __HAVE_ARCH_PTE_DEVMAP
 static inline bool pfn_t_devmap(pfn_t pfn)
 {
-	const unsigned long flags = PFN_DEV|PFN_MAP;
+	const u64 flags = PFN_DEV|PFN_MAP;
 
 	return (pfn.val & flags) == flags;
 }
diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index 907f3fd..62d44c1 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -128,9 +128,6 @@
 void run_posix_cpu_timers(struct task_struct *task);
 void posix_cpu_timers_exit(struct task_struct *task);
 void posix_cpu_timers_exit_group(struct task_struct *task);
-
-bool posix_cpu_timers_can_stop_tick(struct task_struct *tsk);
-
 void set_process_cpu_timer(struct task_struct *task, unsigned int clock_idx,
 			   cputime_t *newval, cputime_t *oldval);
 
diff --git a/include/linux/power/bq27xxx_battery.h b/include/linux/power/bq27xxx_battery.h
index 998d8f1..b50c049 100644
--- a/include/linux/power/bq27xxx_battery.h
+++ b/include/linux/power/bq27xxx_battery.h
@@ -49,6 +49,7 @@
 
 struct bq27xxx_device_info {
 	struct device *dev;
+	int id;
 	enum bq27xxx_chip chip;
 	const char *name;
 	struct bq27xxx_access_methods bus;
diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index 7c88ad1..f54be70 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -379,12 +379,28 @@
 			     struct radix_tree_iter *iter, unsigned flags);
 
 /**
+ * radix_tree_iter_retry - retry this chunk of the iteration
+ * @iter:	iterator state
+ *
+ * If we iterate over a tree protected only by the RCU lock, a race
+ * against deletion or creation may result in seeing a slot for which
+ * radix_tree_deref_retry() returns true.  If so, call this function
+ * and continue the iteration.
+ */
+static inline __must_check
+void **radix_tree_iter_retry(struct radix_tree_iter *iter)
+{
+	iter->next_index = iter->index;
+	return NULL;
+}
+
+/**
  * radix_tree_chunk_size - get current chunk size
  *
  * @iter:	pointer to radix tree iterator
  * Returns:	current chunk size
  */
-static __always_inline unsigned
+static __always_inline long
 radix_tree_chunk_size(struct radix_tree_iter *iter)
 {
 	return iter->next_index - iter->index;
@@ -418,9 +434,9 @@
 			return slot + offset + 1;
 		}
 	} else {
-		unsigned size = radix_tree_chunk_size(iter) - 1;
+		long size = radix_tree_chunk_size(iter);
 
-		while (size--) {
+		while (--size > 0) {
 			slot++;
 			iter->index++;
 			if (likely(*slot))
diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h
index a7a06d1..a0118d5 100644
--- a/include/linux/raid/pq.h
+++ b/include/linux/raid/pq.h
@@ -152,6 +152,8 @@
 
 # define jiffies	raid6_jiffies()
 # define printk 	printf
+# define pr_err(format, ...) fprintf(stderr, format, ## __VA_ARGS__)
+# define pr_info(format, ...) fprintf(stdout, format, ## __VA_ARGS__)
 # define GFP_KERNEL	0
 # define __get_free_pages(x, y)	((unsigned long)mmap(NULL, PAGE_SIZE << (y), \
 						     PROT_READ|PROT_WRITE,   \
diff --git a/include/linux/random.h b/include/linux/random.h
index a75840c..9c29122 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -34,6 +34,7 @@
 #endif
 
 unsigned int get_random_int(void);
+unsigned long get_random_long(void);
 unsigned long randomize_range(unsigned long start, unsigned long end, unsigned long len);
 
 u32 prandom_u32(void);
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 14e6f47..b5d48bd 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -360,8 +360,6 @@
 #else
 static inline void rcu_user_enter(void) { }
 static inline void rcu_user_exit(void) { }
-static inline void rcu_user_hooks_switch(struct task_struct *prev,
-					 struct task_struct *next) { }
 #endif /* CONFIG_NO_HZ_FULL */
 
 #ifdef CONFIG_RCU_NOCB_CPU
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index bdf597c..a07f42b 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -109,20 +109,6 @@
 		__put_anon_vma(anon_vma);
 }
 
-static inline void vma_lock_anon_vma(struct vm_area_struct *vma)
-{
-	struct anon_vma *anon_vma = vma->anon_vma;
-	if (anon_vma)
-		down_write(&anon_vma->root->rwsem);
-}
-
-static inline void vma_unlock_anon_vma(struct vm_area_struct *vma)
-{
-	struct anon_vma *anon_vma = vma->anon_vma;
-	if (anon_vma)
-		up_write(&anon_vma->root->rwsem);
-}
-
 static inline void anon_vma_lock_write(struct anon_vma *anon_vma)
 {
 	down_write(&anon_vma->root->rwsem);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index a10494a..c617ea1 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -182,8 +182,6 @@
 static inline void update_cpu_load_nohz(int active) { }
 #endif
 
-extern unsigned long get_parent_ip(unsigned long addr);
-
 extern void dump_cpu_task(int cpu);
 
 struct seq_file;
@@ -719,6 +717,10 @@
 	/* Earliest-expiration cache. */
 	struct task_cputime cputime_expires;
 
+#ifdef CONFIG_NO_HZ_FULL
+	unsigned long tick_dep_mask;
+#endif
+
 	struct list_head cpu_timers[3];
 
 	struct pid *tty_old_pgrp;
@@ -920,6 +922,10 @@
 #endif
 }
 
+#ifdef CONFIG_SCHEDSTATS
+void force_schedstat_enabled(void);
+#endif
+
 enum cpu_idle_type {
 	CPU_IDLE,
 	CPU_NOT_IDLE,
@@ -1289,6 +1295,8 @@
 	unsigned long timeout;
 	unsigned long watchdog_stamp;
 	unsigned int time_slice;
+	unsigned short on_rq;
+	unsigned short on_list;
 
 	struct sched_rt_entity *back;
 #ifdef CONFIG_RT_GROUP_SCHED
@@ -1329,10 +1337,6 @@
 	 * task has to wait for a replenishment to be performed at the
 	 * next firing of dl_timer.
 	 *
-	 * @dl_new tells if a new instance arrived. If so we must
-	 * start executing it with full runtime and reset its absolute
-	 * deadline;
-	 *
 	 * @dl_boosted tells if we are boosted due to DI. If so we are
 	 * outside bandwidth enforcement mechanism (but only until we
 	 * exit the critical section);
@@ -1340,7 +1344,7 @@
 	 * @dl_yielded tells if task gave up the cpu before consuming
 	 * all its available runtime during the last job.
 	 */
-	int dl_throttled, dl_new, dl_boosted, dl_yielded;
+	int dl_throttled, dl_boosted, dl_yielded;
 
 	/*
 	 * Bandwidth enforcement timer. Each -deadline task has its
@@ -1542,6 +1546,10 @@
 		VTIME_SYS,
 	} vtime_snap_whence;
 #endif
+
+#ifdef CONFIG_NO_HZ_FULL
+	unsigned long tick_dep_mask;
+#endif
 	unsigned long nvcsw, nivcsw; /* context switch counts */
 	u64 start_time;		/* monotonic time in nsec */
 	u64 real_start_time;	/* boot based time in nsec */
@@ -2356,10 +2364,7 @@
 #endif
 
 #ifdef CONFIG_NO_HZ_FULL
-extern bool sched_can_stop_tick(void);
 extern u64 scheduler_tick_max_deferment(void);
-#else
-static inline bool sched_can_stop_tick(void) { return false; }
 #endif
 
 #ifdef CONFIG_SCHED_AUTOGROUP
diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index c9e4731..4f080ab 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -95,4 +95,8 @@
 				 void __user *buffer, size_t *lenp,
 				 loff_t *ppos);
 
+extern int sysctl_schedstats(struct ctl_table *table, int write,
+				 void __user *buffer, size_t *lenp,
+				 loff_t *ppos);
+
 #endif /* _SCHED_SYSCTL_H */
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 11f935c..d3fcd45 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -299,6 +299,7 @@
 #else
 #define MAX_SKB_FRAGS (65536/PAGE_SIZE + 1)
 #endif
+extern int sysctl_max_skb_frags;
 
 typedef struct skb_frag_struct skb_frag_t;
 
@@ -1984,6 +1985,30 @@
 	skb->tail += len;
 }
 
+/**
+ *	skb_tailroom_reserve - adjust reserved_tailroom
+ *	@skb: buffer to alter
+ *	@mtu: maximum amount of headlen permitted
+ *	@needed_tailroom: minimum amount of reserved_tailroom
+ *
+ *	Set reserved_tailroom so that headlen can be as large as possible but
+ *	not larger than mtu and tailroom cannot be smaller than
+ *	needed_tailroom.
+ *	The required headroom should already have been reserved before using
+ *	this function.
+ */
+static inline void skb_tailroom_reserve(struct sk_buff *skb, unsigned int mtu,
+					unsigned int needed_tailroom)
+{
+	SKB_LINEAR_ASSERT(skb);
+	if (mtu < skb_tailroom(skb) - needed_tailroom)
+		/* use at most mtu */
+		skb->reserved_tailroom = skb_tailroom(skb) - mtu;
+	else
+		/* use up to all available space */
+		skb->reserved_tailroom = needed_tailroom;
+}
+
 #define ENCAP_TYPE_ETHER	0
 #define ENCAP_TYPE_IPPROTO	1
 
diff --git a/include/linux/soc/ti/knav_dma.h b/include/linux/soc/ti/knav_dma.h
index 343c13a..35cb926 100644
--- a/include/linux/soc/ti/knav_dma.h
+++ b/include/linux/soc/ti/knav_dma.h
@@ -44,6 +44,7 @@
 
 #define KNAV_DMA_NUM_EPIB_WORDS			4
 #define KNAV_DMA_NUM_PS_WORDS			16
+#define KNAV_DMA_NUM_SW_DATA_WORDS		4
 #define KNAV_DMA_FDQ_PER_CHAN			4
 
 /* Tx channel scheduling priority */
@@ -142,6 +143,7 @@
  * @orig_buff:			buff pointer since 'buff' can be overwritten
  * @epib:			Extended packet info block
  * @psdata:			Protocol specific
+ * @sw_data:			Software private data not touched by h/w
  */
 struct knav_dma_desc {
 	__le32	desc_info;
@@ -154,7 +156,7 @@
 	__le32	orig_buff;
 	__le32	epib[KNAV_DMA_NUM_EPIB_WORDS];
 	__le32	psdata[KNAV_DMA_NUM_PS_WORDS];
-	__le32	pad[4];
+	u32	sw_data[KNAV_DMA_NUM_SW_DATA_WORDS];
 } ____cacheline_aligned;
 
 #if IS_ENABLED(CONFIG_KEYSTONE_NAVIGATOR_DMA)
diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index f5f80c5..dc8eb63 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -99,8 +99,23 @@
 	}
 
 /*
- * define and init a srcu struct at build time.
- * dont't call init_srcu_struct() nor cleanup_srcu_struct() on it.
+ * Define and initialize a srcu struct at build time.
+ * Do -not- call init_srcu_struct() nor cleanup_srcu_struct() on it.
+ *
+ * Note that although DEFINE_STATIC_SRCU() hides the name from other
+ * files, the per-CPU variable rules nevertheless require that the
+ * chosen name be globally unique.  These rules also prohibit use of
+ * DEFINE_STATIC_SRCU() within a function.  If these rules are too
+ * restrictive, declare the srcu_struct manually.  For example, in
+ * each file:
+ *
+ *	static struct srcu_struct my_srcu;
+ *
+ * Then, before the first use of each my_srcu, manually initialize it:
+ *
+ *	init_srcu_struct(&my_srcu);
+ *
+ * See include/linux/percpu-defs.h for the rules on per-CPU variables.
  */
 #define __DEFINE_SRCU(name, is_static)					\
 	static DEFINE_PER_CPU(struct srcu_struct_array, name##_srcu_array);\
diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index eead8ab..881a79d 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -100,6 +100,7 @@
 	int interface;
 	struct stmmac_mdio_bus_data *mdio_bus_data;
 	struct device_node *phy_node;
+	struct device_node *mdio_node;
 	struct stmmac_dma_cfg *dma_cfg;
 	int clk_csr;
 	int has_gmac;
diff --git a/include/linux/swait.h b/include/linux/swait.h
new file mode 100644
index 0000000..c1f9c62
--- /dev/null
+++ b/include/linux/swait.h
@@ -0,0 +1,172 @@
+#ifndef _LINUX_SWAIT_H
+#define _LINUX_SWAIT_H
+
+#include <linux/list.h>
+#include <linux/stddef.h>
+#include <linux/spinlock.h>
+#include <asm/current.h>
+
+/*
+ * Simple wait queues
+ *
+ * While these are very similar to the other/complex wait queues (wait.h) the
+ * most important difference is that the simple waitqueue allows for
+ * deterministic behaviour -- IOW it has strictly bounded IRQ and lock hold
+ * times.
+ *
+ * In order to make this so, we had to drop a fair number of features of the
+ * other waitqueue code; notably:
+ *
+ *  - mixing INTERRUPTIBLE and UNINTERRUPTIBLE sleeps on the same waitqueue;
+ *    all wakeups are TASK_NORMAL in order to avoid O(n) lookups for the right
+ *    sleeper state.
+ *
+ *  - the exclusive mode; because this requires preserving the list order
+ *    and this is hard.
+ *
+ *  - custom wake functions; because you cannot give any guarantees about
+ *    random code.
+ *
+ * As a side effect of this; the data structures are slimmer.
+ *
+ * One would recommend using this wait queue where possible.
+ */
+
+struct task_struct;
+
+struct swait_queue_head {
+	raw_spinlock_t		lock;
+	struct list_head	task_list;
+};
+
+struct swait_queue {
+	struct task_struct	*task;
+	struct list_head	task_list;
+};
+
+#define __SWAITQUEUE_INITIALIZER(name) {				\
+	.task		= current,					\
+	.task_list	= LIST_HEAD_INIT((name).task_list),		\
+}
+
+#define DECLARE_SWAITQUEUE(name)					\
+	struct swait_queue name = __SWAITQUEUE_INITIALIZER(name)
+
+#define __SWAIT_QUEUE_HEAD_INITIALIZER(name) {				\
+	.lock		= __RAW_SPIN_LOCK_UNLOCKED(name.lock),		\
+	.task_list	= LIST_HEAD_INIT((name).task_list),		\
+}
+
+#define DECLARE_SWAIT_QUEUE_HEAD(name)					\
+	struct swait_queue_head name = __SWAIT_QUEUE_HEAD_INITIALIZER(name)
+
+extern void __init_swait_queue_head(struct swait_queue_head *q, const char *name,
+				    struct lock_class_key *key);
+
+#define init_swait_queue_head(q)				\
+	do {							\
+		static struct lock_class_key __key;		\
+		__init_swait_queue_head((q), #q, &__key);	\
+	} while (0)
+
+#ifdef CONFIG_LOCKDEP
+# define __SWAIT_QUEUE_HEAD_INIT_ONSTACK(name)			\
+	({ init_swait_queue_head(&name); name; })
+# define DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(name)			\
+	struct swait_queue_head name = __SWAIT_QUEUE_HEAD_INIT_ONSTACK(name)
+#else
+# define DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(name)			\
+	DECLARE_SWAIT_QUEUE_HEAD(name)
+#endif
+
+static inline int swait_active(struct swait_queue_head *q)
+{
+	return !list_empty(&q->task_list);
+}
+
+extern void swake_up(struct swait_queue_head *q);
+extern void swake_up_all(struct swait_queue_head *q);
+extern void swake_up_locked(struct swait_queue_head *q);
+
+extern void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait);
+extern void prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait, int state);
+extern long prepare_to_swait_event(struct swait_queue_head *q, struct swait_queue *wait, int state);
+
+extern void __finish_swait(struct swait_queue_head *q, struct swait_queue *wait);
+extern void finish_swait(struct swait_queue_head *q, struct swait_queue *wait);
+
+/* as per ___wait_event() but for swait, therefore "exclusive == 0" */
+#define ___swait_event(wq, condition, state, ret, cmd)			\
+({									\
+	struct swait_queue __wait;					\
+	long __ret = ret;						\
+									\
+	INIT_LIST_HEAD(&__wait.task_list);				\
+	for (;;) {							\
+		long __int = prepare_to_swait_event(&wq, &__wait, state);\
+									\
+		if (condition)						\
+			break;						\
+									\
+		if (___wait_is_interruptible(state) && __int) {		\
+			__ret = __int;					\
+			break;						\
+		}							\
+									\
+		cmd;							\
+	}								\
+	finish_swait(&wq, &__wait);					\
+	__ret;								\
+})
+
+#define __swait_event(wq, condition)					\
+	(void)___swait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0,	\
+			    schedule())
+
+#define swait_event(wq, condition)					\
+do {									\
+	if (condition)							\
+		break;							\
+	__swait_event(wq, condition);					\
+} while (0)
+
+#define __swait_event_timeout(wq, condition, timeout)			\
+	___swait_event(wq, ___wait_cond_timeout(condition),		\
+		      TASK_UNINTERRUPTIBLE, timeout,			\
+		      __ret = schedule_timeout(__ret))
+
+#define swait_event_timeout(wq, condition, timeout)			\
+({									\
+	long __ret = timeout;						\
+	if (!___wait_cond_timeout(condition))				\
+		__ret = __swait_event_timeout(wq, condition, timeout);	\
+	__ret;								\
+})
+
+#define __swait_event_interruptible(wq, condition)			\
+	___swait_event(wq, condition, TASK_INTERRUPTIBLE, 0,		\
+		      schedule())
+
+#define swait_event_interruptible(wq, condition)			\
+({									\
+	int __ret = 0;							\
+	if (!(condition))						\
+		__ret = __swait_event_interruptible(wq, condition);	\
+	__ret;								\
+})
+
+#define __swait_event_interruptible_timeout(wq, condition, timeout)	\
+	___swait_event(wq, ___wait_cond_timeout(condition),		\
+		      TASK_INTERRUPTIBLE, timeout,			\
+		      __ret = schedule_timeout(__ret))
+
+#define swait_event_interruptible_timeout(wq, condition, timeout)	\
+({									\
+	long __ret = timeout;						\
+	if (!___wait_cond_timeout(condition))				\
+		__ret = __swait_event_interruptible_timeout(wq,		\
+						condition, timeout);	\
+	__ret;								\
+})
+
+#endif /* _LINUX_SWAIT_H */
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index e7a018e..017fced 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -1,10 +1,13 @@
 #ifndef __LINUX_SWIOTLB_H
 #define __LINUX_SWIOTLB_H
 
+#include <linux/dma-direction.h>
+#include <linux/init.h>
 #include <linux/types.h>
 
 struct device;
 struct dma_attrs;
+struct page;
 struct scatterlist;
 
 extern int swiotlb_force;
diff --git a/include/linux/tick.h b/include/linux/tick.h
index 97fd4e5..21f7364 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -97,6 +97,19 @@
 	tick_broadcast_oneshot_control(TICK_BROADCAST_EXIT);
 }
 
+enum tick_dep_bits {
+	TICK_DEP_BIT_POSIX_TIMER	= 0,
+	TICK_DEP_BIT_PERF_EVENTS	= 1,
+	TICK_DEP_BIT_SCHED		= 2,
+	TICK_DEP_BIT_CLOCK_UNSTABLE	= 3
+};
+
+#define TICK_DEP_MASK_NONE		0
+#define TICK_DEP_MASK_POSIX_TIMER	(1 << TICK_DEP_BIT_POSIX_TIMER)
+#define TICK_DEP_MASK_PERF_EVENTS	(1 << TICK_DEP_BIT_PERF_EVENTS)
+#define TICK_DEP_MASK_SCHED		(1 << TICK_DEP_BIT_SCHED)
+#define TICK_DEP_MASK_CLOCK_UNSTABLE	(1 << TICK_DEP_BIT_CLOCK_UNSTABLE)
+
 #ifdef CONFIG_NO_HZ_COMMON
 extern int tick_nohz_enabled;
 extern int tick_nohz_tick_stopped(void);
@@ -154,9 +167,73 @@
 	return cpumask_any_and(housekeeping_mask, cpu_online_mask);
 }
 
-extern void tick_nohz_full_kick(void);
+extern void tick_nohz_dep_set(enum tick_dep_bits bit);
+extern void tick_nohz_dep_clear(enum tick_dep_bits bit);
+extern void tick_nohz_dep_set_cpu(int cpu, enum tick_dep_bits bit);
+extern void tick_nohz_dep_clear_cpu(int cpu, enum tick_dep_bits bit);
+extern void tick_nohz_dep_set_task(struct task_struct *tsk,
+				   enum tick_dep_bits bit);
+extern void tick_nohz_dep_clear_task(struct task_struct *tsk,
+				     enum tick_dep_bits bit);
+extern void tick_nohz_dep_set_signal(struct signal_struct *signal,
+				     enum tick_dep_bits bit);
+extern void tick_nohz_dep_clear_signal(struct signal_struct *signal,
+				       enum tick_dep_bits bit);
+
+/*
+ * The below are tick_nohz_[set,clear]_dep() wrappers that optimize off-cases
+ * on top of static keys.
+ */
+static inline void tick_dep_set(enum tick_dep_bits bit)
+{
+	if (tick_nohz_full_enabled())
+		tick_nohz_dep_set(bit);
+}
+
+static inline void tick_dep_clear(enum tick_dep_bits bit)
+{
+	if (tick_nohz_full_enabled())
+		tick_nohz_dep_clear(bit);
+}
+
+static inline void tick_dep_set_cpu(int cpu, enum tick_dep_bits bit)
+{
+	if (tick_nohz_full_cpu(cpu))
+		tick_nohz_dep_set_cpu(cpu, bit);
+}
+
+static inline void tick_dep_clear_cpu(int cpu, enum tick_dep_bits bit)
+{
+	if (tick_nohz_full_cpu(cpu))
+		tick_nohz_dep_clear_cpu(cpu, bit);
+}
+
+static inline void tick_dep_set_task(struct task_struct *tsk,
+				     enum tick_dep_bits bit)
+{
+	if (tick_nohz_full_enabled())
+		tick_nohz_dep_set_task(tsk, bit);
+}
+static inline void tick_dep_clear_task(struct task_struct *tsk,
+				       enum tick_dep_bits bit)
+{
+	if (tick_nohz_full_enabled())
+		tick_nohz_dep_clear_task(tsk, bit);
+}
+static inline void tick_dep_set_signal(struct signal_struct *signal,
+				       enum tick_dep_bits bit)
+{
+	if (tick_nohz_full_enabled())
+		tick_nohz_dep_set_signal(signal, bit);
+}
+static inline void tick_dep_clear_signal(struct signal_struct *signal,
+					 enum tick_dep_bits bit)
+{
+	if (tick_nohz_full_enabled())
+		tick_nohz_dep_clear_signal(signal, bit);
+}
+
 extern void tick_nohz_full_kick_cpu(int cpu);
-extern void tick_nohz_full_kick_all(void);
 extern void __tick_nohz_task_switch(void);
 #else
 static inline int housekeeping_any_cpu(void)
@@ -166,9 +243,21 @@
 static inline bool tick_nohz_full_enabled(void) { return false; }
 static inline bool tick_nohz_full_cpu(int cpu) { return false; }
 static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask) { }
+
+static inline void tick_dep_set(enum tick_dep_bits bit) { }
+static inline void tick_dep_clear(enum tick_dep_bits bit) { }
+static inline void tick_dep_set_cpu(int cpu, enum tick_dep_bits bit) { }
+static inline void tick_dep_clear_cpu(int cpu, enum tick_dep_bits bit) { }
+static inline void tick_dep_set_task(struct task_struct *tsk,
+				     enum tick_dep_bits bit) { }
+static inline void tick_dep_clear_task(struct task_struct *tsk,
+				       enum tick_dep_bits bit) { }
+static inline void tick_dep_set_signal(struct signal_struct *signal,
+				       enum tick_dep_bits bit) { }
+static inline void tick_dep_clear_signal(struct signal_struct *signal,
+					 enum tick_dep_bits bit) { }
+
 static inline void tick_nohz_full_kick_cpu(int cpu) { }
-static inline void tick_nohz_full_kick(void) { }
-static inline void tick_nohz_full_kick_all(void) { }
 static inline void __tick_nohz_task_switch(void) { }
 #endif
 
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 429fdfc..925730b 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -568,6 +568,8 @@
 	FILTER_DYN_STRING,
 	FILTER_PTR_STRING,
 	FILTER_TRACE_FN,
+	FILTER_COMM,
+	FILTER_CPU,
 };
 
 extern int trace_event_raw_init(struct trace_event_call *call);
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index acd522a..be586c6 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -14,8 +14,10 @@
  * See the file COPYING for more details.
  */
 
+#include <linux/smp.h>
 #include <linux/errno.h>
 #include <linux/types.h>
+#include <linux/cpumask.h>
 #include <linux/rcupdate.h>
 #include <linux/tracepoint-defs.h>
 
@@ -338,15 +340,19 @@
  * "void *__data, proto" as the callback prototype.
  */
 #define DECLARE_TRACE_NOARGS(name)					\
-		__DECLARE_TRACE(name, void, , 1, void *__data, __data)
+	__DECLARE_TRACE(name, void, ,					\
+			cpu_online(raw_smp_processor_id()),		\
+			void *__data, __data)
 
 #define DECLARE_TRACE(name, proto, args)				\
-		__DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), 1,	\
-				PARAMS(void *__data, proto),		\
-				PARAMS(__data, args))
+	__DECLARE_TRACE(name, PARAMS(proto), PARAMS(args),		\
+			cpu_online(raw_smp_processor_id()),		\
+			PARAMS(void *__data, proto),			\
+			PARAMS(__data, args))
 
 #define DECLARE_TRACE_CONDITION(name, proto, args, cond)		\
-	__DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), PARAMS(cond), \
+	__DECLARE_TRACE(name, PARAMS(proto), PARAMS(args),		\
+			cpu_online(raw_smp_processor_id()) && (PARAMS(cond)), \
 			PARAMS(void *__data, proto),			\
 			PARAMS(__data, args))
 
diff --git a/include/linux/tty.h b/include/linux/tty.h
index 2fd8708..d9fb4b0 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -649,6 +649,7 @@
 /* tty_mutex.c */
 /* functions for preparation of BKL removal */
 extern void __lockfunc tty_lock(struct tty_struct *tty);
+extern int  tty_lock_interruptible(struct tty_struct *tty);
 extern void __lockfunc tty_unlock(struct tty_struct *tty);
 extern void __lockfunc tty_lock_slave(struct tty_struct *tty);
 extern void __lockfunc tty_unlock_slave(struct tty_struct *tty);
diff --git a/include/linux/ucs2_string.h b/include/linux/ucs2_string.h
index cbb20af..bb679b4 100644
--- a/include/linux/ucs2_string.h
+++ b/include/linux/ucs2_string.h
@@ -11,4 +11,8 @@
 unsigned long ucs2_strsize(const ucs2_char_t *data, unsigned long maxlength);
 int ucs2_strncmp(const ucs2_char_t *a, const ucs2_char_t *b, size_t len);
 
+unsigned long ucs2_utf8size(const ucs2_char_t *src);
+unsigned long ucs2_as_utf8(u8 *dest, const ucs2_char_t *src,
+			   unsigned long maxlength);
+
 #endif /* _LINUX_UCS2_STRING_H_ */
diff --git a/include/linux/wait.h b/include/linux/wait.h
index ae71a76..27d7a0a 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -338,7 +338,7 @@
 			    schedule(); try_to_freeze())
 
 /**
- * wait_event - sleep (or freeze) until a condition gets true
+ * wait_event_freezable - sleep (or freeze) until a condition gets true
  * @wq: the waitqueue to wait on
  * @condition: a C expression for the event to wait for
  *
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 0e32bc7..ca73c50 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -311,6 +311,7 @@
 
 	__WQ_DRAINING		= 1 << 16, /* internal: workqueue is draining */
 	__WQ_ORDERED		= 1 << 17, /* internal: workqueue is ordered */
+	__WQ_LEGACY		= 1 << 18, /* internal: create*_workqueue() */
 
 	WQ_MAX_ACTIVE		= 512,	  /* I like 512, better ideas? */
 	WQ_MAX_UNBOUND_PER_CPU	= 4,	  /* 4 * #cpus for unbound wq */
@@ -411,12 +412,12 @@
 	alloc_workqueue(fmt, WQ_UNBOUND | __WQ_ORDERED | (flags), 1, ##args)
 
 #define create_workqueue(name)						\
-	alloc_workqueue("%s", WQ_MEM_RECLAIM, 1, (name))
+	alloc_workqueue("%s", __WQ_LEGACY | WQ_MEM_RECLAIM, 1, (name))
 #define create_freezable_workqueue(name)				\
-	alloc_workqueue("%s", WQ_FREEZABLE | WQ_UNBOUND | WQ_MEM_RECLAIM, \
-			1, (name))
+	alloc_workqueue("%s", __WQ_LEGACY | WQ_FREEZABLE | WQ_UNBOUND |	\
+			WQ_MEM_RECLAIM, 1, (name))
 #define create_singlethread_workqueue(name)				\
-	alloc_ordered_workqueue("%s", WQ_MEM_RECLAIM, name)
+	alloc_ordered_workqueue("%s", __WQ_LEGACY | WQ_MEM_RECLAIM, name)
 
 extern void destroy_workqueue(struct workqueue_struct *wq);
 
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index b333c94..d0b5ca5 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -198,6 +198,7 @@
 void wbc_detach_inode(struct writeback_control *wbc);
 void wbc_account_io(struct writeback_control *wbc, struct page *page,
 		    size_t bytes);
+void cgroup_writeback_umount(void);
 
 /**
  * inode_attach_wb - associate an inode with its wb
@@ -301,6 +302,10 @@
 {
 }
 
+static inline void cgroup_writeback_umount(void)
+{
+}
+
 #endif	/* CONFIG_CGROUP_WRITEBACK */
 
 /*
diff --git a/include/media/videobuf2-core.h b/include/media/videobuf2-core.h
index ef03ae5..8a0f55b 100644
--- a/include/media/videobuf2-core.h
+++ b/include/media/videobuf2-core.h
@@ -533,7 +533,8 @@
 		const unsigned int requested_sizes[]);
 int vb2_core_prepare_buf(struct vb2_queue *q, unsigned int index, void *pb);
 int vb2_core_qbuf(struct vb2_queue *q, unsigned int index, void *pb);
-int vb2_core_dqbuf(struct vb2_queue *q, void *pb, bool nonblocking);
+int vb2_core_dqbuf(struct vb2_queue *q, unsigned int *pindex, void *pb,
+		   bool nonblocking);
 
 int vb2_core_streamon(struct vb2_queue *q, unsigned int type);
 int vb2_core_streamoff(struct vb2_queue *q, unsigned int type);
diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index 2a91a05..9b4c418 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -6,8 +6,8 @@
 #include <linux/mutex.h>
 #include <net/sock.h>
 
-void unix_inflight(struct file *fp);
-void unix_notinflight(struct file *fp);
+void unix_inflight(struct user_struct *user, struct file *fp);
+void unix_notinflight(struct user_struct *user, struct file *fp);
 void unix_gc(void);
 void wait_for_unix_gc(void);
 struct sock *unix_get_socket(struct file *filp);
diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
index 5289929..5ee3c68 100644
--- a/include/net/bluetooth/l2cap.h
+++ b/include/net/bluetooth/l2cap.h
@@ -252,6 +252,12 @@
 #define L2CAP_PSM_3DSP		0x0021
 #define L2CAP_PSM_IPSP		0x0023 /* 6LoWPAN */
 
+#define L2CAP_PSM_DYN_START	0x1001
+#define L2CAP_PSM_DYN_END	0xffff
+#define L2CAP_PSM_AUTO_END	0x10ff
+#define L2CAP_PSM_LE_DYN_START  0x0080
+#define L2CAP_PSM_LE_DYN_END	0x00ff
+
 /* channel identifier */
 #define L2CAP_CID_SIGNALING	0x0001
 #define L2CAP_CID_CONN_LESS	0x0002
diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h
index 6816f0f..30a56ab 100644
--- a/include/net/dst_metadata.h
+++ b/include/net/dst_metadata.h
@@ -44,6 +44,24 @@
 	return dst && !(dst->flags & DST_METADATA);
 }
 
+static inline int skb_metadata_dst_cmp(const struct sk_buff *skb_a,
+				       const struct sk_buff *skb_b)
+{
+	const struct metadata_dst *a, *b;
+
+	if (!(skb_a->_skb_refdst | skb_b->_skb_refdst))
+		return 0;
+
+	a = (const struct metadata_dst *) skb_dst(skb_a);
+	b = (const struct metadata_dst *) skb_dst(skb_b);
+
+	if (!a != !b || a->u.tun_info.options_len != b->u.tun_info.options_len)
+		return 1;
+
+	return memcmp(&a->u.tun_info, &b->u.tun_info,
+		      sizeof(a->u.tun_info) + a->u.tun_info.options_len);
+}
+
 struct metadata_dst *metadata_dst_alloc(u8 optslen, gfp_t flags);
 struct metadata_dst __percpu *metadata_dst_alloc_percpu(u8 optslen, gfp_t flags);
 
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index 481fe1c..49dcad4 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -270,8 +270,9 @@
 					    struct sock *newsk,
 					    const struct request_sock *req);
 
-void inet_csk_reqsk_queue_add(struct sock *sk, struct request_sock *req,
-			      struct sock *child);
+struct sock *inet_csk_reqsk_queue_add(struct sock *sk,
+				      struct request_sock *req,
+				      struct sock *child);
 void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
 				   unsigned long timeout);
 struct sock *inet_csk_complete_hashdance(struct sock *sk, struct sock *child,
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 877f682..295d291 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -64,8 +64,16 @@
 
 void ip6_route_input(struct sk_buff *skb);
 
-struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
-				   struct flowi6 *fl6);
+struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
+					 struct flowi6 *fl6, int flags);
+
+static inline struct dst_entry *ip6_route_output(struct net *net,
+						 const struct sock *sk,
+						 struct flowi6 *fl6)
+{
+	return ip6_route_output_flags(net, sk, fl6, 0);
+}
+
 struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
 				   int flags);
 
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 7029527..4079fc1 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -61,6 +61,7 @@
 	struct rtable __rcu		*fnhe_rth_input;
 	struct rtable __rcu		*fnhe_rth_output;
 	unsigned long			fnhe_stamp;
+	struct rcu_head			rcu;
 };
 
 struct fnhe_hash_bucket {
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 6db96ea..dda9abf 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -230,6 +230,7 @@
 int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd);
 int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t,
 		    u8 *protocol, struct flowi4 *fl4);
+int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict);
 int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu);
 
 struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev,
diff --git a/include/net/iw_handler.h b/include/net/iw_handler.h
index 8f81bbb..e0f4109 100644
--- a/include/net/iw_handler.h
+++ b/include/net/iw_handler.h
@@ -439,6 +439,12 @@
 /* Send a single event to user space */
 void wireless_send_event(struct net_device *dev, unsigned int cmd,
 			 union iwreq_data *wrqu, const char *extra);
+#ifdef CONFIG_WEXT_CORE
+/* flush all previous wext events - if work is done from netdev notifiers */
+void wireless_nlevent_flush(void);
+#else
+static inline void wireless_nlevent_flush(void) {}
+#endif
 
 /* We may need a function to send a stream of events to user space.
  * More on that later... */
diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
index 788ef58..62e17d1 100644
--- a/include/net/netfilter/nf_conntrack_core.h
+++ b/include/net/netfilter/nf_conntrack_core.h
@@ -79,12 +79,10 @@
             const struct nf_conntrack_l3proto *l3proto,
             const struct nf_conntrack_l4proto *proto);
 
-#ifdef CONFIG_LOCKDEP
-# define CONNTRACK_LOCKS 8
-#else
-# define CONNTRACK_LOCKS 1024
-#endif
+#define CONNTRACK_LOCKS 1024
+
 extern spinlock_t nf_conntrack_locks[CONNTRACK_LOCKS];
+void nf_conntrack_lock(spinlock_t *lock);
 
 extern spinlock_t nf_conntrack_expect_lock;
 
diff --git a/include/net/scm.h b/include/net/scm.h
index 262532d..59fa93c 100644
--- a/include/net/scm.h
+++ b/include/net/scm.h
@@ -21,6 +21,7 @@
 struct scm_fp_list {
 	short			count;
 	short			max;
+	struct user_struct	*user;
 	struct file		*fp[SCM_MAX_FD];
 };
 
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 20e7212..205630b 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -756,7 +756,6 @@
 
 	/* Reference counting. */
 	atomic_t refcnt;
-	__u32	 dead:1,
 		/* RTO-Pending : A flag used to track if one of the DATA
 		 *		chunks sent to this address is currently being
 		 *		used to compute a RTT. If this flag is 0,
@@ -766,7 +765,7 @@
 		 *		calculation completes (i.e. the DATA chunk
 		 *		is SACK'd) clear this flag.
 		 */
-		 rto_pending:1,
+	__u32	rto_pending:1,
 
 		/*
 		 * hb_sent : a flag that signals that we have a pending
@@ -955,7 +954,7 @@
 void sctp_transport_pmtu(struct sctp_transport *, struct sock *sk);
 void sctp_transport_free(struct sctp_transport *);
 void sctp_transport_reset_timers(struct sctp_transport *);
-void sctp_transport_hold(struct sctp_transport *);
+int sctp_transport_hold(struct sctp_transport *);
 void sctp_transport_put(struct sctp_transport *);
 void sctp_transport_update_rto(struct sctp_transport *, __u32);
 void sctp_transport_raise_cwnd(struct sctp_transport *, __u32, __u32);
diff --git a/include/net/sock.h b/include/net/sock.h
index b9e7b3d..f5ea148 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1036,18 +1036,6 @@
 #ifdef SOCK_REFCNT_DEBUG
 	atomic_t		socks;
 #endif
-#ifdef CONFIG_MEMCG_KMEM
-	/*
-	 * cgroup specific init/deinit functions. Called once for all
-	 * protocols that implement it, from cgroups populate function.
-	 * This function has to setup any files the protocol want to
-	 * appear in the kmem cgroup filesystem.
-	 */
-	int			(*init_cgroup)(struct mem_cgroup *memcg,
-					       struct cgroup_subsys *ss);
-	void			(*destroy_cgroup)(struct mem_cgroup *memcg);
-	struct cg_proto		*(*proto_cgroup)(struct mem_cgroup *memcg);
-#endif
 	int			(*diag_destroy)(struct sock *sk, int err);
 };
 
diff --git a/include/net/sock_reuseport.h b/include/net/sock_reuseport.h
index 7dda3d7..aecd303 100644
--- a/include/net/sock_reuseport.h
+++ b/include/net/sock_reuseport.h
@@ -16,7 +16,7 @@
 };
 
 extern int reuseport_alloc(struct sock *sk);
-extern int reuseport_add_sock(struct sock *sk, const struct sock *sk2);
+extern int reuseport_add_sock(struct sock *sk, struct sock *sk2);
 extern void reuseport_detach_sock(struct sock *sk);
 extern struct sock *reuseport_select_sock(struct sock *sk,
 					  u32 hash,
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 8ea1997..ae6468f 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -216,7 +216,7 @@
 /* TCP thin-stream limits */
 #define TCP_THIN_LINEAR_RETRIES 6       /* After 6 linear retries, do exp. backoff */
 
-/* TCP initial congestion window as per draft-hkchu-tcpm-initcwnd-01 */
+/* TCP initial congestion window as per rfc6928 */
 #define TCP_INIT_CWND		10
 
 /* Bit Flags for sysctl_tcp_fastopen */
@@ -447,7 +447,7 @@
 
 void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb);
 void tcp_v4_mtu_reduced(struct sock *sk);
-void tcp_req_err(struct sock *sk, u32 seq);
+void tcp_req_err(struct sock *sk, u32 seq, bool abort);
 int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb);
 struct sock *tcp_create_openreq_child(const struct sock *sk,
 				      struct request_sock *req,
diff --git a/include/sound/hdaudio.h b/include/sound/hdaudio.h
index e2b712c..c21c38c 100644
--- a/include/sound/hdaudio.h
+++ b/include/sound/hdaudio.h
@@ -343,7 +343,7 @@
 void snd_hdac_bus_exit_link_reset(struct hdac_bus *bus);
 
 void snd_hdac_bus_update_rirb(struct hdac_bus *bus);
-void snd_hdac_bus_handle_stream_irq(struct hdac_bus *bus, unsigned int status,
+int snd_hdac_bus_handle_stream_irq(struct hdac_bus *bus, unsigned int status,
 				    void (*ack)(struct hdac_bus *,
 						struct hdac_stream *));
 
diff --git a/include/sound/rawmidi.h b/include/sound/rawmidi.h
index fdabbb4..f730b91 100644
--- a/include/sound/rawmidi.h
+++ b/include/sound/rawmidi.h
@@ -167,6 +167,10 @@
 int snd_rawmidi_transmit_ack(struct snd_rawmidi_substream *substream, int count);
 int snd_rawmidi_transmit(struct snd_rawmidi_substream *substream,
 			 unsigned char *buffer, int count);
+int __snd_rawmidi_transmit_peek(struct snd_rawmidi_substream *substream,
+			      unsigned char *buffer, int count);
+int __snd_rawmidi_transmit_ack(struct snd_rawmidi_substream *substream,
+			       int count);
 
 /* main midi functions */
 
diff --git a/include/target/target_core_backend.h b/include/target/target_core_backend.h
index 56cf8e4..28ee5c2 100644
--- a/include/target/target_core_backend.h
+++ b/include/target/target_core_backend.h
@@ -94,5 +94,8 @@
 	sense_reason_t (*exec_cmd)(struct se_cmd *cmd));
 
 bool target_sense_desc_format(struct se_device *dev);
+sector_t target_to_linux_sector(struct se_device *dev, sector_t lb);
+bool target_configure_unmap_from_queue(struct se_dev_attrib *attrib,
+				       struct request_queue *q, int block_size);
 
 #endif /* TARGET_CORE_BACKEND_H */
diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h
index 5d82816..e8c8c08 100644
--- a/include/target/target_core_base.h
+++ b/include/target/target_core_base.h
@@ -140,6 +140,8 @@
 	SCF_COMPARE_AND_WRITE		= 0x00080000,
 	SCF_COMPARE_AND_WRITE_POST	= 0x00100000,
 	SCF_PASSTHROUGH_PROT_SG_TO_MEM_NOALLOC = 0x00200000,
+	SCF_ACK_KREF			= 0x00400000,
+	SCF_USE_CPUID			= 0x00800000,
 };
 
 /* struct se_dev_entry->lun_flags and struct se_lun->lun_access */
@@ -187,6 +189,7 @@
 	TARGET_SCF_BIDI_OP		= 0x01,
 	TARGET_SCF_ACK_KREF		= 0x02,
 	TARGET_SCF_UNKNOWN_SIZE		= 0x04,
+	TARGET_SCF_USE_CPUID	= 0x08,
 };
 
 /* fabric independent task management function values */
@@ -490,8 +493,9 @@
 #define CMD_T_SENT		(1 << 4)
 #define CMD_T_STOP		(1 << 5)
 #define CMD_T_DEV_ACTIVE	(1 << 7)
-#define CMD_T_REQUEST_STOP	(1 << 8)
 #define CMD_T_BUSY		(1 << 9)
+#define CMD_T_TAS		(1 << 10)
+#define CMD_T_FABRIC_STOP	(1 << 11)
 	spinlock_t		t_state_lock;
 	struct kref		cmd_kref;
 	struct completion	t_transport_stop_comp;
@@ -511,9 +515,6 @@
 
 	struct list_head	state_list;
 
-	/* old task stop completion, consider merging with some of the above */
-	struct completion	task_stop_comp;
-
 	/* backend private data */
 	void			*priv;
 
diff --git a/include/trace/events/asoc.h b/include/trace/events/asoc.h
index 317a1ed..9130dd5 100644
--- a/include/trace/events/asoc.h
+++ b/include/trace/events/asoc.h
@@ -231,13 +231,13 @@
 	TP_ARGS(jack, mask, val),
 
 	TP_STRUCT__entry(
-		__string(	name,		jack->jack->name	)
+		__string(	name,		jack->jack->id		)
 		__field(	int,		mask			)
 		__field(	int,		val			)
 	),
 
 	TP_fast_assign(
-		__assign_str(name, jack->jack->name);
+		__assign_str(name, jack->jack->id);
 		__entry->mask = mask;
 		__entry->val = val;
 	),
@@ -253,12 +253,12 @@
 	TP_ARGS(jack, val),
 
 	TP_STRUCT__entry(
-		__string(	name,		jack->jack->name	)
+		__string(	name,		jack->jack->id		)
 		__field(	int,		val			)
 	),
 
 	TP_fast_assign(
-		__assign_str(name, jack->jack->name);
+		__assign_str(name, jack->jack->id);
 		__entry->val = val;
 	),
 
diff --git a/include/trace/events/fence.h b/include/trace/events/fence.h
index 98feb1b..d6dfa05 100644
--- a/include/trace/events/fence.h
+++ b/include/trace/events/fence.h
@@ -17,7 +17,7 @@
 
 	TP_STRUCT__entry(
 		__string(driver, fence->ops->get_driver_name(fence))
-		__string(timeline, fence->ops->get_driver_name(fence))
+		__string(timeline, fence->ops->get_timeline_name(fence))
 		__field(unsigned int, context)
 		__field(unsigned int, seqno)
 
diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h
index 073b9ac..5144013 100644
--- a/include/trace/events/timer.h
+++ b/include/trace/events/timer.h
@@ -328,23 +328,49 @@
 );
 
 #ifdef CONFIG_NO_HZ_COMMON
+
+#define TICK_DEP_NAMES					\
+		tick_dep_name(NONE)			\
+		tick_dep_name(POSIX_TIMER)		\
+		tick_dep_name(PERF_EVENTS)		\
+		tick_dep_name(SCHED)			\
+		tick_dep_name_end(CLOCK_UNSTABLE)
+
+#undef tick_dep_name
+#undef tick_dep_name_end
+
+#define tick_dep_name(sdep) TRACE_DEFINE_ENUM(TICK_DEP_MASK_##sdep);
+#define tick_dep_name_end(sdep)  TRACE_DEFINE_ENUM(TICK_DEP_MASK_##sdep);
+
+TICK_DEP_NAMES
+
+#undef tick_dep_name
+#undef tick_dep_name_end
+
+#define tick_dep_name(sdep) { TICK_DEP_MASK_##sdep, #sdep },
+#define tick_dep_name_end(sdep) { TICK_DEP_MASK_##sdep, #sdep }
+
+#define show_tick_dep_name(val)				\
+	__print_symbolic(val, TICK_DEP_NAMES)
+
 TRACE_EVENT(tick_stop,
 
-	TP_PROTO(int success, char *error_msg),
+	TP_PROTO(int success, int dependency),
 
-	TP_ARGS(success, error_msg),
+	TP_ARGS(success, dependency),
 
 	TP_STRUCT__entry(
 		__field( int ,		success	)
-		__string( msg, 		error_msg )
+		__field( int ,		dependency )
 	),
 
 	TP_fast_assign(
 		__entry->success	= success;
-		__assign_str(msg, error_msg);
+		__entry->dependency	= dependency;
 	),
 
-	TP_printk("success=%s msg=%s",  __entry->success ? "yes" : "no", __get_str(msg))
+	TP_printk("success=%d dependency=%s",  __entry->success, \
+			show_tick_dep_name(__entry->dependency))
 );
 #endif
 
diff --git a/include/uapi/drm/etnaviv_drm.h b/include/uapi/drm/etnaviv_drm.h
index 4cc989a..f95e1c4 100644
--- a/include/uapi/drm/etnaviv_drm.h
+++ b/include/uapi/drm/etnaviv_drm.h
@@ -48,6 +48,8 @@
 #define ETNAVIV_PARAM_GPU_FEATURES_2                0x05
 #define ETNAVIV_PARAM_GPU_FEATURES_3                0x06
 #define ETNAVIV_PARAM_GPU_FEATURES_4                0x07
+#define ETNAVIV_PARAM_GPU_FEATURES_5                0x08
+#define ETNAVIV_PARAM_GPU_FEATURES_6                0x09
 
 #define ETNAVIV_PARAM_GPU_STREAM_COUNT              0x10
 #define ETNAVIV_PARAM_GPU_REGISTER_MAX              0x11
@@ -59,6 +61,7 @@
 #define ETNAVIV_PARAM_GPU_BUFFER_SIZE               0x17
 #define ETNAVIV_PARAM_GPU_INSTRUCTION_COUNT         0x18
 #define ETNAVIV_PARAM_GPU_NUM_CONSTANTS             0x19
+#define ETNAVIV_PARAM_GPU_NUM_VARYINGS              0x1a
 
 #define ETNA_MAX_PIPES 4
 
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index aa6f857..5df4881 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -292,6 +292,9 @@
 /* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */
 #define BPF_F_TUNINFO_IPV6		(1ULL << 0)
 
+/* BPF_FUNC_skb_set_tunnel_key flags. */
+#define BPF_F_ZERO_CSUM_TX		(1ULL << 1)
+
 /* user accessible mirror of in-kernel sk_buff.
  * new fields can only be added to the end of this structure
  */
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index 41e0433..149bec8 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -222,7 +222,6 @@
 #define BLKSECDISCARD _IO(0x12,125)
 #define BLKROTATIONAL _IO(0x12,126)
 #define BLKZEROOUT _IO(0x12,127)
-#define BLKDAXSET _IO(0x12,128)
 #define BLKDAXGET _IO(0x12,129)
 
 #define BMAP_IOCTL 1		/* obsolete - kept for compatibility */
diff --git a/include/uapi/linux/media.h b/include/uapi/linux/media.h
index 1e3c8cb..a8e3a8c 100644
--- a/include/uapi/linux/media.h
+++ b/include/uapi/linux/media.h
@@ -66,27 +66,33 @@
 /*
  * DVB entities
  */
-#define MEDIA_ENT_F_DTV_DEMOD		(MEDIA_ENT_F_BASE + 1)
-#define MEDIA_ENT_F_TS_DEMUX		(MEDIA_ENT_F_BASE + 2)
-#define MEDIA_ENT_F_DTV_CA		(MEDIA_ENT_F_BASE + 3)
-#define MEDIA_ENT_F_DTV_NET_DECAP	(MEDIA_ENT_F_BASE + 4)
+#define MEDIA_ENT_F_DTV_DEMOD		(MEDIA_ENT_F_BASE + 0x00001)
+#define MEDIA_ENT_F_TS_DEMUX		(MEDIA_ENT_F_BASE + 0x00002)
+#define MEDIA_ENT_F_DTV_CA		(MEDIA_ENT_F_BASE + 0x00003)
+#define MEDIA_ENT_F_DTV_NET_DECAP	(MEDIA_ENT_F_BASE + 0x00004)
+
+/*
+ * I/O entities
+ */
+#define MEDIA_ENT_F_IO_DTV		(MEDIA_ENT_F_BASE + 0x01001)
+#define MEDIA_ENT_F_IO_VBI		(MEDIA_ENT_F_BASE + 0x01002)
+#define MEDIA_ENT_F_IO_SWRADIO		(MEDIA_ENT_F_BASE + 0x01003)
 
 /*
  * Connectors
  */
 /* It is a responsibility of the entity drivers to add connectors and links */
-#define MEDIA_ENT_F_CONN_RF		(MEDIA_ENT_F_BASE + 21)
-#define MEDIA_ENT_F_CONN_SVIDEO		(MEDIA_ENT_F_BASE + 22)
-#define MEDIA_ENT_F_CONN_COMPOSITE	(MEDIA_ENT_F_BASE + 23)
-/* For internal test signal generators and other debug connectors */
-#define MEDIA_ENT_F_CONN_TEST		(MEDIA_ENT_F_BASE + 24)
+#ifdef __KERNEL__
+	/*
+	 * For now, it should not be used in userspace, as some
+	 * definitions may change
+	 */
 
-/*
- * I/O entities
- */
-#define MEDIA_ENT_F_IO_DTV  		(MEDIA_ENT_F_BASE + 31)
-#define MEDIA_ENT_F_IO_VBI  		(MEDIA_ENT_F_BASE + 32)
-#define MEDIA_ENT_F_IO_SWRADIO		(MEDIA_ENT_F_BASE + 33)
+#define MEDIA_ENT_F_CONN_RF		(MEDIA_ENT_F_BASE + 0x30001)
+#define MEDIA_ENT_F_CONN_SVIDEO		(MEDIA_ENT_F_BASE + 0x30002)
+#define MEDIA_ENT_F_CONN_COMPOSITE	(MEDIA_ENT_F_BASE + 0x30003)
+
+#endif
 
 /*
  * Don't touch on those. The ranges MEDIA_ENT_F_OLD_BASE and
@@ -114,7 +120,7 @@
 
 #define MEDIA_ENT_F_V4L2_SUBDEV_UNKNOWN	MEDIA_ENT_F_OLD_SUBDEV_BASE
 
-#ifndef __KERNEL__
+#if !defined(__KERNEL__) || defined(__NEED_MEDIA_LEGACY_API)
 
 /*
  * Legacy symbols used to avoid userspace compilation breakages
@@ -127,6 +133,10 @@
 #define MEDIA_ENT_TYPE_MASK		0x00ff0000
 #define MEDIA_ENT_SUBTYPE_MASK		0x0000ffff
 
+/* End of the old subdev reserved numberspace */
+#define MEDIA_ENT_T_DEVNODE_UNKNOWN	(MEDIA_ENT_T_DEVNODE | \
+					 MEDIA_ENT_SUBTYPE_MASK)
+
 #define MEDIA_ENT_T_DEVNODE		MEDIA_ENT_F_OLD_BASE
 #define MEDIA_ENT_T_DEVNODE_V4L		MEDIA_ENT_F_IO_V4L
 #define MEDIA_ENT_T_DEVNODE_FB		(MEDIA_ENT_T_DEVNODE + 2)
@@ -291,14 +301,14 @@
 	__u32 id;
 	char name[64];		/* FIXME: move to a property? (RFC says so) */
 	__u32 function;		/* Main function of the entity */
-	__u16 reserved[12];
-};
+	__u32 reserved[6];
+} __attribute__ ((packed));
 
 /* Should match the specific fields at media_intf_devnode */
 struct media_v2_intf_devnode {
 	__u32 major;
 	__u32 minor;
-};
+} __attribute__ ((packed));
 
 struct media_v2_interface {
 	__u32 id;
@@ -310,22 +320,22 @@
 		struct media_v2_intf_devnode devnode;
 		__u32 raw[16];
 	};
-};
+} __attribute__ ((packed));
 
 struct media_v2_pad {
 	__u32 id;
 	__u32 entity_id;
 	__u32 flags;
-	__u16 reserved[9];
-};
+	__u32 reserved[5];
+} __attribute__ ((packed));
 
 struct media_v2_link {
 	__u32 id;
 	__u32 source_id;
 	__u32 sink_id;
 	__u32 flags;
-	__u32 reserved[5];
-};
+	__u32 reserved[6];
+} __attribute__ ((packed));
 
 struct media_v2_topology {
 	__u64 topology_version;
@@ -345,7 +355,7 @@
 	__u32 num_links;
 	__u32 reserved4;
 	__u64 ptr_links;
-};
+} __attribute__ ((packed));
 
 static inline void __user *media_get_uptr(__u64 arg)
 {
diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h
index 5b4a4be..cc68b921 100644
--- a/include/uapi/linux/ndctl.h
+++ b/include/uapi/linux/ndctl.h
@@ -66,14 +66,18 @@
 	__u64 length;
 	__u32 status;
 	__u32 max_ars_out;
+	__u32 clear_err_unit;
+	__u32 reserved;
 } __packed;
 
 struct nd_cmd_ars_start {
 	__u64 address;
 	__u64 length;
 	__u16 type;
-	__u8 reserved[6];
+	__u8 flags;
+	__u8 reserved[5];
 	__u32 status;
+	__u32 scrub_time;
 } __packed;
 
 struct nd_cmd_ars_status {
@@ -81,11 +85,14 @@
 	__u32 out_length;
 	__u64 address;
 	__u64 length;
+	__u64 restart_address;
+	__u64 restart_length;
 	__u16 type;
+	__u16 flags;
 	__u32 num_records;
 	struct nd_ars_record {
 		__u32 handle;
-		__u32 flags;
+		__u32 reserved;
 		__u64 err_address;
 		__u64 length;
 	} __packed records[0];
diff --git a/init/main.c b/init/main.c
index 58c9e37..7c27de4 100644
--- a/init/main.c
+++ b/init/main.c
@@ -93,9 +93,6 @@
 extern void init_IRQ(void);
 extern void fork_init(void);
 extern void radix_tree_init(void);
-#ifndef CONFIG_DEBUG_RODATA
-static inline void mark_rodata_ro(void) { }
-#endif
 
 /*
  * Debug helper: via this flag we know that we are in 'early bootup code'
@@ -499,11 +496,6 @@
 	char *command_line;
 	char *after_dashes;
 
-	/*
-	 * Need to run as early as possible, to initialize the
-	 * lockdep hash:
-	 */
-	lockdep_init();
 	set_task_stack_end_magic(&init_task);
 	smp_setup_processor_id();
 	debug_objects_early_init();
@@ -929,6 +921,28 @@
 
 static noinline void __init kernel_init_freeable(void);
 
+#ifdef CONFIG_DEBUG_RODATA
+static bool rodata_enabled = true;
+static int __init set_debug_rodata(char *str)
+{
+	return strtobool(str, &rodata_enabled);
+}
+__setup("rodata=", set_debug_rodata);
+
+static void mark_readonly(void)
+{
+	if (rodata_enabled)
+		mark_rodata_ro();
+	else
+		pr_info("Kernel memory protection disabled.\n");
+}
+#else
+static inline void mark_readonly(void)
+{
+	pr_warn("This architecture does not have kernel memory protection.\n");
+}
+#endif
+
 static int __ref kernel_init(void *unused)
 {
 	int ret;
@@ -937,7 +951,7 @@
 	/* need to finish all async __init code before freeing the memory */
 	async_synchronize_full();
 	free_initmem();
-	mark_rodata_ro();
+	mark_readonly();
 	system_state = SYSTEM_RUNNING;
 	numa_default_policy();
 
diff --git a/ipc/shm.c b/ipc/shm.c
index ed3027d..331fc1b 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -156,11 +156,12 @@
 	struct kern_ipc_perm *ipcp = ipc_lock(&shm_ids(ns), id);
 
 	/*
-	 * We raced in the idr lookup or with shm_destroy().  Either way, the
-	 * ID is busted.
+	 * Callers of shm_lock() must validate the status of the returned ipc
+	 * object pointer (as returned by ipc_lock()), and error out as
+	 * appropriate.
 	 */
-	WARN_ON(IS_ERR(ipcp));
-
+	if (IS_ERR(ipcp))
+		return (void *)ipcp;
 	return container_of(ipcp, struct shmid_kernel, shm_perm);
 }
 
@@ -186,18 +187,33 @@
 }
 
 
-/* This is called by fork, once for every shm attach. */
-static void shm_open(struct vm_area_struct *vma)
+static int __shm_open(struct vm_area_struct *vma)
 {
 	struct file *file = vma->vm_file;
 	struct shm_file_data *sfd = shm_file_data(file);
 	struct shmid_kernel *shp;
 
 	shp = shm_lock(sfd->ns, sfd->id);
+
+	if (IS_ERR(shp))
+		return PTR_ERR(shp);
+
 	shp->shm_atim = get_seconds();
 	shp->shm_lprid = task_tgid_vnr(current);
 	shp->shm_nattch++;
 	shm_unlock(shp);
+	return 0;
+}
+
+/* This is called by fork, once for every shm attach. */
+static void shm_open(struct vm_area_struct *vma)
+{
+	int err = __shm_open(vma);
+	/*
+	 * We raced in the idr lookup or with shm_destroy().
+	 * Either way, the ID is busted.
+	 */
+	WARN_ON_ONCE(err);
 }
 
 /*
@@ -260,6 +276,14 @@
 	down_write(&shm_ids(ns).rwsem);
 	/* remove from the list of attaches of the shm segment */
 	shp = shm_lock(ns, sfd->id);
+
+	/*
+	 * We raced in the idr lookup or with shm_destroy().
+	 * Either way, the ID is busted.
+	 */
+	if (WARN_ON_ONCE(IS_ERR(shp)))
+		goto done; /* no-op */
+
 	shp->shm_lprid = task_tgid_vnr(current);
 	shp->shm_dtim = get_seconds();
 	shp->shm_nattch--;
@@ -267,6 +291,7 @@
 		shm_destroy(ns, shp);
 	else
 		shm_unlock(shp);
+done:
 	up_write(&shm_ids(ns).rwsem);
 }
 
@@ -388,17 +413,25 @@
 	struct shm_file_data *sfd = shm_file_data(file);
 	int ret;
 
-	ret = sfd->file->f_op->mmap(sfd->file, vma);
-	if (ret != 0)
+	/*
+	 * In case of remap_file_pages() emulation, the file can represent
+	 * removed IPC ID: propogate shm_lock() error to caller.
+	 */
+	ret =__shm_open(vma);
+	if (ret)
 		return ret;
+
+	ret = sfd->file->f_op->mmap(sfd->file, vma);
+	if (ret) {
+		shm_close(vma);
+		return ret;
+	}
 	sfd->vm_ops = vma->vm_ops;
 #ifdef CONFIG_MMU
 	WARN_ON(!sfd->vm_ops->fault);
 #endif
 	vma->vm_ops = &shm_vm_ops;
-	shm_open(vma);
-
-	return ret;
+	return 0;
 }
 
 static int shm_release(struct inode *ino, struct file *file)
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index b0799bc..89ebbc4 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -291,10 +291,13 @@
 {
 	struct perf_event *event;
 	const struct perf_event_attr *attr;
+	struct file *file;
 
-	event = perf_event_get(fd);
-	if (IS_ERR(event))
-		return event;
+	file = perf_event_get(fd);
+	if (IS_ERR(file))
+		return file;
+
+	event = file->private_data;
 
 	attr = perf_event_attrs(event);
 	if (IS_ERR(attr))
@@ -304,24 +307,22 @@
 		goto err;
 
 	if (attr->type == PERF_TYPE_RAW)
-		return event;
+		return file;
 
 	if (attr->type == PERF_TYPE_HARDWARE)
-		return event;
+		return file;
 
 	if (attr->type == PERF_TYPE_SOFTWARE &&
 	    attr->config == PERF_COUNT_SW_BPF_OUTPUT)
-		return event;
+		return file;
 err:
-	perf_event_release_kernel(event);
+	fput(file);
 	return ERR_PTR(-EINVAL);
 }
 
 static void perf_event_fd_array_put_ptr(void *ptr)
 {
-	struct perf_event *event = ptr;
-
-	perf_event_release_kernel(event);
+	fput((struct file *)ptr);
 }
 
 static const struct bpf_map_ops perf_event_array_ops = {
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index d1d3e8f..2e7f7ab 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -2082,7 +2082,7 @@
 		/* adjust offset of jmps if necessary */
 		if (i < pos && i + insn->off + 1 > pos)
 			insn->off += delta;
-		else if (i > pos && i + insn->off + 1 < pos)
+		else if (i > pos + delta && i + insn->off + 1 <= pos + delta)
 			insn->off -= delta;
 	}
 }
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index c03a640..d27904c 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -58,6 +58,7 @@
 #include <linux/kthread.h>
 #include <linux/delay.h>
 #include <linux/atomic.h>
+#include <linux/cpuset.h>
 #include <net/sock.h>
 
 /*
@@ -2739,6 +2740,7 @@
 out_unlock_threadgroup:
 	percpu_up_write(&cgroup_threadgroup_rwsem);
 	cgroup_kn_unlock(of->kn);
+	cpuset_post_attach_flush();
 	return ret ?: nbytes;
 }
 
@@ -4655,14 +4657,15 @@
 
 	if (ss) {
 		/* css free path */
+		struct cgroup_subsys_state *parent = css->parent;
 		int id = css->id;
 
-		if (css->parent)
-			css_put(css->parent);
-
 		ss->css_free(css);
 		cgroup_idr_remove(&ss->css_idr, id);
 		cgroup_put(cgrp);
+
+		if (parent)
+			css_put(parent);
 	} else {
 		/* cgroup free path */
 		atomic_dec(&cgrp->root->nr_cgrps);
@@ -4758,6 +4761,7 @@
 	INIT_LIST_HEAD(&css->sibling);
 	INIT_LIST_HEAD(&css->children);
 	css->serial_nr = css_serial_nr_next++;
+	atomic_set(&css->online_cnt, 0);
 
 	if (cgroup_parent(cgrp)) {
 		css->parent = cgroup_css(cgroup_parent(cgrp), ss);
@@ -4780,6 +4784,10 @@
 	if (!ret) {
 		css->flags |= CSS_ONLINE;
 		rcu_assign_pointer(css->cgroup->subsys[ss->id], css);
+
+		atomic_inc(&css->online_cnt);
+		if (css->parent)
+			atomic_inc(&css->parent->online_cnt);
 	}
 	return ret;
 }
@@ -5017,10 +5025,15 @@
 		container_of(work, struct cgroup_subsys_state, destroy_work);
 
 	mutex_lock(&cgroup_mutex);
-	offline_css(css);
-	mutex_unlock(&cgroup_mutex);
 
-	css_put(css);
+	do {
+		offline_css(css);
+		css_put(css);
+		/* @css can't go away while we're holding cgroup_mutex */
+		css = css->parent;
+	} while (css && atomic_dec_and_test(&css->online_cnt));
+
+	mutex_unlock(&cgroup_mutex);
 }
 
 /* css kill confirmation processing requires process context, bounce */
@@ -5029,8 +5042,10 @@
 	struct cgroup_subsys_state *css =
 		container_of(ref, struct cgroup_subsys_state, refcnt);
 
-	INIT_WORK(&css->destroy_work, css_killed_work_fn);
-	queue_work(cgroup_destroy_wq, &css->destroy_work);
+	if (atomic_dec_and_test(&css->online_cnt)) {
+		INIT_WORK(&css->destroy_work, css_killed_work_fn);
+		queue_work(cgroup_destroy_wq, &css->destroy_work);
+	}
 }
 
 /**
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 3e945fc..41989ab 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -287,6 +287,8 @@
 static DEFINE_MUTEX(cpuset_mutex);
 static DEFINE_SPINLOCK(callback_lock);
 
+static struct workqueue_struct *cpuset_migrate_mm_wq;
+
 /*
  * CPU / memory hotplug is handled asynchronously.
  */
@@ -972,31 +974,51 @@
 }
 
 /*
- * cpuset_migrate_mm
- *
- *    Migrate memory region from one set of nodes to another.
- *
- *    Temporarilly set tasks mems_allowed to target nodes of migration,
- *    so that the migration code can allocate pages on these nodes.
- *
- *    While the mm_struct we are migrating is typically from some
- *    other task, the task_struct mems_allowed that we are hacking
- *    is for our current task, which must allocate new pages for that
- *    migrating memory region.
+ * Migrate memory region from one set of nodes to another.  This is
+ * performed asynchronously as it can be called from process migration path
+ * holding locks involved in process management.  All mm migrations are
+ * performed in the queued order and can be waited for by flushing
+ * cpuset_migrate_mm_wq.
  */
 
+struct cpuset_migrate_mm_work {
+	struct work_struct	work;
+	struct mm_struct	*mm;
+	nodemask_t		from;
+	nodemask_t		to;
+};
+
+static void cpuset_migrate_mm_workfn(struct work_struct *work)
+{
+	struct cpuset_migrate_mm_work *mwork =
+		container_of(work, struct cpuset_migrate_mm_work, work);
+
+	/* on a wq worker, no need to worry about %current's mems_allowed */
+	do_migrate_pages(mwork->mm, &mwork->from, &mwork->to, MPOL_MF_MOVE_ALL);
+	mmput(mwork->mm);
+	kfree(mwork);
+}
+
 static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from,
 							const nodemask_t *to)
 {
-	struct task_struct *tsk = current;
+	struct cpuset_migrate_mm_work *mwork;
 
-	tsk->mems_allowed = *to;
+	mwork = kzalloc(sizeof(*mwork), GFP_KERNEL);
+	if (mwork) {
+		mwork->mm = mm;
+		mwork->from = *from;
+		mwork->to = *to;
+		INIT_WORK(&mwork->work, cpuset_migrate_mm_workfn);
+		queue_work(cpuset_migrate_mm_wq, &mwork->work);
+	} else {
+		mmput(mm);
+	}
+}
 
-	do_migrate_pages(mm, from, to, MPOL_MF_MOVE_ALL);
-
-	rcu_read_lock();
-	guarantee_online_mems(task_cs(tsk), &tsk->mems_allowed);
-	rcu_read_unlock();
+void cpuset_post_attach_flush(void)
+{
+	flush_workqueue(cpuset_migrate_mm_wq);
 }
 
 /*
@@ -1097,7 +1119,8 @@
 		mpol_rebind_mm(mm, &cs->mems_allowed);
 		if (migrate)
 			cpuset_migrate_mm(mm, &cs->old_mems_allowed, &newmems);
-		mmput(mm);
+		else
+			mmput(mm);
 	}
 	css_task_iter_end(&it);
 
@@ -1545,11 +1568,11 @@
 			 * @old_mems_allowed is the right nodesets that we
 			 * migrate mm from.
 			 */
-			if (is_memory_migrate(cs)) {
+			if (is_memory_migrate(cs))
 				cpuset_migrate_mm(mm, &oldcs->old_mems_allowed,
 						  &cpuset_attach_nodemask_to);
-			}
-			mmput(mm);
+			else
+				mmput(mm);
 		}
 	}
 
@@ -1714,6 +1737,7 @@
 	mutex_unlock(&cpuset_mutex);
 	kernfs_unbreak_active_protection(of->kn);
 	css_put(&cs->css);
+	flush_workqueue(cpuset_migrate_mm_wq);
 	return retval ?: nbytes;
 }
 
@@ -2359,6 +2383,9 @@
 	top_cpuset.effective_mems = node_states[N_MEMORY];
 
 	register_hotmemory_notifier(&cpuset_track_online_nodes_nb);
+
+	cpuset_migrate_mm_wq = alloc_ordered_workqueue("cpuset_migrate_mm", 0);
+	BUG_ON(!cpuset_migrate_mm_wq);
 }
 
 /**
diff --git a/kernel/debug/kdb/kdb_bp.c b/kernel/debug/kdb/kdb_bp.c
index e1dbf4a..90ff129 100644
--- a/kernel/debug/kdb/kdb_bp.c
+++ b/kernel/debug/kdb/kdb_bp.c
@@ -153,13 +153,11 @@
 	} else {
 		kdb_printf("%s: failed to set breakpoint at 0x%lx\n",
 			   __func__, bp->bp_addr);
-#ifdef CONFIG_DEBUG_RODATA
 		if (!bp->bp_type) {
 			kdb_printf("Software breakpoints are unavailable.\n"
-				   "  Change the kernel CONFIG_DEBUG_RODATA=n\n"
+				   "  Boot the kernel with rodata=off\n"
 				   "  OR use hw breaks: help bph\n");
 		}
-#endif
 		return 1;
 	}
 	return 0;
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 06ae52e..712570d 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -49,8 +49,6 @@
 
 #include <asm/irq_regs.h>
 
-static struct workqueue_struct *perf_wq;
-
 typedef int (*remote_function_f)(void *);
 
 struct remote_function_call {
@@ -66,8 +64,17 @@
 	struct task_struct *p = tfc->p;
 
 	if (p) {
-		tfc->ret = -EAGAIN;
-		if (task_cpu(p) != smp_processor_id() || !task_curr(p))
+		/* -EAGAIN */
+		if (task_cpu(p) != smp_processor_id())
+			return;
+
+		/*
+		 * Now that we're on right CPU with IRQs disabled, we can test
+		 * if we hit the right task without races.
+		 */
+
+		tfc->ret = -ESRCH; /* No such (running) process */
+		if (p != current)
 			return;
 	}
 
@@ -94,13 +101,17 @@
 		.p	= p,
 		.func	= func,
 		.info	= info,
-		.ret	= -ESRCH, /* No such (running) process */
+		.ret	= -EAGAIN,
 	};
+	int ret;
 
-	if (task_curr(p))
-		smp_call_function_single(task_cpu(p), remote_function, &data, 1);
+	do {
+		ret = smp_call_function_single(task_cpu(p), remote_function, &data, 1);
+		if (!ret)
+			ret = data.ret;
+	} while (ret == -EAGAIN);
 
-	return data.ret;
+	return ret;
 }
 
 /**
@@ -126,42 +137,168 @@
 	return data.ret;
 }
 
-static void event_function_call(struct perf_event *event,
-				int (*active)(void *),
-				void (*inactive)(void *),
-				void *data)
+static inline struct perf_cpu_context *
+__get_cpu_context(struct perf_event_context *ctx)
 {
-	struct perf_event_context *ctx = event->ctx;
-	struct task_struct *task = ctx->task;
-
-	if (!task) {
-		cpu_function_call(event->cpu, active, data);
-		return;
-	}
-
-again:
-	if (!task_function_call(task, active, data))
-		return;
-
-	raw_spin_lock_irq(&ctx->lock);
-	if (ctx->is_active) {
-		/*
-		 * Reload the task pointer, it might have been changed by
-		 * a concurrent perf_event_context_sched_out().
-		 */
-		task = ctx->task;
-		raw_spin_unlock_irq(&ctx->lock);
-		goto again;
-	}
-	inactive(data);
-	raw_spin_unlock_irq(&ctx->lock);
+	return this_cpu_ptr(ctx->pmu->pmu_cpu_context);
 }
 
-#define EVENT_OWNER_KERNEL ((void *) -1)
+static void perf_ctx_lock(struct perf_cpu_context *cpuctx,
+			  struct perf_event_context *ctx)
+{
+	raw_spin_lock(&cpuctx->ctx.lock);
+	if (ctx)
+		raw_spin_lock(&ctx->lock);
+}
+
+static void perf_ctx_unlock(struct perf_cpu_context *cpuctx,
+			    struct perf_event_context *ctx)
+{
+	if (ctx)
+		raw_spin_unlock(&ctx->lock);
+	raw_spin_unlock(&cpuctx->ctx.lock);
+}
+
+#define TASK_TOMBSTONE ((void *)-1L)
 
 static bool is_kernel_event(struct perf_event *event)
 {
-	return event->owner == EVENT_OWNER_KERNEL;
+	return READ_ONCE(event->owner) == TASK_TOMBSTONE;
+}
+
+/*
+ * On task ctx scheduling...
+ *
+ * When !ctx->nr_events a task context will not be scheduled. This means
+ * we can disable the scheduler hooks (for performance) without leaving
+ * pending task ctx state.
+ *
+ * This however results in two special cases:
+ *
+ *  - removing the last event from a task ctx; this is relatively straight
+ *    forward and is done in __perf_remove_from_context.
+ *
+ *  - adding the first event to a task ctx; this is tricky because we cannot
+ *    rely on ctx->is_active and therefore cannot use event_function_call().
+ *    See perf_install_in_context().
+ *
+ * If ctx->nr_events, then ctx->is_active and cpuctx->task_ctx are set.
+ */
+
+typedef void (*event_f)(struct perf_event *, struct perf_cpu_context *,
+			struct perf_event_context *, void *);
+
+struct event_function_struct {
+	struct perf_event *event;
+	event_f func;
+	void *data;
+};
+
+static int event_function(void *info)
+{
+	struct event_function_struct *efs = info;
+	struct perf_event *event = efs->event;
+	struct perf_event_context *ctx = event->ctx;
+	struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
+	struct perf_event_context *task_ctx = cpuctx->task_ctx;
+	int ret = 0;
+
+	WARN_ON_ONCE(!irqs_disabled());
+
+	perf_ctx_lock(cpuctx, task_ctx);
+	/*
+	 * Since we do the IPI call without holding ctx->lock things can have
+	 * changed, double check we hit the task we set out to hit.
+	 */
+	if (ctx->task) {
+		if (ctx->task != current) {
+			ret = -ESRCH;
+			goto unlock;
+		}
+
+		/*
+		 * We only use event_function_call() on established contexts,
+		 * and event_function() is only ever called when active (or
+		 * rather, we'll have bailed in task_function_call() or the
+		 * above ctx->task != current test), therefore we must have
+		 * ctx->is_active here.
+		 */
+		WARN_ON_ONCE(!ctx->is_active);
+		/*
+		 * And since we have ctx->is_active, cpuctx->task_ctx must
+		 * match.
+		 */
+		WARN_ON_ONCE(task_ctx != ctx);
+	} else {
+		WARN_ON_ONCE(&cpuctx->ctx != ctx);
+	}
+
+	efs->func(event, cpuctx, ctx, efs->data);
+unlock:
+	perf_ctx_unlock(cpuctx, task_ctx);
+
+	return ret;
+}
+
+static void event_function_local(struct perf_event *event, event_f func, void *data)
+{
+	struct event_function_struct efs = {
+		.event = event,
+		.func = func,
+		.data = data,
+	};
+
+	int ret = event_function(&efs);
+	WARN_ON_ONCE(ret);
+}
+
+static void event_function_call(struct perf_event *event, event_f func, void *data)
+{
+	struct perf_event_context *ctx = event->ctx;
+	struct task_struct *task = READ_ONCE(ctx->task); /* verified in event_function */
+	struct event_function_struct efs = {
+		.event = event,
+		.func = func,
+		.data = data,
+	};
+
+	if (!event->parent) {
+		/*
+		 * If this is a !child event, we must hold ctx::mutex to
+		 * stabilize the the event->ctx relation. See
+		 * perf_event_ctx_lock().
+		 */
+		lockdep_assert_held(&ctx->mutex);
+	}
+
+	if (!task) {
+		cpu_function_call(event->cpu, event_function, &efs);
+		return;
+	}
+
+	if (task == TASK_TOMBSTONE)
+		return;
+
+again:
+	if (!task_function_call(task, event_function, &efs))
+		return;
+
+	raw_spin_lock_irq(&ctx->lock);
+	/*
+	 * Reload the task pointer, it might have been changed by
+	 * a concurrent perf_event_context_sched_out().
+	 */
+	task = ctx->task;
+	if (task == TASK_TOMBSTONE) {
+		raw_spin_unlock_irq(&ctx->lock);
+		return;
+	}
+	if (ctx->is_active) {
+		raw_spin_unlock_irq(&ctx->lock);
+		goto again;
+	}
+	func(event, NULL, ctx, data);
+	raw_spin_unlock_irq(&ctx->lock);
 }
 
 #define PERF_FLAG_ALL (PERF_FLAG_FD_NO_GROUP |\
@@ -179,6 +316,7 @@
 enum event_type_t {
 	EVENT_FLEXIBLE = 0x1,
 	EVENT_PINNED = 0x2,
+	EVENT_TIME = 0x4,
 	EVENT_ALL = EVENT_FLEXIBLE | EVENT_PINNED,
 };
 
@@ -186,7 +324,13 @@
  * perf_sched_events : >0 events exist
  * perf_cgroup_events: >0 per-cpu cgroup events exist on this cpu
  */
-struct static_key_deferred perf_sched_events __read_mostly;
+
+static void perf_sched_delayed(struct work_struct *work);
+DEFINE_STATIC_KEY_FALSE(perf_sched_events);
+static DECLARE_DELAYED_WORK(perf_sched_work, perf_sched_delayed);
+static DEFINE_MUTEX(perf_sched_mutex);
+static atomic_t perf_sched_count;
+
 static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
 static DEFINE_PER_CPU(int, perf_sched_cb_usages);
 
@@ -368,28 +512,6 @@
 	return event->clock();
 }
 
-static inline struct perf_cpu_context *
-__get_cpu_context(struct perf_event_context *ctx)
-{
-	return this_cpu_ptr(ctx->pmu->pmu_cpu_context);
-}
-
-static void perf_ctx_lock(struct perf_cpu_context *cpuctx,
-			  struct perf_event_context *ctx)
-{
-	raw_spin_lock(&cpuctx->ctx.lock);
-	if (ctx)
-		raw_spin_lock(&ctx->lock);
-}
-
-static void perf_ctx_unlock(struct perf_cpu_context *cpuctx,
-			    struct perf_event_context *ctx)
-{
-	if (ctx)
-		raw_spin_unlock(&ctx->lock);
-	raw_spin_unlock(&cpuctx->ctx.lock);
-}
-
 #ifdef CONFIG_CGROUP_PERF
 
 static inline bool
@@ -579,13 +701,7 @@
 	 * we are holding the rcu lock
 	 */
 	cgrp1 = perf_cgroup_from_task(task, NULL);
-
-	/*
-	 * next is NULL when called from perf_event_enable_on_exec()
-	 * that will systematically cause a cgroup_switch()
-	 */
-	if (next)
-		cgrp2 = perf_cgroup_from_task(next, NULL);
+	cgrp2 = perf_cgroup_from_task(next, NULL);
 
 	/*
 	 * only schedule out current cgroup events if we know
@@ -611,8 +727,6 @@
 	 * we are holding the rcu lock
 	 */
 	cgrp1 = perf_cgroup_from_task(task, NULL);
-
-	/* prev can never be NULL */
 	cgrp2 = perf_cgroup_from_task(prev, NULL);
 
 	/*
@@ -917,7 +1031,7 @@
 	if (atomic_dec_and_test(&ctx->refcount)) {
 		if (ctx->parent_ctx)
 			put_ctx(ctx->parent_ctx);
-		if (ctx->task)
+		if (ctx->task && ctx->task != TASK_TOMBSTONE)
 			put_task_struct(ctx->task);
 		call_rcu(&ctx->rcu_head, free_ctx);
 	}
@@ -934,9 +1048,8 @@
  * perf_event_context::mutex nests and those are:
  *
  *  - perf_event_exit_task_context()	[ child , 0 ]
- *      __perf_event_exit_task()
- *        sync_child_event()
- *          put_event()			[ parent, 1 ]
+ *      perf_event_exit_event()
+ *        put_event()			[ parent, 1 ]
  *
  *  - perf_event_init_context()		[ parent, 0 ]
  *      inherit_task_group()
@@ -979,8 +1092,8 @@
  * Lock order:
  *	task_struct::perf_event_mutex
  *	  perf_event_context::mutex
- *	    perf_event_context::lock
  *	    perf_event::child_mutex;
+ *	      perf_event_context::lock
  *	    perf_event::mmap_mutex
  *	    mmap_sem
  */
@@ -1078,6 +1191,7 @@
 
 /*
  * Get the perf_event_context for a task and lock it.
+ *
  * This has to cope with with the fact that until it is locked,
  * the context could get moved to another task.
  */
@@ -1118,9 +1232,12 @@
 			goto retry;
 		}
 
-		if (!atomic_inc_not_zero(&ctx->refcount)) {
+		if (ctx->task == TASK_TOMBSTONE ||
+		    !atomic_inc_not_zero(&ctx->refcount)) {
 			raw_spin_unlock(&ctx->lock);
 			ctx = NULL;
+		} else {
+			WARN_ON_ONCE(ctx->task != task);
 		}
 	}
 	rcu_read_unlock();
@@ -1180,16 +1297,18 @@
 
 /*
  * Update the total_time_enabled and total_time_running fields for a event.
- * The caller of this function needs to hold the ctx->lock.
  */
 static void update_event_times(struct perf_event *event)
 {
 	struct perf_event_context *ctx = event->ctx;
 	u64 run_end;
 
+	lockdep_assert_held(&ctx->lock);
+
 	if (event->state < PERF_EVENT_STATE_INACTIVE ||
 	    event->group_leader->state < PERF_EVENT_STATE_INACTIVE)
 		return;
+
 	/*
 	 * in cgroup mode, time_enabled represents
 	 * the time the event was enabled AND active
@@ -1246,6 +1365,8 @@
 static void
 list_add_event(struct perf_event *event, struct perf_event_context *ctx)
 {
+	lockdep_assert_held(&ctx->lock);
+
 	WARN_ON_ONCE(event->attach_state & PERF_ATTACH_CONTEXT);
 	event->attach_state |= PERF_ATTACH_CONTEXT;
 
@@ -1448,11 +1569,14 @@
 
 	if (is_cgroup_event(event)) {
 		ctx->nr_cgroups--;
+		/*
+		 * Because cgroup events are always per-cpu events, this will
+		 * always be called from the right CPU.
+		 */
 		cpuctx = __get_cpu_context(ctx);
 		/*
-		 * if there are no more cgroup events
-		 * then cler cgrp to avoid stale pointer
-		 * in update_cgrp_time_from_cpuctx()
+		 * If there are no more cgroup events then clear cgrp to avoid
+		 * stale pointer in update_cgrp_time_from_cpuctx().
 		 */
 		if (!ctx->nr_cgroups)
 			cpuctx->cgrp = NULL;
@@ -1530,45 +1654,11 @@
 		perf_event__header_size(tmp);
 }
 
-/*
- * User event without the task.
- */
 static bool is_orphaned_event(struct perf_event *event)
 {
-	return event && !is_kernel_event(event) && !event->owner;
+	return event->state == PERF_EVENT_STATE_DEAD;
 }
 
-/*
- * Event has a parent but parent's task finished and it's
- * alive only because of children holding refference.
- */
-static bool is_orphaned_child(struct perf_event *event)
-{
-	return is_orphaned_event(event->parent);
-}
-
-static void orphans_remove_work(struct work_struct *work);
-
-static void schedule_orphans_remove(struct perf_event_context *ctx)
-{
-	if (!ctx->task || ctx->orphans_remove_sched || !perf_wq)
-		return;
-
-	if (queue_delayed_work(perf_wq, &ctx->orphans_remove, 1)) {
-		get_ctx(ctx);
-		ctx->orphans_remove_sched = true;
-	}
-}
-
-static int __init perf_workqueue_init(void)
-{
-	perf_wq = create_singlethread_workqueue("perf");
-	WARN(!perf_wq, "failed to create perf workqueue\n");
-	return perf_wq ? 0 : -1;
-}
-
-core_initcall(perf_workqueue_init);
-
 static inline int pmu_filter_match(struct perf_event *event)
 {
 	struct pmu *pmu = event->pmu;
@@ -1611,14 +1701,14 @@
 
 	perf_pmu_disable(event->pmu);
 
+	event->tstamp_stopped = tstamp;
+	event->pmu->del(event, 0);
+	event->oncpu = -1;
 	event->state = PERF_EVENT_STATE_INACTIVE;
 	if (event->pending_disable) {
 		event->pending_disable = 0;
 		event->state = PERF_EVENT_STATE_OFF;
 	}
-	event->tstamp_stopped = tstamp;
-	event->pmu->del(event, 0);
-	event->oncpu = -1;
 
 	if (!is_software_event(event))
 		cpuctx->active_oncpu--;
@@ -1629,9 +1719,6 @@
 	if (event->attr.exclusive || !cpuctx->active_oncpu)
 		cpuctx->exclusive = 0;
 
-	if (is_orphaned_child(event))
-		schedule_orphans_remove(ctx);
-
 	perf_pmu_enable(event->pmu);
 }
 
@@ -1655,21 +1742,7 @@
 		cpuctx->exclusive = 0;
 }
 
-struct remove_event {
-	struct perf_event *event;
-	bool detach_group;
-};
-
-static void ___perf_remove_from_context(void *info)
-{
-	struct remove_event *re = info;
-	struct perf_event *event = re->event;
-	struct perf_event_context *ctx = event->ctx;
-
-	if (re->detach_group)
-		perf_group_detach(event);
-	list_del_event(event, ctx);
-}
+#define DETACH_GROUP	0x01UL
 
 /*
  * Cross CPU call to remove a performance event
@@ -1677,33 +1750,31 @@
  * We disable the event on the hardware level first. After that we
  * remove it from the context list.
  */
-static int __perf_remove_from_context(void *info)
+static void
+__perf_remove_from_context(struct perf_event *event,
+			   struct perf_cpu_context *cpuctx,
+			   struct perf_event_context *ctx,
+			   void *info)
 {
-	struct remove_event *re = info;
-	struct perf_event *event = re->event;
-	struct perf_event_context *ctx = event->ctx;
-	struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
+	unsigned long flags = (unsigned long)info;
 
-	raw_spin_lock(&ctx->lock);
 	event_sched_out(event, cpuctx, ctx);
-	if (re->detach_group)
+	if (flags & DETACH_GROUP)
 		perf_group_detach(event);
 	list_del_event(event, ctx);
-	if (!ctx->nr_events && cpuctx->task_ctx == ctx) {
-		ctx->is_active = 0;
-		cpuctx->task_ctx = NULL;
-	}
-	raw_spin_unlock(&ctx->lock);
 
-	return 0;
+	if (!ctx->nr_events && ctx->is_active) {
+		ctx->is_active = 0;
+		if (ctx->task) {
+			WARN_ON_ONCE(cpuctx->task_ctx != ctx);
+			cpuctx->task_ctx = NULL;
+		}
+	}
 }
 
 /*
  * Remove the event from a task's (or a CPU's) list of events.
  *
- * CPU events are removed with a smp call. For task events we only
- * call when the task is on a CPU.
- *
  * If event->ctx is a cloned context, callers must make sure that
  * every task struct that event->ctx->task could possibly point to
  * remains valid.  This is OK when called from perf_release since
@@ -1711,73 +1782,32 @@
  * When called from perf_event_exit_task, it's OK because the
  * context has been detached from its task.
  */
-static void perf_remove_from_context(struct perf_event *event, bool detach_group)
+static void perf_remove_from_context(struct perf_event *event, unsigned long flags)
 {
-	struct perf_event_context *ctx = event->ctx;
-	struct remove_event re = {
-		.event = event,
-		.detach_group = detach_group,
-	};
+	lockdep_assert_held(&event->ctx->mutex);
 
-	lockdep_assert_held(&ctx->mutex);
-
-	event_function_call(event, __perf_remove_from_context,
-			    ___perf_remove_from_context, &re);
+	event_function_call(event, __perf_remove_from_context, (void *)flags);
 }
 
 /*
  * Cross CPU call to disable a performance event
  */
-int __perf_event_disable(void *info)
+static void __perf_event_disable(struct perf_event *event,
+				 struct perf_cpu_context *cpuctx,
+				 struct perf_event_context *ctx,
+				 void *info)
 {
-	struct perf_event *event = info;
-	struct perf_event_context *ctx = event->ctx;
-	struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
+	if (event->state < PERF_EVENT_STATE_INACTIVE)
+		return;
 
-	/*
-	 * If this is a per-task event, need to check whether this
-	 * event's task is the current task on this cpu.
-	 *
-	 * Can trigger due to concurrent perf_event_context_sched_out()
-	 * flipping contexts around.
-	 */
-	if (ctx->task && cpuctx->task_ctx != ctx)
-		return -EINVAL;
-
-	raw_spin_lock(&ctx->lock);
-
-	/*
-	 * If the event is on, turn it off.
-	 * If it is in error state, leave it in error state.
-	 */
-	if (event->state >= PERF_EVENT_STATE_INACTIVE) {
-		update_context_time(ctx);
-		update_cgrp_time_from_event(event);
-		update_group_times(event);
-		if (event == event->group_leader)
-			group_sched_out(event, cpuctx, ctx);
-		else
-			event_sched_out(event, cpuctx, ctx);
-		event->state = PERF_EVENT_STATE_OFF;
-	}
-
-	raw_spin_unlock(&ctx->lock);
-
-	return 0;
-}
-
-void ___perf_event_disable(void *info)
-{
-	struct perf_event *event = info;
-
-	/*
-	 * Since we have the lock this context can't be scheduled
-	 * in, so we can change the state safely.
-	 */
-	if (event->state == PERF_EVENT_STATE_INACTIVE) {
-		update_group_times(event);
-		event->state = PERF_EVENT_STATE_OFF;
-	}
+	update_context_time(ctx);
+	update_cgrp_time_from_event(event);
+	update_group_times(event);
+	if (event == event->group_leader)
+		group_sched_out(event, cpuctx, ctx);
+	else
+		event_sched_out(event, cpuctx, ctx);
+	event->state = PERF_EVENT_STATE_OFF;
 }
 
 /*
@@ -1788,7 +1818,8 @@
  * remains valid.  This condition is satisifed when called through
  * perf_event_for_each_child or perf_event_for_each because they
  * hold the top-level event's child_mutex, so any descendant that
- * goes to exit will block in sync_child_event.
+ * goes to exit will block in perf_event_exit_event().
+ *
  * When called from perf_pending_event it's OK because event->ctx
  * is the current context on this CPU and preemption is disabled,
  * hence we can't get into perf_event_task_sched_out for this context.
@@ -1804,8 +1835,12 @@
 	}
 	raw_spin_unlock_irq(&ctx->lock);
 
-	event_function_call(event, __perf_event_disable,
-			    ___perf_event_disable, event);
+	event_function_call(event, __perf_event_disable, NULL);
+}
+
+void perf_event_disable_local(struct perf_event *event)
+{
+	event_function_local(event, __perf_event_disable, NULL);
 }
 
 /*
@@ -1918,9 +1953,6 @@
 	if (event->attr.exclusive)
 		cpuctx->exclusive = 1;
 
-	if (is_orphaned_child(event))
-		schedule_orphans_remove(ctx);
-
 out:
 	perf_pmu_enable(event->pmu);
 
@@ -2039,13 +2071,27 @@
 	event->tstamp_stopped = tstamp;
 }
 
-static void task_ctx_sched_out(struct perf_event_context *ctx);
+static void ctx_sched_out(struct perf_event_context *ctx,
+			  struct perf_cpu_context *cpuctx,
+			  enum event_type_t event_type);
 static void
 ctx_sched_in(struct perf_event_context *ctx,
 	     struct perf_cpu_context *cpuctx,
 	     enum event_type_t event_type,
 	     struct task_struct *task);
 
+static void task_ctx_sched_out(struct perf_cpu_context *cpuctx,
+			       struct perf_event_context *ctx)
+{
+	if (!cpuctx->task_ctx)
+		return;
+
+	if (WARN_ON_ONCE(ctx != cpuctx->task_ctx))
+		return;
+
+	ctx_sched_out(ctx, cpuctx, EVENT_ALL);
+}
+
 static void perf_event_sched_in(struct perf_cpu_context *cpuctx,
 				struct perf_event_context *ctx,
 				struct task_struct *task)
@@ -2058,22 +2104,22 @@
 		ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE, task);
 }
 
-static void ___perf_install_in_context(void *info)
+static void ctx_resched(struct perf_cpu_context *cpuctx,
+			struct perf_event_context *task_ctx)
 {
-	struct perf_event *event = info;
-	struct perf_event_context *ctx = event->ctx;
-
-	/*
-	 * Since the task isn't running, its safe to add the event, us holding
-	 * the ctx->lock ensures the task won't get scheduled in.
-	 */
-	add_event_to_ctx(event, ctx);
+	perf_pmu_disable(cpuctx->ctx.pmu);
+	if (task_ctx)
+		task_ctx_sched_out(cpuctx, task_ctx);
+	cpu_ctx_sched_out(cpuctx, EVENT_ALL);
+	perf_event_sched_in(cpuctx, task_ctx, current);
+	perf_pmu_enable(cpuctx->ctx.pmu);
 }
 
 /*
  * Cross CPU call to install and enable a performance event
  *
- * Must be called with ctx->mutex held
+ * Very similar to remote_function() + event_function() but cannot assume that
+ * things like ctx->is_active and cpuctx->task_ctx are set.
  */
 static int  __perf_install_in_context(void *info)
 {
@@ -2081,79 +2127,106 @@
 	struct perf_event_context *ctx = event->ctx;
 	struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
 	struct perf_event_context *task_ctx = cpuctx->task_ctx;
-	struct task_struct *task = current;
+	bool activate = true;
+	int ret = 0;
 
-	perf_ctx_lock(cpuctx, task_ctx);
-	perf_pmu_disable(cpuctx->ctx.pmu);
-
-	/*
-	 * If there was an active task_ctx schedule it out.
-	 */
-	if (task_ctx)
-		task_ctx_sched_out(task_ctx);
-
-	/*
-	 * If the context we're installing events in is not the
-	 * active task_ctx, flip them.
-	 */
-	if (ctx->task && task_ctx != ctx) {
-		if (task_ctx)
-			raw_spin_unlock(&task_ctx->lock);
+	raw_spin_lock(&cpuctx->ctx.lock);
+	if (ctx->task) {
 		raw_spin_lock(&ctx->lock);
 		task_ctx = ctx;
+
+		/* If we're on the wrong CPU, try again */
+		if (task_cpu(ctx->task) != smp_processor_id()) {
+			ret = -ESRCH;
+			goto unlock;
+		}
+
+		/*
+		 * If we're on the right CPU, see if the task we target is
+		 * current, if not we don't have to activate the ctx, a future
+		 * context switch will do that for us.
+		 */
+		if (ctx->task != current)
+			activate = false;
+		else
+			WARN_ON_ONCE(cpuctx->task_ctx && cpuctx->task_ctx != ctx);
+
+	} else if (task_ctx) {
+		raw_spin_lock(&task_ctx->lock);
 	}
 
-	if (task_ctx) {
-		cpuctx->task_ctx = task_ctx;
-		task = task_ctx->task;
+	if (activate) {
+		ctx_sched_out(ctx, cpuctx, EVENT_TIME);
+		add_event_to_ctx(event, ctx);
+		ctx_resched(cpuctx, task_ctx);
+	} else {
+		add_event_to_ctx(event, ctx);
 	}
 
-	cpu_ctx_sched_out(cpuctx, EVENT_ALL);
-
-	update_context_time(ctx);
-	/*
-	 * update cgrp time only if current cgrp
-	 * matches event->cgrp. Must be done before
-	 * calling add_event_to_ctx()
-	 */
-	update_cgrp_time_from_event(event);
-
-	add_event_to_ctx(event, ctx);
-
-	/*
-	 * Schedule everything back in
-	 */
-	perf_event_sched_in(cpuctx, task_ctx, task);
-
-	perf_pmu_enable(cpuctx->ctx.pmu);
+unlock:
 	perf_ctx_unlock(cpuctx, task_ctx);
 
-	return 0;
+	return ret;
 }
 
 /*
- * Attach a performance event to a context
+ * Attach a performance event to a context.
  *
- * First we add the event to the list with the hardware enable bit
- * in event->hw_config cleared.
- *
- * If the event is attached to a task which is on a CPU we use a smp
- * call to enable it in the task context. The task might have been
- * scheduled away, but we check this in the smp call again.
+ * Very similar to event_function_call, see comment there.
  */
 static void
 perf_install_in_context(struct perf_event_context *ctx,
 			struct perf_event *event,
 			int cpu)
 {
+	struct task_struct *task = READ_ONCE(ctx->task);
+
 	lockdep_assert_held(&ctx->mutex);
 
 	event->ctx = ctx;
 	if (event->cpu != -1)
 		event->cpu = cpu;
 
-	event_function_call(event, __perf_install_in_context,
-			    ___perf_install_in_context, event);
+	if (!task) {
+		cpu_function_call(cpu, __perf_install_in_context, event);
+		return;
+	}
+
+	/*
+	 * Should not happen, we validate the ctx is still alive before calling.
+	 */
+	if (WARN_ON_ONCE(task == TASK_TOMBSTONE))
+		return;
+
+	/*
+	 * Installing events is tricky because we cannot rely on ctx->is_active
+	 * to be set in case this is the nr_events 0 -> 1 transition.
+	 */
+again:
+	/*
+	 * Cannot use task_function_call() because we need to run on the task's
+	 * CPU regardless of whether its current or not.
+	 */
+	if (!cpu_function_call(task_cpu(task), __perf_install_in_context, event))
+		return;
+
+	raw_spin_lock_irq(&ctx->lock);
+	task = ctx->task;
+	if (WARN_ON_ONCE(task == TASK_TOMBSTONE)) {
+		/*
+		 * Cannot happen because we already checked above (which also
+		 * cannot happen), and we hold ctx->mutex, which serializes us
+		 * against perf_event_exit_task_context().
+		 */
+		raw_spin_unlock_irq(&ctx->lock);
+		return;
+	}
+	raw_spin_unlock_irq(&ctx->lock);
+	/*
+	 * Since !ctx->is_active doesn't mean anything, we must IPI
+	 * unconditionally.
+	 */
+	goto again;
 }
 
 /*
@@ -2180,85 +2253,47 @@
 /*
  * Cross CPU call to enable a performance event
  */
-static int __perf_event_enable(void *info)
+static void __perf_event_enable(struct perf_event *event,
+				struct perf_cpu_context *cpuctx,
+				struct perf_event_context *ctx,
+				void *info)
 {
-	struct perf_event *event = info;
-	struct perf_event_context *ctx = event->ctx;
 	struct perf_event *leader = event->group_leader;
-	struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
-	int err;
+	struct perf_event_context *task_ctx;
 
-	/*
-	 * There's a time window between 'ctx->is_active' check
-	 * in perf_event_enable function and this place having:
-	 *   - IRQs on
-	 *   - ctx->lock unlocked
-	 *
-	 * where the task could be killed and 'ctx' deactivated
-	 * by perf_event_exit_task.
-	 */
-	if (!ctx->is_active)
-		return -EINVAL;
+	if (event->state >= PERF_EVENT_STATE_INACTIVE ||
+	    event->state <= PERF_EVENT_STATE_ERROR)
+		return;
 
-	raw_spin_lock(&ctx->lock);
-	update_context_time(ctx);
-
-	if (event->state >= PERF_EVENT_STATE_INACTIVE)
-		goto unlock;
-
-	/*
-	 * set current task's cgroup time reference point
-	 */
-	perf_cgroup_set_timestamp(current, ctx);
+	if (ctx->is_active)
+		ctx_sched_out(ctx, cpuctx, EVENT_TIME);
 
 	__perf_event_mark_enabled(event);
 
+	if (!ctx->is_active)
+		return;
+
 	if (!event_filter_match(event)) {
 		if (is_cgroup_event(event))
 			perf_cgroup_defer_enabled(event);
-		goto unlock;
+		ctx_sched_in(ctx, cpuctx, EVENT_TIME, current);
+		return;
 	}
 
 	/*
 	 * If the event is in a group and isn't the group leader,
 	 * then don't put it on unless the group is on.
 	 */
-	if (leader != event && leader->state != PERF_EVENT_STATE_ACTIVE)
-		goto unlock;
-
-	if (!group_can_go_on(event, cpuctx, 1)) {
-		err = -EEXIST;
-	} else {
-		if (event == leader)
-			err = group_sched_in(event, cpuctx, ctx);
-		else
-			err = event_sched_in(event, cpuctx, ctx);
+	if (leader != event && leader->state != PERF_EVENT_STATE_ACTIVE) {
+		ctx_sched_in(ctx, cpuctx, EVENT_TIME, current);
+		return;
 	}
 
-	if (err) {
-		/*
-		 * If this event can't go on and it's part of a
-		 * group, then the whole group has to come off.
-		 */
-		if (leader != event) {
-			group_sched_out(leader, cpuctx, ctx);
-			perf_mux_hrtimer_restart(cpuctx);
-		}
-		if (leader->attr.pinned) {
-			update_group_times(leader);
-			leader->state = PERF_EVENT_STATE_ERROR;
-		}
-	}
+	task_ctx = cpuctx->task_ctx;
+	if (ctx->task)
+		WARN_ON_ONCE(task_ctx != ctx);
 
-unlock:
-	raw_spin_unlock(&ctx->lock);
-
-	return 0;
-}
-
-void ___perf_event_enable(void *info)
-{
-	__perf_event_mark_enabled((struct perf_event *)info);
+	ctx_resched(cpuctx, task_ctx);
 }
 
 /*
@@ -2275,7 +2310,8 @@
 	struct perf_event_context *ctx = event->ctx;
 
 	raw_spin_lock_irq(&ctx->lock);
-	if (event->state >= PERF_EVENT_STATE_INACTIVE) {
+	if (event->state >= PERF_EVENT_STATE_INACTIVE ||
+	    event->state <  PERF_EVENT_STATE_ERROR) {
 		raw_spin_unlock_irq(&ctx->lock);
 		return;
 	}
@@ -2291,8 +2327,7 @@
 		event->state = PERF_EVENT_STATE_OFF;
 	raw_spin_unlock_irq(&ctx->lock);
 
-	event_function_call(event, __perf_event_enable,
-			    ___perf_event_enable, event);
+	event_function_call(event, __perf_event_enable, NULL);
 }
 
 /*
@@ -2342,25 +2377,49 @@
 			  struct perf_cpu_context *cpuctx,
 			  enum event_type_t event_type)
 {
-	struct perf_event *event;
 	int is_active = ctx->is_active;
+	struct perf_event *event;
+
+	lockdep_assert_held(&ctx->lock);
+
+	if (likely(!ctx->nr_events)) {
+		/*
+		 * See __perf_remove_from_context().
+		 */
+		WARN_ON_ONCE(ctx->is_active);
+		if (ctx->task)
+			WARN_ON_ONCE(cpuctx->task_ctx);
+		return;
+	}
 
 	ctx->is_active &= ~event_type;
-	if (likely(!ctx->nr_events))
-		return;
+	if (!(ctx->is_active & EVENT_ALL))
+		ctx->is_active = 0;
 
-	update_context_time(ctx);
-	update_cgrp_time_from_cpuctx(cpuctx);
-	if (!ctx->nr_active)
+	if (ctx->task) {
+		WARN_ON_ONCE(cpuctx->task_ctx != ctx);
+		if (!ctx->is_active)
+			cpuctx->task_ctx = NULL;
+	}
+
+	is_active ^= ctx->is_active; /* changed bits */
+
+	if (is_active & EVENT_TIME) {
+		/* update (and stop) ctx time */
+		update_context_time(ctx);
+		update_cgrp_time_from_cpuctx(cpuctx);
+	}
+
+	if (!ctx->nr_active || !(is_active & EVENT_ALL))
 		return;
 
 	perf_pmu_disable(ctx->pmu);
-	if ((is_active & EVENT_PINNED) && (event_type & EVENT_PINNED)) {
+	if (is_active & EVENT_PINNED) {
 		list_for_each_entry(event, &ctx->pinned_groups, group_entry)
 			group_sched_out(event, cpuctx, ctx);
 	}
 
-	if ((is_active & EVENT_FLEXIBLE) && (event_type & EVENT_FLEXIBLE)) {
+	if (is_active & EVENT_FLEXIBLE) {
 		list_for_each_entry(event, &ctx->flexible_groups, group_entry)
 			group_sched_out(event, cpuctx, ctx);
 	}
@@ -2518,17 +2577,21 @@
 		raw_spin_lock(&ctx->lock);
 		raw_spin_lock_nested(&next_ctx->lock, SINGLE_DEPTH_NESTING);
 		if (context_equiv(ctx, next_ctx)) {
-			/*
-			 * XXX do we need a memory barrier of sorts
-			 * wrt to rcu_dereference() of perf_event_ctxp
-			 */
-			task->perf_event_ctxp[ctxn] = next_ctx;
-			next->perf_event_ctxp[ctxn] = ctx;
-			ctx->task = next;
-			next_ctx->task = task;
+			WRITE_ONCE(ctx->task, next);
+			WRITE_ONCE(next_ctx->task, task);
 
 			swap(ctx->task_ctx_data, next_ctx->task_ctx_data);
 
+			/*
+			 * RCU_INIT_POINTER here is safe because we've not
+			 * modified the ctx and the above modification of
+			 * ctx->task and ctx->task_ctx_data are immaterial
+			 * since those values are always verified under
+			 * ctx->lock which we're now holding.
+			 */
+			RCU_INIT_POINTER(task->perf_event_ctxp[ctxn], next_ctx);
+			RCU_INIT_POINTER(next->perf_event_ctxp[ctxn], ctx);
+
 			do_switch = 0;
 
 			perf_event_sync_stat(ctx, next_ctx);
@@ -2541,8 +2604,7 @@
 
 	if (do_switch) {
 		raw_spin_lock(&ctx->lock);
-		ctx_sched_out(ctx, cpuctx, EVENT_ALL);
-		cpuctx->task_ctx = NULL;
+		task_ctx_sched_out(cpuctx, ctx);
 		raw_spin_unlock(&ctx->lock);
 	}
 }
@@ -2637,20 +2699,6 @@
 		perf_cgroup_sched_out(task, next);
 }
 
-static void task_ctx_sched_out(struct perf_event_context *ctx)
-{
-	struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
-
-	if (!cpuctx->task_ctx)
-		return;
-
-	if (WARN_ON_ONCE(ctx != cpuctx->task_ctx))
-		return;
-
-	ctx_sched_out(ctx, cpuctx, EVENT_ALL);
-	cpuctx->task_ctx = NULL;
-}
-
 /*
  * Called with IRQs disabled
  */
@@ -2725,25 +2773,40 @@
 	     enum event_type_t event_type,
 	     struct task_struct *task)
 {
-	u64 now;
 	int is_active = ctx->is_active;
+	u64 now;
 
-	ctx->is_active |= event_type;
+	lockdep_assert_held(&ctx->lock);
+
 	if (likely(!ctx->nr_events))
 		return;
 
-	now = perf_clock();
-	ctx->timestamp = now;
-	perf_cgroup_set_timestamp(task, ctx);
+	ctx->is_active |= (event_type | EVENT_TIME);
+	if (ctx->task) {
+		if (!is_active)
+			cpuctx->task_ctx = ctx;
+		else
+			WARN_ON_ONCE(cpuctx->task_ctx != ctx);
+	}
+
+	is_active ^= ctx->is_active; /* changed bits */
+
+	if (is_active & EVENT_TIME) {
+		/* start ctx time */
+		now = perf_clock();
+		ctx->timestamp = now;
+		perf_cgroup_set_timestamp(task, ctx);
+	}
+
 	/*
 	 * First go through the list and put on any pinned groups
 	 * in order to give them the best chance of going on.
 	 */
-	if (!(is_active & EVENT_PINNED) && (event_type & EVENT_PINNED))
+	if (is_active & EVENT_PINNED)
 		ctx_pinned_sched_in(ctx, cpuctx);
 
 	/* Then walk through the lower prio flexible groups */
-	if (!(is_active & EVENT_FLEXIBLE) && (event_type & EVENT_FLEXIBLE))
+	if (is_active & EVENT_FLEXIBLE)
 		ctx_flexible_sched_in(ctx, cpuctx);
 }
 
@@ -2773,12 +2836,7 @@
 	 * cpu flexible, task flexible.
 	 */
 	cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
-
-	if (ctx->nr_events)
-		cpuctx->task_ctx = ctx;
-
-	perf_event_sched_in(cpuctx, cpuctx->task_ctx, task);
-
+	perf_event_sched_in(cpuctx, ctx, task);
 	perf_pmu_enable(ctx->pmu);
 	perf_ctx_unlock(cpuctx, ctx);
 }
@@ -2800,6 +2858,16 @@
 	struct perf_event_context *ctx;
 	int ctxn;
 
+	/*
+	 * If cgroup events exist on this CPU, then we need to check if we have
+	 * to switch in PMU state; cgroup event are system-wide mode only.
+	 *
+	 * Since cgroup events are CPU events, we must schedule these in before
+	 * we schedule in the task events.
+	 */
+	if (atomic_read(this_cpu_ptr(&perf_cgroup_events)))
+		perf_cgroup_sched_in(prev, task);
+
 	for_each_task_context_nr(ctxn) {
 		ctx = task->perf_event_ctxp[ctxn];
 		if (likely(!ctx))
@@ -2807,13 +2875,6 @@
 
 		perf_event_context_sched_in(ctx, task);
 	}
-	/*
-	 * if cgroup events exist on this CPU, then we need
-	 * to check if we have to switch in PMU state.
-	 * cgroup event are system-wide mode only
-	 */
-	if (atomic_read(this_cpu_ptr(&perf_cgroup_events)))
-		perf_cgroup_sched_in(prev, task);
 
 	if (atomic_read(&nr_switch_events))
 		perf_event_switch(task, prev, true);
@@ -3051,17 +3112,6 @@
 	return rotate;
 }
 
-#ifdef CONFIG_NO_HZ_FULL
-bool perf_event_can_stop_tick(void)
-{
-	if (atomic_read(&nr_freq_events) ||
-	    __this_cpu_read(perf_throttled_count))
-		return false;
-	else
-		return true;
-}
-#endif
-
 void perf_event_task_tick(void)
 {
 	struct list_head *head = this_cpu_ptr(&active_ctx_list);
@@ -3072,6 +3122,7 @@
 
 	__this_cpu_inc(perf_throttled_seq);
 	throttled = __this_cpu_xchg(perf_throttled_count, 0);
+	tick_dep_clear_cpu(smp_processor_id(), TICK_DEP_BIT_PERF_EVENTS);
 
 	list_for_each_entry_safe(ctx, tmp, head, active_ctx_list)
 		perf_adjust_freq_unthr_context(ctx, throttled);
@@ -3099,46 +3150,31 @@
 static void perf_event_enable_on_exec(int ctxn)
 {
 	struct perf_event_context *ctx, *clone_ctx = NULL;
+	struct perf_cpu_context *cpuctx;
 	struct perf_event *event;
 	unsigned long flags;
 	int enabled = 0;
-	int ret;
 
 	local_irq_save(flags);
 	ctx = current->perf_event_ctxp[ctxn];
 	if (!ctx || !ctx->nr_events)
 		goto out;
 
-	/*
-	 * We must ctxsw out cgroup events to avoid conflict
-	 * when invoking perf_task_event_sched_in() later on
-	 * in this function. Otherwise we end up trying to
-	 * ctxswin cgroup events which are already scheduled
-	 * in.
-	 */
-	perf_cgroup_sched_out(current, NULL);
-
-	raw_spin_lock(&ctx->lock);
-	task_ctx_sched_out(ctx);
-
-	list_for_each_entry(event, &ctx->event_list, event_entry) {
-		ret = event_enable_on_exec(event, ctx);
-		if (ret)
-			enabled = 1;
-	}
+	cpuctx = __get_cpu_context(ctx);
+	perf_ctx_lock(cpuctx, ctx);
+	ctx_sched_out(ctx, cpuctx, EVENT_TIME);
+	list_for_each_entry(event, &ctx->event_list, event_entry)
+		enabled |= event_enable_on_exec(event, ctx);
 
 	/*
-	 * Unclone this context if we enabled any event.
+	 * Unclone and reschedule this context if we enabled any event.
 	 */
-	if (enabled)
+	if (enabled) {
 		clone_ctx = unclone_ctx(ctx);
+		ctx_resched(cpuctx, ctx);
+	}
+	perf_ctx_unlock(cpuctx, ctx);
 
-	raw_spin_unlock(&ctx->lock);
-
-	/*
-	 * Also calls ctxswin for cgroup events, if any:
-	 */
-	perf_event_context_sched_in(ctx, ctx->task);
 out:
 	local_irq_restore(flags);
 
@@ -3334,7 +3370,6 @@
 	INIT_LIST_HEAD(&ctx->flexible_groups);
 	INIT_LIST_HEAD(&ctx->event_list);
 	atomic_set(&ctx->refcount, 1);
-	INIT_DELAYED_WORK(&ctx->orphans_remove, orphans_remove_work);
 }
 
 static struct perf_event_context *
@@ -3519,13 +3554,37 @@
 		atomic_dec(&per_cpu(perf_cgroup_events, cpu));
 }
 
+#ifdef CONFIG_NO_HZ_FULL
+static DEFINE_SPINLOCK(nr_freq_lock);
+#endif
+
+static void unaccount_freq_event_nohz(void)
+{
+#ifdef CONFIG_NO_HZ_FULL
+	spin_lock(&nr_freq_lock);
+	if (atomic_dec_and_test(&nr_freq_events))
+		tick_nohz_dep_clear(TICK_DEP_BIT_PERF_EVENTS);
+	spin_unlock(&nr_freq_lock);
+#endif
+}
+
+static void unaccount_freq_event(void)
+{
+	if (tick_nohz_full_enabled())
+		unaccount_freq_event_nohz();
+	else
+		atomic_dec(&nr_freq_events);
+}
+
 static void unaccount_event(struct perf_event *event)
 {
+	bool dec = false;
+
 	if (event->parent)
 		return;
 
 	if (event->attach_state & PERF_ATTACH_TASK)
-		static_key_slow_dec_deferred(&perf_sched_events);
+		dec = true;
 	if (event->attr.mmap || event->attr.mmap_data)
 		atomic_dec(&nr_mmap_events);
 	if (event->attr.comm)
@@ -3533,19 +3592,32 @@
 	if (event->attr.task)
 		atomic_dec(&nr_task_events);
 	if (event->attr.freq)
-		atomic_dec(&nr_freq_events);
+		unaccount_freq_event();
 	if (event->attr.context_switch) {
-		static_key_slow_dec_deferred(&perf_sched_events);
+		dec = true;
 		atomic_dec(&nr_switch_events);
 	}
 	if (is_cgroup_event(event))
-		static_key_slow_dec_deferred(&perf_sched_events);
+		dec = true;
 	if (has_branch_stack(event))
-		static_key_slow_dec_deferred(&perf_sched_events);
+		dec = true;
+
+	if (dec) {
+		if (!atomic_add_unless(&perf_sched_count, -1, 1))
+			schedule_delayed_work(&perf_sched_work, HZ);
+	}
 
 	unaccount_event_cpu(event, event->cpu);
 }
 
+static void perf_sched_delayed(struct work_struct *work)
+{
+	mutex_lock(&perf_sched_mutex);
+	if (atomic_dec_and_test(&perf_sched_count))
+		static_branch_disable(&perf_sched_events);
+	mutex_unlock(&perf_sched_mutex);
+}
+
 /*
  * The following implement mutual exclusion of events on "exclusive" pmus
  * (PERF_PMU_CAP_EXCLUSIVE). Such pmus can only have one event scheduled
@@ -3556,7 +3628,7 @@
  *  3) two matching events on the same context.
  *
  * The former two cases are handled in the allocation path (perf_event_alloc(),
- * __free_event()), the latter -- before the first perf_install_in_context().
+ * _free_event()), the latter -- before the first perf_install_in_context().
  */
 static int exclusive_event_init(struct perf_event *event)
 {
@@ -3631,29 +3703,6 @@
 	return true;
 }
 
-static void __free_event(struct perf_event *event)
-{
-	if (!event->parent) {
-		if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
-			put_callchain_buffers();
-	}
-
-	perf_event_free_bpf_prog(event);
-
-	if (event->destroy)
-		event->destroy(event);
-
-	if (event->ctx)
-		put_ctx(event->ctx);
-
-	if (event->pmu) {
-		exclusive_event_destroy(event);
-		module_put(event->pmu->module);
-	}
-
-	call_rcu(&event->rcu_head, free_event_rcu);
-}
-
 static void _free_event(struct perf_event *event)
 {
 	irq_work_sync(&event->pending);
@@ -3675,7 +3724,25 @@
 	if (is_cgroup_event(event))
 		perf_detach_cgroup(event);
 
-	__free_event(event);
+	if (!event->parent) {
+		if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
+			put_callchain_buffers();
+	}
+
+	perf_event_free_bpf_prog(event);
+
+	if (event->destroy)
+		event->destroy(event);
+
+	if (event->ctx)
+		put_ctx(event->ctx);
+
+	if (event->pmu) {
+		exclusive_event_destroy(event);
+		module_put(event->pmu->module);
+	}
+
+	call_rcu(&event->rcu_head, free_event_rcu);
 }
 
 /*
@@ -3702,14 +3769,13 @@
 	struct task_struct *owner;
 
 	rcu_read_lock();
-	owner = ACCESS_ONCE(event->owner);
 	/*
-	 * Matches the smp_wmb() in perf_event_exit_task(). If we observe
-	 * !owner it means the list deletion is complete and we can indeed
-	 * free this event, otherwise we need to serialize on
+	 * Matches the smp_store_release() in perf_event_exit_task(). If we
+	 * observe !owner it means the list deletion is complete and we can
+	 * indeed free this event, otherwise we need to serialize on
 	 * owner->perf_event_mutex.
 	 */
-	smp_read_barrier_depends();
+	owner = lockless_dereference(event->owner);
 	if (owner) {
 		/*
 		 * Since delayed_put_task_struct() also drops the last
@@ -3737,8 +3803,10 @@
 		 * ensured they're done, and we can proceed with freeing the
 		 * event.
 		 */
-		if (event->owner)
+		if (event->owner) {
 			list_del_init(&event->owner_entry);
+			smp_store_release(&event->owner, NULL);
+		}
 		mutex_unlock(&owner->perf_event_mutex);
 		put_task_struct(owner);
 	}
@@ -3746,37 +3814,111 @@
 
 static void put_event(struct perf_event *event)
 {
-	struct perf_event_context *ctx;
-
 	if (!atomic_long_dec_and_test(&event->refcount))
 		return;
 
-	if (!is_kernel_event(event))
-		perf_remove_from_owner(event);
-
-	/*
-	 * There are two ways this annotation is useful:
-	 *
-	 *  1) there is a lock recursion from perf_event_exit_task
-	 *     see the comment there.
-	 *
-	 *  2) there is a lock-inversion with mmap_sem through
-	 *     perf_read_group(), which takes faults while
-	 *     holding ctx->mutex, however this is called after
-	 *     the last filedesc died, so there is no possibility
-	 *     to trigger the AB-BA case.
-	 */
-	ctx = perf_event_ctx_lock_nested(event, SINGLE_DEPTH_NESTING);
-	WARN_ON_ONCE(ctx->parent_ctx);
-	perf_remove_from_context(event, true);
-	perf_event_ctx_unlock(event, ctx);
-
 	_free_event(event);
 }
 
+/*
+ * Kill an event dead; while event:refcount will preserve the event
+ * object, it will not preserve its functionality. Once the last 'user'
+ * gives up the object, we'll destroy the thing.
+ */
 int perf_event_release_kernel(struct perf_event *event)
 {
-	put_event(event);
+	struct perf_event_context *ctx = event->ctx;
+	struct perf_event *child, *tmp;
+
+	/*
+	 * If we got here through err_file: fput(event_file); we will not have
+	 * attached to a context yet.
+	 */
+	if (!ctx) {
+		WARN_ON_ONCE(event->attach_state &
+				(PERF_ATTACH_CONTEXT|PERF_ATTACH_GROUP));
+		goto no_ctx;
+	}
+
+	if (!is_kernel_event(event))
+		perf_remove_from_owner(event);
+
+	ctx = perf_event_ctx_lock(event);
+	WARN_ON_ONCE(ctx->parent_ctx);
+	perf_remove_from_context(event, DETACH_GROUP);
+
+	raw_spin_lock_irq(&ctx->lock);
+	/*
+	 * Mark this even as STATE_DEAD, there is no external reference to it
+	 * anymore.
+	 *
+	 * Anybody acquiring event->child_mutex after the below loop _must_
+	 * also see this, most importantly inherit_event() which will avoid
+	 * placing more children on the list.
+	 *
+	 * Thus this guarantees that we will in fact observe and kill _ALL_
+	 * child events.
+	 */
+	event->state = PERF_EVENT_STATE_DEAD;
+	raw_spin_unlock_irq(&ctx->lock);
+
+	perf_event_ctx_unlock(event, ctx);
+
+again:
+	mutex_lock(&event->child_mutex);
+	list_for_each_entry(child, &event->child_list, child_list) {
+
+		/*
+		 * Cannot change, child events are not migrated, see the
+		 * comment with perf_event_ctx_lock_nested().
+		 */
+		ctx = lockless_dereference(child->ctx);
+		/*
+		 * Since child_mutex nests inside ctx::mutex, we must jump
+		 * through hoops. We start by grabbing a reference on the ctx.
+		 *
+		 * Since the event cannot get freed while we hold the
+		 * child_mutex, the context must also exist and have a !0
+		 * reference count.
+		 */
+		get_ctx(ctx);
+
+		/*
+		 * Now that we have a ctx ref, we can drop child_mutex, and
+		 * acquire ctx::mutex without fear of it going away. Then we
+		 * can re-acquire child_mutex.
+		 */
+		mutex_unlock(&event->child_mutex);
+		mutex_lock(&ctx->mutex);
+		mutex_lock(&event->child_mutex);
+
+		/*
+		 * Now that we hold ctx::mutex and child_mutex, revalidate our
+		 * state, if child is still the first entry, it didn't get freed
+		 * and we can continue doing so.
+		 */
+		tmp = list_first_entry_or_null(&event->child_list,
+					       struct perf_event, child_list);
+		if (tmp == child) {
+			perf_remove_from_context(child, DETACH_GROUP);
+			list_del(&child->child_list);
+			free_event(child);
+			/*
+			 * This matches the refcount bump in inherit_event();
+			 * this can't be the last reference.
+			 */
+			put_event(event);
+		}
+
+		mutex_unlock(&event->child_mutex);
+		mutex_unlock(&ctx->mutex);
+		put_ctx(ctx);
+		goto again;
+	}
+	mutex_unlock(&event->child_mutex);
+
+no_ctx:
+	put_event(event); /* Must be the 'last' reference */
 	return 0;
 }
 EXPORT_SYMBOL_GPL(perf_event_release_kernel);
@@ -3786,46 +3928,10 @@
  */
 static int perf_release(struct inode *inode, struct file *file)
 {
-	put_event(file->private_data);
+	perf_event_release_kernel(file->private_data);
 	return 0;
 }
 
-/*
- * Remove all orphanes events from the context.
- */
-static void orphans_remove_work(struct work_struct *work)
-{
-	struct perf_event_context *ctx;
-	struct perf_event *event, *tmp;
-
-	ctx = container_of(work, struct perf_event_context,
-			   orphans_remove.work);
-
-	mutex_lock(&ctx->mutex);
-	list_for_each_entry_safe(event, tmp, &ctx->event_list, event_entry) {
-		struct perf_event *parent_event = event->parent;
-
-		if (!is_orphaned_child(event))
-			continue;
-
-		perf_remove_from_context(event, true);
-
-		mutex_lock(&parent_event->child_mutex);
-		list_del_init(&event->child_list);
-		mutex_unlock(&parent_event->child_mutex);
-
-		free_event(event);
-		put_event(parent_event);
-	}
-
-	raw_spin_lock_irq(&ctx->lock);
-	ctx->orphans_remove_sched = false;
-	raw_spin_unlock_irq(&ctx->lock);
-	mutex_unlock(&ctx->mutex);
-
-	put_ctx(ctx);
-}
-
 u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)
 {
 	struct perf_event *child;
@@ -3969,7 +4075,7 @@
 {
 	bool no_children;
 
-	if (event->state != PERF_EVENT_STATE_EXIT)
+	if (event->state > PERF_EVENT_STATE_EXIT)
 		return false;
 
 	mutex_lock(&event->child_mutex);
@@ -4054,7 +4160,7 @@
 /*
  * Holding the top-level event's child_mutex means that any
  * descendant process that has inherited this event will block
- * in sync_child_event if it goes to exit, thus satisfying the
+ * in perf_event_exit_event() if it goes to exit, thus satisfying the
  * task existence requirements of perf_event_enable/disable.
  */
 static void perf_event_for_each_child(struct perf_event *event,
@@ -4086,36 +4192,14 @@
 		perf_event_for_each_child(sibling, func);
 }
 
-struct period_event {
-	struct perf_event *event;
-	u64 value;
-};
-
-static void ___perf_event_period(void *info)
+static void __perf_event_period(struct perf_event *event,
+				struct perf_cpu_context *cpuctx,
+				struct perf_event_context *ctx,
+				void *info)
 {
-	struct period_event *pe = info;
-	struct perf_event *event = pe->event;
-	u64 value = pe->value;
-
-	if (event->attr.freq) {
-		event->attr.sample_freq = value;
-	} else {
-		event->attr.sample_period = value;
-		event->hw.sample_period = value;
-	}
-
-	local64_set(&event->hw.period_left, 0);
-}
-
-static int __perf_event_period(void *info)
-{
-	struct period_event *pe = info;
-	struct perf_event *event = pe->event;
-	struct perf_event_context *ctx = event->ctx;
-	u64 value = pe->value;
+	u64 value = *((u64 *)info);
 	bool active;
 
-	raw_spin_lock(&ctx->lock);
 	if (event->attr.freq) {
 		event->attr.sample_freq = value;
 	} else {
@@ -4135,14 +4219,10 @@
 		event->pmu->start(event, PERF_EF_RELOAD);
 		perf_pmu_enable(ctx->pmu);
 	}
-	raw_spin_unlock(&ctx->lock);
-
-	return 0;
 }
 
 static int perf_event_period(struct perf_event *event, u64 __user *arg)
 {
-	struct period_event pe = { .event = event, };
 	u64 value;
 
 	if (!is_sampling_event(event))
@@ -4157,10 +4237,7 @@
 	if (event->attr.freq && value > sysctl_perf_event_sample_rate)
 		return -EINVAL;
 
-	pe.value = value;
-
-	event_function_call(event, __perf_event_period,
-			    ___perf_event_period, &pe);
+	event_function_call(event, __perf_event_period, &value);
 
 	return 0;
 }
@@ -4932,7 +5009,7 @@
 
 	if (event->pending_disable) {
 		event->pending_disable = 0;
-		__perf_event_disable(event);
+		perf_event_disable_local(event);
 	}
 
 	if (event->pending_wakeup) {
@@ -6359,9 +6436,9 @@
 		if (unlikely(throttle
 			     && hwc->interrupts >= max_samples_per_tick)) {
 			__this_cpu_inc(perf_throttled_count);
+			tick_dep_set_cpu(smp_processor_id(), TICK_DEP_BIT_PERF_EVENTS);
 			hwc->interrupts = MAX_INTERRUPTS;
 			perf_log_throttle(event, 0);
-			tick_nohz_full_kick();
 			ret = 1;
 		}
 	}
@@ -6720,7 +6797,7 @@
 	kfree_rcu(hlist, rcu_head);
 }
 
-static void swevent_hlist_put_cpu(struct perf_event *event, int cpu)
+static void swevent_hlist_put_cpu(int cpu)
 {
 	struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);
 
@@ -6732,15 +6809,15 @@
 	mutex_unlock(&swhash->hlist_mutex);
 }
 
-static void swevent_hlist_put(struct perf_event *event)
+static void swevent_hlist_put(void)
 {
 	int cpu;
 
 	for_each_possible_cpu(cpu)
-		swevent_hlist_put_cpu(event, cpu);
+		swevent_hlist_put_cpu(cpu);
 }
 
-static int swevent_hlist_get_cpu(struct perf_event *event, int cpu)
+static int swevent_hlist_get_cpu(int cpu)
 {
 	struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);
 	int err = 0;
@@ -6763,14 +6840,13 @@
 	return err;
 }
 
-static int swevent_hlist_get(struct perf_event *event)
+static int swevent_hlist_get(void)
 {
-	int err;
-	int cpu, failed_cpu;
+	int err, cpu, failed_cpu;
 
 	get_online_cpus();
 	for_each_possible_cpu(cpu) {
-		err = swevent_hlist_get_cpu(event, cpu);
+		err = swevent_hlist_get_cpu(cpu);
 		if (err) {
 			failed_cpu = cpu;
 			goto fail;
@@ -6783,7 +6859,7 @@
 	for_each_possible_cpu(cpu) {
 		if (cpu == failed_cpu)
 			break;
-		swevent_hlist_put_cpu(event, cpu);
+		swevent_hlist_put_cpu(cpu);
 	}
 
 	put_online_cpus();
@@ -6799,7 +6875,7 @@
 	WARN_ON(event->parent);
 
 	static_key_slow_dec(&perf_swevent_enabled[event_id]);
-	swevent_hlist_put(event);
+	swevent_hlist_put();
 }
 
 static int perf_swevent_init(struct perf_event *event)
@@ -6830,7 +6906,7 @@
 	if (!event->parent) {
 		int err;
 
-		err = swevent_hlist_get(event);
+		err = swevent_hlist_get();
 		if (err)
 			return err;
 
@@ -7751,31 +7827,75 @@
 		atomic_inc(&per_cpu(perf_cgroup_events, cpu));
 }
 
+/* Freq events need the tick to stay alive (see perf_event_task_tick). */
+static void account_freq_event_nohz(void)
+{
+#ifdef CONFIG_NO_HZ_FULL
+	/* Lock so we don't race with concurrent unaccount */
+	spin_lock(&nr_freq_lock);
+	if (atomic_inc_return(&nr_freq_events) == 1)
+		tick_nohz_dep_set(TICK_DEP_BIT_PERF_EVENTS);
+	spin_unlock(&nr_freq_lock);
+#endif
+}
+
+static void account_freq_event(void)
+{
+	if (tick_nohz_full_enabled())
+		account_freq_event_nohz();
+	else
+		atomic_inc(&nr_freq_events);
+}
+
+
 static void account_event(struct perf_event *event)
 {
+	bool inc = false;
+
 	if (event->parent)
 		return;
 
 	if (event->attach_state & PERF_ATTACH_TASK)
-		static_key_slow_inc(&perf_sched_events.key);
+		inc = true;
 	if (event->attr.mmap || event->attr.mmap_data)
 		atomic_inc(&nr_mmap_events);
 	if (event->attr.comm)
 		atomic_inc(&nr_comm_events);
 	if (event->attr.task)
 		atomic_inc(&nr_task_events);
-	if (event->attr.freq) {
-		if (atomic_inc_return(&nr_freq_events) == 1)
-			tick_nohz_full_kick_all();
-	}
+	if (event->attr.freq)
+		account_freq_event();
 	if (event->attr.context_switch) {
 		atomic_inc(&nr_switch_events);
-		static_key_slow_inc(&perf_sched_events.key);
+		inc = true;
 	}
 	if (has_branch_stack(event))
-		static_key_slow_inc(&perf_sched_events.key);
+		inc = true;
 	if (is_cgroup_event(event))
-		static_key_slow_inc(&perf_sched_events.key);
+		inc = true;
+
+	if (inc) {
+		if (atomic_inc_not_zero(&perf_sched_count))
+			goto enabled;
+
+		mutex_lock(&perf_sched_mutex);
+		if (!atomic_read(&perf_sched_count)) {
+			static_branch_enable(&perf_sched_events);
+			/*
+			 * Guarantee that all CPUs observe they key change and
+			 * call the perf scheduling hooks before proceeding to
+			 * install events that need them.
+			 */
+			synchronize_sched();
+		}
+		/*
+		 * Now that we have waited for the sync_sched(), allow further
+		 * increments to by-pass the mutex.
+		 */
+		atomic_inc(&perf_sched_count);
+		mutex_unlock(&perf_sched_mutex);
+	}
+enabled:
 
 	account_event_cpu(event, event->cpu);
 }
@@ -7911,6 +8031,9 @@
 		}
 	}
 
+	/* symmetric to unaccount_event() in _free_event() */
+	account_event(event);
+
 	return event;
 
 err_per_task:
@@ -8274,8 +8397,6 @@
 		}
 	}
 
-	account_event(event);
-
 	/*
 	 * Special case software events and allow them to be part of
 	 * any hardware group.
@@ -8394,10 +8515,19 @@
 	if (move_group) {
 		gctx = group_leader->ctx;
 		mutex_lock_double(&gctx->mutex, &ctx->mutex);
+		if (gctx->task == TASK_TOMBSTONE) {
+			err = -ESRCH;
+			goto err_locked;
+		}
 	} else {
 		mutex_lock(&ctx->mutex);
 	}
 
+	if (ctx->task == TASK_TOMBSTONE) {
+		err = -ESRCH;
+		goto err_locked;
+	}
+
 	if (!perf_event_validate_size(event)) {
 		err = -E2BIG;
 		goto err_locked;
@@ -8422,11 +8552,11 @@
 		 * See perf_event_ctx_lock() for comments on the details
 		 * of swizzling perf_event::ctx.
 		 */
-		perf_remove_from_context(group_leader, false);
+		perf_remove_from_context(group_leader, 0);
 
 		list_for_each_entry(sibling, &group_leader->sibling_list,
 				    group_entry) {
-			perf_remove_from_context(sibling, false);
+			perf_remove_from_context(sibling, 0);
 			put_ctx(gctx);
 		}
 
@@ -8479,6 +8609,8 @@
 	perf_event__header_size(event);
 	perf_event__id_header_size(event);
 
+	event->owner = current;
+
 	perf_install_in_context(ctx, event, event->cpu);
 	perf_unpin_context(ctx);
 
@@ -8488,8 +8620,6 @@
 
 	put_online_cpus();
 
-	event->owner = current;
-
 	mutex_lock(&current->perf_event_mutex);
 	list_add_tail(&event->owner_entry, &current->perf_event_list);
 	mutex_unlock(&current->perf_event_mutex);
@@ -8514,7 +8644,12 @@
 	perf_unpin_context(ctx);
 	put_ctx(ctx);
 err_alloc:
-	free_event(event);
+	/*
+	 * If event_file is set, the fput() above will have called ->release()
+	 * and that will take care of freeing the event.
+	 */
+	if (!event_file)
+		free_event(event);
 err_cpus:
 	put_online_cpus();
 err_task:
@@ -8556,9 +8691,7 @@
 	}
 
 	/* Mark owner so we could distinguish it from user events. */
-	event->owner = EVENT_OWNER_KERNEL;
-
-	account_event(event);
+	event->owner = TASK_TOMBSTONE;
 
 	ctx = find_get_context(event->pmu, task, event);
 	if (IS_ERR(ctx)) {
@@ -8568,12 +8701,14 @@
 
 	WARN_ON_ONCE(ctx->parent_ctx);
 	mutex_lock(&ctx->mutex);
+	if (ctx->task == TASK_TOMBSTONE) {
+		err = -ESRCH;
+		goto err_unlock;
+	}
+
 	if (!exclusive_event_installable(event, ctx)) {
-		mutex_unlock(&ctx->mutex);
-		perf_unpin_context(ctx);
-		put_ctx(ctx);
 		err = -EBUSY;
-		goto err_free;
+		goto err_unlock;
 	}
 
 	perf_install_in_context(ctx, event, cpu);
@@ -8582,6 +8717,10 @@
 
 	return event;
 
+err_unlock:
+	mutex_unlock(&ctx->mutex);
+	perf_unpin_context(ctx);
+	put_ctx(ctx);
 err_free:
 	free_event(event);
 err:
@@ -8606,7 +8745,7 @@
 	mutex_lock_double(&src_ctx->mutex, &dst_ctx->mutex);
 	list_for_each_entry_safe(event, tmp, &src_ctx->event_list,
 				 event_entry) {
-		perf_remove_from_context(event, false);
+		perf_remove_from_context(event, 0);
 		unaccount_event_cpu(event, src_cpu);
 		put_ctx(src_ctx);
 		list_add(&event->migrate_entry, &events);
@@ -8673,33 +8812,15 @@
 		     &parent_event->child_total_time_enabled);
 	atomic64_add(child_event->total_time_running,
 		     &parent_event->child_total_time_running);
-
-	/*
-	 * Remove this event from the parent's list
-	 */
-	WARN_ON_ONCE(parent_event->ctx->parent_ctx);
-	mutex_lock(&parent_event->child_mutex);
-	list_del_init(&child_event->child_list);
-	mutex_unlock(&parent_event->child_mutex);
-
-	/*
-	 * Make sure user/parent get notified, that we just
-	 * lost one event.
-	 */
-	perf_event_wakeup(parent_event);
-
-	/*
-	 * Release the parent event, if this was the last
-	 * reference to it.
-	 */
-	put_event(parent_event);
 }
 
 static void
-__perf_event_exit_task(struct perf_event *child_event,
-			 struct perf_event_context *child_ctx,
-			 struct task_struct *child)
+perf_event_exit_event(struct perf_event *child_event,
+		      struct perf_event_context *child_ctx,
+		      struct task_struct *child)
 {
+	struct perf_event *parent_event = child_event->parent;
+
 	/*
 	 * Do not destroy the 'original' grouping; because of the context
 	 * switch optimization the original events could've ended up in a
@@ -8712,57 +8833,86 @@
 	 * Do destroy all inherited groups, we don't care about those
 	 * and being thorough is better.
 	 */
-	perf_remove_from_context(child_event, !!child_event->parent);
+	raw_spin_lock_irq(&child_ctx->lock);
+	WARN_ON_ONCE(child_ctx->is_active);
+
+	if (parent_event)
+		perf_group_detach(child_event);
+	list_del_event(child_event, child_ctx);
+	child_event->state = PERF_EVENT_STATE_EXIT; /* is_event_hup() */
+	raw_spin_unlock_irq(&child_ctx->lock);
 
 	/*
-	 * It can happen that the parent exits first, and has events
-	 * that are still around due to the child reference. These
-	 * events need to be zapped.
+	 * Parent events are governed by their filedesc, retain them.
 	 */
-	if (child_event->parent) {
-		sync_child_event(child_event, child);
-		free_event(child_event);
-	} else {
-		child_event->state = PERF_EVENT_STATE_EXIT;
+	if (!parent_event) {
 		perf_event_wakeup(child_event);
+		return;
 	}
+	/*
+	 * Child events can be cleaned up.
+	 */
+
+	sync_child_event(child_event, child);
+
+	/*
+	 * Remove this event from the parent's list
+	 */
+	WARN_ON_ONCE(parent_event->ctx->parent_ctx);
+	mutex_lock(&parent_event->child_mutex);
+	list_del_init(&child_event->child_list);
+	mutex_unlock(&parent_event->child_mutex);
+
+	/*
+	 * Kick perf_poll() for is_event_hup().
+	 */
+	perf_event_wakeup(parent_event);
+	free_event(child_event);
+	put_event(parent_event);
 }
 
 static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
 {
-	struct perf_event *child_event, *next;
 	struct perf_event_context *child_ctx, *clone_ctx = NULL;
-	unsigned long flags;
+	struct perf_event *child_event, *next;
 
-	if (likely(!child->perf_event_ctxp[ctxn]))
+	WARN_ON_ONCE(child != current);
+
+	child_ctx = perf_pin_task_context(child, ctxn);
+	if (!child_ctx)
 		return;
 
-	local_irq_save(flags);
 	/*
-	 * We can't reschedule here because interrupts are disabled,
-	 * and either child is current or it is a task that can't be
-	 * scheduled, so we are now safe from rescheduling changing
-	 * our context.
+	 * In order to reduce the amount of tricky in ctx tear-down, we hold
+	 * ctx::mutex over the entire thing. This serializes against almost
+	 * everything that wants to access the ctx.
+	 *
+	 * The exception is sys_perf_event_open() /
+	 * perf_event_create_kernel_count() which does find_get_context()
+	 * without ctx::mutex (it cannot because of the move_group double mutex
+	 * lock thing). See the comments in perf_install_in_context().
 	 */
-	child_ctx = rcu_dereference_raw(child->perf_event_ctxp[ctxn]);
+	mutex_lock(&child_ctx->mutex);
 
 	/*
-	 * Take the context lock here so that if find_get_context is
-	 * reading child->perf_event_ctxp, we wait until it has
-	 * incremented the context's refcount before we do put_ctx below.
+	 * In a single ctx::lock section, de-schedule the events and detach the
+	 * context from the task such that we cannot ever get it scheduled back
+	 * in.
 	 */
-	raw_spin_lock(&child_ctx->lock);
-	task_ctx_sched_out(child_ctx);
-	child->perf_event_ctxp[ctxn] = NULL;
+	raw_spin_lock_irq(&child_ctx->lock);
+	task_ctx_sched_out(__get_cpu_context(child_ctx), child_ctx);
 
 	/*
-	 * If this context is a clone; unclone it so it can't get
-	 * swapped to another process while we're removing all
-	 * the events from it.
+	 * Now that the context is inactive, destroy the task <-> ctx relation
+	 * and mark the context dead.
 	 */
+	RCU_INIT_POINTER(child->perf_event_ctxp[ctxn], NULL);
+	put_ctx(child_ctx); /* cannot be last */
+	WRITE_ONCE(child_ctx->task, TASK_TOMBSTONE);
+	put_task_struct(current); /* cannot be last */
+
 	clone_ctx = unclone_ctx(child_ctx);
-	update_context_time(child_ctx);
-	raw_spin_unlock_irqrestore(&child_ctx->lock, flags);
+	raw_spin_unlock_irq(&child_ctx->lock);
 
 	if (clone_ctx)
 		put_ctx(clone_ctx);
@@ -8774,20 +8924,8 @@
 	 */
 	perf_event_task(child, child_ctx, 0);
 
-	/*
-	 * We can recurse on the same lock type through:
-	 *
-	 *   __perf_event_exit_task()
-	 *     sync_child_event()
-	 *       put_event()
-	 *         mutex_lock(&ctx->mutex)
-	 *
-	 * But since its the parent context it won't be the same instance.
-	 */
-	mutex_lock(&child_ctx->mutex);
-
 	list_for_each_entry_safe(child_event, next, &child_ctx->event_list, event_entry)
-		__perf_event_exit_task(child_event, child_ctx, child);
+		perf_event_exit_event(child_event, child_ctx, child);
 
 	mutex_unlock(&child_ctx->mutex);
 
@@ -8812,8 +8950,7 @@
 		 * the owner, closes a race against perf_release() where
 		 * we need to serialize on the owner->perf_event_mutex.
 		 */
-		smp_wmb();
-		event->owner = NULL;
+		smp_store_release(&event->owner, NULL);
 	}
 	mutex_unlock(&child->perf_event_mutex);
 
@@ -8896,21 +9033,20 @@
 		WARN_ON_ONCE(task->perf_event_ctxp[ctxn]);
 }
 
-struct perf_event *perf_event_get(unsigned int fd)
+struct file *perf_event_get(unsigned int fd)
 {
-	int err;
-	struct fd f;
-	struct perf_event *event;
+	struct file *file;
 
-	err = perf_fget_light(fd, &f);
-	if (err)
-		return ERR_PTR(err);
+	file = fget_raw(fd);
+	if (!file)
+		return ERR_PTR(-EBADF);
 
-	event = f.file->private_data;
-	atomic_long_inc(&event->refcount);
-	fdput(f);
+	if (file->f_op != &perf_fops) {
+		fput(file);
+		return ERR_PTR(-EBADF);
+	}
 
-	return event;
+	return file;
 }
 
 const struct perf_event_attr *perf_event_attrs(struct perf_event *event)
@@ -8953,8 +9089,16 @@
 	if (IS_ERR(child_event))
 		return child_event;
 
+	/*
+	 * is_orphaned_event() and list_add_tail(&parent_event->child_list)
+	 * must be under the same lock in order to serialize against
+	 * perf_event_release_kernel(), such that either we must observe
+	 * is_orphaned_event() or they will observe us on the child_list.
+	 */
+	mutex_lock(&parent_event->child_mutex);
 	if (is_orphaned_event(parent_event) ||
 	    !atomic_long_inc_not_zero(&parent_event->refcount)) {
+		mutex_unlock(&parent_event->child_mutex);
 		free_event(child_event);
 		return NULL;
 	}
@@ -9002,8 +9146,6 @@
 	/*
 	 * Link this into the parent event's child list
 	 */
-	WARN_ON_ONCE(parent_event->ctx->parent_ctx);
-	mutex_lock(&parent_event->child_mutex);
 	list_add_tail(&child_event->child_list, &parent_event->child_list);
 	mutex_unlock(&parent_event->child_mutex);
 
@@ -9208,7 +9350,7 @@
 	struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);
 
 	mutex_lock(&swhash->hlist_mutex);
-	if (swhash->hlist_refcount > 0) {
+	if (swhash->hlist_refcount > 0 && !swevent_hlist_deref(swhash)) {
 		struct swevent_hlist *hlist;
 
 		hlist = kzalloc_node(sizeof(*hlist), GFP_KERNEL, cpu_to_node(cpu));
@@ -9221,13 +9363,14 @@
 #if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC_CORE
 static void __perf_event_exit_context(void *__info)
 {
-	struct remove_event re = { .detach_group = true };
 	struct perf_event_context *ctx = __info;
+	struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
+	struct perf_event *event;
 
-	rcu_read_lock();
-	list_for_each_entry_rcu(re.event, &ctx->event_list, event_entry)
-		__perf_remove_from_context(&re);
-	rcu_read_unlock();
+	raw_spin_lock(&ctx->lock);
+	list_for_each_entry(event, &ctx->event_list, event_entry)
+		__perf_remove_from_context(event, cpuctx, ctx, (void *)DETACH_GROUP);
+	raw_spin_unlock(&ctx->lock);
 }
 
 static void perf_event_exit_cpu_context(int cpu)
@@ -9283,11 +9426,9 @@
 	switch (action & ~CPU_TASKS_FROZEN) {
 
 	case CPU_UP_PREPARE:
-	case CPU_DOWN_FAILED:
 		perf_event_init_cpu(cpu);
 		break;
 
-	case CPU_UP_CANCELED:
 	case CPU_DOWN_PREPARE:
 		perf_event_exit_cpu(cpu);
 		break;
@@ -9316,9 +9457,6 @@
 	ret = init_hw_breakpoint();
 	WARN(ret, "hw_breakpoint initialization failed with: %d", ret);
 
-	/* do not patch jump label more than once per second */
-	jump_label_rate_limit(&perf_sched_events, HZ);
-
 	/*
 	 * Build time assertion that we keep the data_head at the intended
 	 * location.  IOW, validation we got the __reserved[] size right.
@@ -9338,6 +9476,7 @@
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(perf_event_sysfs_show);
 
 static int __init perf_event_sysfs_init(void)
 {
diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c
index 92ce5f4..3f8cb1e 100644
--- a/kernel/events/hw_breakpoint.c
+++ b/kernel/events/hw_breakpoint.c
@@ -444,7 +444,7 @@
 	 * current task.
 	 */
 	if (irqs_disabled() && bp->ctx && bp->ctx->task == current)
-		__perf_event_disable(bp);
+		perf_event_disable_local(bp);
 	else
 		perf_event_disable(bp);
 
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index adfdc05..1faad2cf 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -459,6 +459,25 @@
 	__free_page(page);
 }
 
+static void __rb_free_aux(struct ring_buffer *rb)
+{
+	int pg;
+
+	if (rb->aux_priv) {
+		rb->free_aux(rb->aux_priv);
+		rb->free_aux = NULL;
+		rb->aux_priv = NULL;
+	}
+
+	if (rb->aux_nr_pages) {
+		for (pg = 0; pg < rb->aux_nr_pages; pg++)
+			rb_free_aux_page(rb, pg);
+
+		kfree(rb->aux_pages);
+		rb->aux_nr_pages = 0;
+	}
+}
+
 int rb_alloc_aux(struct ring_buffer *rb, struct perf_event *event,
 		 pgoff_t pgoff, int nr_pages, long watermark, int flags)
 {
@@ -547,30 +566,11 @@
 	if (!ret)
 		rb->aux_pgoff = pgoff;
 	else
-		rb_free_aux(rb);
+		__rb_free_aux(rb);
 
 	return ret;
 }
 
-static void __rb_free_aux(struct ring_buffer *rb)
-{
-	int pg;
-
-	if (rb->aux_priv) {
-		rb->free_aux(rb->aux_priv);
-		rb->free_aux = NULL;
-		rb->aux_priv = NULL;
-	}
-
-	if (rb->aux_nr_pages) {
-		for (pg = 0; pg < rb->aux_nr_pages; pg++)
-			rb_free_aux_page(rb, pg);
-
-		kfree(rb->aux_pages);
-		rb->aux_nr_pages = 0;
-	}
-}
-
 void rb_free_aux(struct ring_buffer *rb)
 {
 	if (atomic_dec_and_test(&rb->aux_refcount))
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 0167679..5f6ce93 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -1178,6 +1178,7 @@
 		goto free_area;
 
 	area->xol_mapping.name = "[uprobes]";
+	area->xol_mapping.fault = NULL;
 	area->xol_mapping.pages = area->pages;
 	area->pages[0] = alloc_page(GFP_HIGHUSER);
 	if (!area->pages[0])
diff --git a/kernel/futex.c b/kernel/futex.c
index 0773f2b..a5d2e74 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -124,16 +124,16 @@
  *   futex_wait(futex, val);
  *
  *   waiters++; (a)
- *   mb(); (A) <-- paired with -.
- *                              |
- *   lock(hash_bucket(futex));  |
- *                              |
- *   uval = *futex;             |
- *                              |        *futex = newval;
- *                              |        sys_futex(WAKE, futex);
- *                              |          futex_wake(futex);
- *                              |
- *                              `------->  mb(); (B)
+ *   smp_mb(); (A) <-- paired with -.
+ *                                  |
+ *   lock(hash_bucket(futex));      |
+ *                                  |
+ *   uval = *futex;                 |
+ *                                  |        *futex = newval;
+ *                                  |        sys_futex(WAKE, futex);
+ *                                  |          futex_wake(futex);
+ *                                  |
+ *                                  `--------> smp_mb(); (B)
  *   if (uval == val)
  *     queue();
  *     unlock(hash_bucket(futex));
@@ -334,7 +334,7 @@
 	/*
 	 * Ensure futex_get_mm() implies a full barrier such that
 	 * get_futex_key() implies a full barrier. This is relied upon
-	 * as full barrier (B), see the ordering comment above.
+	 * as smp_mb(); (B), see the ordering comment above.
 	 */
 	smp_mb__after_atomic();
 }
@@ -407,10 +407,10 @@
 
 	switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
 	case FUT_OFF_INODE:
-		ihold(key->shared.inode); /* implies MB (B) */
+		ihold(key->shared.inode); /* implies smp_mb(); (B) */
 		break;
 	case FUT_OFF_MMSHARED:
-		futex_get_mm(key); /* implies MB (B) */
+		futex_get_mm(key); /* implies smp_mb(); (B) */
 		break;
 	default:
 		/*
@@ -418,7 +418,7 @@
 		 * mm, therefore the only purpose of calling get_futex_key_refs
 		 * is because we need the barrier for the lockless waiter check.
 		 */
-		smp_mb(); /* explicit MB (B) */
+		smp_mb(); /* explicit smp_mb(); (B) */
 	}
 }
 
@@ -497,7 +497,7 @@
 	if (!fshared) {
 		key->private.mm = mm;
 		key->private.address = address;
-		get_futex_key_refs(key);  /* implies MB (B) */
+		get_futex_key_refs(key);  /* implies smp_mb(); (B) */
 		return 0;
 	}
 
@@ -520,7 +520,20 @@
 	else
 		err = 0;
 
-	lock_page(page);
+	/*
+	 * The treatment of mapping from this point on is critical. The page
+	 * lock protects many things but in this context the page lock
+	 * stabilizes mapping, prevents inode freeing in the shared
+	 * file-backed region case and guards against movement to swap cache.
+	 *
+	 * Strictly speaking the page lock is not needed in all cases being
+	 * considered here and page lock forces unnecessarily serialization
+	 * From this point on, mapping will be re-verified if necessary and
+	 * page lock will be acquired only if it is unavoidable
+	 */
+	page = compound_head(page);
+	mapping = READ_ONCE(page->mapping);
+
 	/*
 	 * If page->mapping is NULL, then it cannot be a PageAnon
 	 * page; but it might be the ZERO_PAGE or in the gate area or
@@ -536,19 +549,31 @@
 	 * shmem_writepage move it from filecache to swapcache beneath us:
 	 * an unlikely race, but we do need to retry for page->mapping.
 	 */
-	mapping = compound_head(page)->mapping;
-	if (!mapping) {
-		int shmem_swizzled = PageSwapCache(page);
+	if (unlikely(!mapping)) {
+		int shmem_swizzled;
+
+		/*
+		 * Page lock is required to identify which special case above
+		 * applies. If this is really a shmem page then the page lock
+		 * will prevent unexpected transitions.
+		 */
+		lock_page(page);
+		shmem_swizzled = PageSwapCache(page) || page->mapping;
 		unlock_page(page);
 		put_page(page);
+
 		if (shmem_swizzled)
 			goto again;
+
 		return -EFAULT;
 	}
 
 	/*
 	 * Private mappings are handled in a simple way.
 	 *
+	 * If the futex key is stored on an anonymous page, then the associated
+	 * object is the mm which is implicitly pinned by the calling process.
+	 *
 	 * NOTE: When userspace waits on a MAP_SHARED mapping, even if
 	 * it's a read-only handle, it's expected that futexes attach to
 	 * the object not the particular process.
@@ -566,16 +591,74 @@
 		key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */
 		key->private.mm = mm;
 		key->private.address = address;
+
+		get_futex_key_refs(key); /* implies smp_mb(); (B) */
+
 	} else {
+		struct inode *inode;
+
+		/*
+		 * The associated futex object in this case is the inode and
+		 * the page->mapping must be traversed. Ordinarily this should
+		 * be stabilised under page lock but it's not strictly
+		 * necessary in this case as we just want to pin the inode, not
+		 * update the radix tree or anything like that.
+		 *
+		 * The RCU read lock is taken as the inode is finally freed
+		 * under RCU. If the mapping still matches expectations then the
+		 * mapping->host can be safely accessed as being a valid inode.
+		 */
+		rcu_read_lock();
+
+		if (READ_ONCE(page->mapping) != mapping) {
+			rcu_read_unlock();
+			put_page(page);
+
+			goto again;
+		}
+
+		inode = READ_ONCE(mapping->host);
+		if (!inode) {
+			rcu_read_unlock();
+			put_page(page);
+
+			goto again;
+		}
+
+		/*
+		 * Take a reference unless it is about to be freed. Previously
+		 * this reference was taken by ihold under the page lock
+		 * pinning the inode in place so i_lock was unnecessary. The
+		 * only way for this check to fail is if the inode was
+		 * truncated in parallel so warn for now if this happens.
+		 *
+		 * We are not calling into get_futex_key_refs() in file-backed
+		 * cases, therefore a successful atomic_inc return below will
+		 * guarantee that get_futex_key() will still imply smp_mb(); (B).
+		 */
+		if (WARN_ON_ONCE(!atomic_inc_not_zero(&inode->i_count))) {
+			rcu_read_unlock();
+			put_page(page);
+
+			goto again;
+		}
+
+		/* Should be impossible but lets be paranoid for now */
+		if (WARN_ON_ONCE(inode->i_mapping != mapping)) {
+			err = -EFAULT;
+			rcu_read_unlock();
+			iput(inode);
+
+			goto out;
+		}
+
 		key->both.offset |= FUT_OFF_INODE; /* inode-based key */
-		key->shared.inode = mapping->host;
+		key->shared.inode = inode;
 		key->shared.pgoff = basepage_index(page);
+		rcu_read_unlock();
 	}
 
-	get_futex_key_refs(key); /* implies MB (B) */
-
 out:
-	unlock_page(page);
 	put_page(page);
 	return err;
 }
@@ -1191,7 +1274,7 @@
 	if (pi_state->owner != current)
 		return -EINVAL;
 
-	raw_spin_lock(&pi_state->pi_mutex.wait_lock);
+	raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
 	new_owner = rt_mutex_next_owner(&pi_state->pi_mutex);
 
 	/*
@@ -1217,22 +1300,22 @@
 	else if (curval != uval)
 		ret = -EINVAL;
 	if (ret) {
-		raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
+		raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
 		return ret;
 	}
 
-	raw_spin_lock_irq(&pi_state->owner->pi_lock);
+	raw_spin_lock(&pi_state->owner->pi_lock);
 	WARN_ON(list_empty(&pi_state->list));
 	list_del_init(&pi_state->list);
-	raw_spin_unlock_irq(&pi_state->owner->pi_lock);
+	raw_spin_unlock(&pi_state->owner->pi_lock);
 
-	raw_spin_lock_irq(&new_owner->pi_lock);
+	raw_spin_lock(&new_owner->pi_lock);
 	WARN_ON(!list_empty(&pi_state->list));
 	list_add(&pi_state->list, &new_owner->pi_state_list);
 	pi_state->owner = new_owner;
-	raw_spin_unlock_irq(&new_owner->pi_lock);
+	raw_spin_unlock(&new_owner->pi_lock);
 
-	raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
+	raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
 
 	deboost = rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q);
 
@@ -1864,7 +1947,7 @@
 
 	q->lock_ptr = &hb->lock;
 
-	spin_lock(&hb->lock); /* implies MB (A) */
+	spin_lock(&hb->lock); /* implies smp_mb(); (A) */
 	return hb;
 }
 
@@ -1927,8 +2010,12 @@
 
 	/* In the common case we don't take the spinlock, which is nice. */
 retry:
-	lock_ptr = q->lock_ptr;
-	barrier();
+	/*
+	 * q->lock_ptr can change between this read and the following spin_lock.
+	 * Use READ_ONCE to forbid the compiler from reloading q->lock_ptr and
+	 * optimizing lock_ptr out of the logic below.
+	 */
+	lock_ptr = READ_ONCE(q->lock_ptr);
 	if (lock_ptr != NULL) {
 		spin_lock(lock_ptr);
 		/*
@@ -2127,11 +2214,11 @@
 		 * we returned due to timeout or signal without taking the
 		 * rt_mutex. Too late.
 		 */
-		raw_spin_lock(&q->pi_state->pi_mutex.wait_lock);
+		raw_spin_lock_irq(&q->pi_state->pi_mutex.wait_lock);
 		owner = rt_mutex_owner(&q->pi_state->pi_mutex);
 		if (!owner)
 			owner = rt_mutex_next_owner(&q->pi_state->pi_mutex);
-		raw_spin_unlock(&q->pi_state->pi_mutex.wait_lock);
+		raw_spin_unlock_irq(&q->pi_state->pi_mutex.wait_lock);
 		ret = fixup_pi_state_owner(uaddr, q, owner);
 		goto out;
 	}
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index a302cf9..57bff78 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -138,7 +138,8 @@
 	unsigned int flags = 0, irq = desc->irq_data.irq;
 	struct irqaction *action = desc->action;
 
-	do {
+	/* action might have become NULL since we dropped the lock */
+	while (action) {
 		irqreturn_t res;
 
 		trace_irq_handler_entry(irq, action);
@@ -173,7 +174,7 @@
 
 		retval |= res;
 		action = action->next;
-	} while (action);
+	}
 
 	add_interrupt_randomness(irq, flags);
 
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index fcab63c..3d18293 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -160,6 +160,8 @@
 	__irq_put_desc_unlock(desc, flags, false);
 }
 
+#define __irqd_to_state(d) ACCESS_PRIVATE((d)->common, state_use_accessors)
+
 /*
  * Manipulation functions for irq_data.state
  */
@@ -188,6 +190,8 @@
 	return __irqd_to_state(d) & mask;
 }
 
+#undef __irqd_to_state
+
 static inline void kstat_incr_irqs_this_cpu(struct irq_desc *desc)
 {
 	__this_cpu_inc(*desc->kstat_irqs);
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 6e655f7..3e56d2f0 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -575,10 +575,15 @@
 	unsigned int type = IRQ_TYPE_NONE;
 	int virq;
 
-	if (fwspec->fwnode)
-		domain = irq_find_matching_fwnode(fwspec->fwnode, DOMAIN_BUS_ANY);
-	else
+	if (fwspec->fwnode) {
+		domain = irq_find_matching_fwnode(fwspec->fwnode,
+						  DOMAIN_BUS_WIRED);
+		if (!domain)
+			domain = irq_find_matching_fwnode(fwspec->fwnode,
+							  DOMAIN_BUS_ANY);
+	} else {
 		domain = irq_default_domain;
+	}
 
 	if (!domain) {
 		pr_warn("no irq domain found for %s !\n",
diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
index 8dc6591..8d34308 100644
--- a/kernel/kexec_core.c
+++ b/kernel/kexec_core.c
@@ -66,13 +66,15 @@
 	.name  = "Crash kernel",
 	.start = 0,
 	.end   = 0,
-	.flags = IORESOURCE_BUSY | IORESOURCE_MEM
+	.flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
+	.desc  = IORES_DESC_CRASH_KERNEL
 };
 struct resource crashk_low_res = {
 	.name  = "Crash kernel",
 	.start = 0,
 	.end   = 0,
-	.flags = IORESOURCE_BUSY | IORESOURCE_MEM
+	.flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
+	.desc  = IORES_DESC_CRASH_KERNEL
 };
 
 int kexec_should_crash(struct task_struct *p)
@@ -959,7 +961,7 @@
 
 	ram_res->start = end;
 	ram_res->end = crashk_res.end;
-	ram_res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
+	ram_res->flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM;
 	ram_res->name = "System RAM";
 
 	crashk_res.end = end - 1;
diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
index 007b791..56b18eb 100644
--- a/kernel/kexec_file.c
+++ b/kernel/kexec_file.c
@@ -524,10 +524,10 @@
 
 	/* Walk the RAM ranges and allocate a suitable range for the buffer */
 	if (image->type == KEXEC_TYPE_CRASH)
-		ret = walk_iomem_res("Crash kernel",
-				     IORESOURCE_MEM | IORESOURCE_BUSY,
-				     crashk_res.start, crashk_res.end, kbuf,
-				     locate_mem_hole_callback);
+		ret = walk_iomem_res_desc(crashk_res.desc,
+				IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY,
+				crashk_res.start, crashk_res.end, kbuf,
+				locate_mem_hole_callback);
 	else
 		ret = walk_system_ram_res(0, -1, kbuf,
 					  locate_mem_hole_callback);
diff --git a/kernel/latencytop.c b/kernel/latencytop.c
index a028127..b5c30d9 100644
--- a/kernel/latencytop.c
+++ b/kernel/latencytop.c
@@ -47,12 +47,12 @@
  * of times)
  */
 
-#include <linux/latencytop.h>
 #include <linux/kallsyms.h>
 #include <linux/seq_file.h>
 #include <linux/notifier.h>
 #include <linux/spinlock.h>
 #include <linux/proc_fs.h>
+#include <linux/latencytop.h>
 #include <linux/export.h>
 #include <linux/sched.h>
 #include <linux/list.h>
@@ -289,4 +289,16 @@
 	proc_create("latency_stats", 0644, NULL, &lstats_fops);
 	return 0;
 }
+
+int sysctl_latencytop(struct ctl_table *table, int write,
+			void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	int err;
+
+	err = proc_dointvec(table, write, buffer, lenp, ppos);
+	if (latencytop_enabled)
+		force_schedstat_enabled();
+
+	return err;
+}
 device_initcall(init_lstats_procfs);
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 60ace56..f894a2c 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -123,8 +123,6 @@
 	return ret;
 }
 
-static int lockdep_initialized;
-
 unsigned long nr_list_entries;
 static struct lock_list list_entries[MAX_LOCKDEP_ENTRIES];
 
@@ -292,7 +290,7 @@
 #define __classhashfn(key)	hash_long((unsigned long)key, CLASSHASH_BITS)
 #define classhashentry(key)	(classhash_table + __classhashfn((key)))
 
-static struct list_head classhash_table[CLASSHASH_SIZE];
+static struct hlist_head classhash_table[CLASSHASH_SIZE];
 
 /*
  * We put the lock dependency chains into a hash-table as well, to cache
@@ -303,7 +301,7 @@
 #define __chainhashfn(chain)	hash_long(chain, CHAINHASH_BITS)
 #define chainhashentry(chain)	(chainhash_table + __chainhashfn((chain)))
 
-static struct list_head chainhash_table[CHAINHASH_SIZE];
+static struct hlist_head chainhash_table[CHAINHASH_SIZE];
 
 /*
  * The hash key of the lock dependency chains is a hash itself too:
@@ -434,19 +432,6 @@
 
 #ifdef CONFIG_DEBUG_LOCKDEP
 /*
- * We cannot printk in early bootup code. Not even early_printk()
- * might work. So we mark any initialization errors and printk
- * about it later on, in lockdep_info().
- */
-static int lockdep_init_error;
-static const char *lock_init_error;
-static unsigned long lockdep_init_trace_data[20];
-static struct stack_trace lockdep_init_trace = {
-	.max_entries = ARRAY_SIZE(lockdep_init_trace_data),
-	.entries = lockdep_init_trace_data,
-};
-
-/*
  * Various lockdep statistics:
  */
 DEFINE_PER_CPU(struct lockdep_stats, lockdep_stats);
@@ -666,23 +651,9 @@
 look_up_lock_class(struct lockdep_map *lock, unsigned int subclass)
 {
 	struct lockdep_subclass_key *key;
-	struct list_head *hash_head;
+	struct hlist_head *hash_head;
 	struct lock_class *class;
 
-#ifdef CONFIG_DEBUG_LOCKDEP
-	/*
-	 * If the architecture calls into lockdep before initializing
-	 * the hashes then we'll warn about it later. (we cannot printk
-	 * right now)
-	 */
-	if (unlikely(!lockdep_initialized)) {
-		lockdep_init();
-		lockdep_init_error = 1;
-		lock_init_error = lock->name;
-		save_stack_trace(&lockdep_init_trace);
-	}
-#endif
-
 	if (unlikely(subclass >= MAX_LOCKDEP_SUBCLASSES)) {
 		debug_locks_off();
 		printk(KERN_ERR
@@ -719,7 +690,7 @@
 	if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
 		return NULL;
 
-	list_for_each_entry_rcu(class, hash_head, hash_entry) {
+	hlist_for_each_entry_rcu(class, hash_head, hash_entry) {
 		if (class->key == key) {
 			/*
 			 * Huh! same key, different name? Did someone trample
@@ -742,7 +713,7 @@
 register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force)
 {
 	struct lockdep_subclass_key *key;
-	struct list_head *hash_head;
+	struct hlist_head *hash_head;
 	struct lock_class *class;
 
 	DEBUG_LOCKS_WARN_ON(!irqs_disabled());
@@ -774,7 +745,7 @@
 	 * We have to do the hash-walk again, to avoid races
 	 * with another CPU:
 	 */
-	list_for_each_entry_rcu(class, hash_head, hash_entry) {
+	hlist_for_each_entry_rcu(class, hash_head, hash_entry) {
 		if (class->key == key)
 			goto out_unlock_set;
 	}
@@ -805,7 +776,7 @@
 	 * We use RCU's safe list-add method to make
 	 * parallel walking of the hash-list safe:
 	 */
-	list_add_tail_rcu(&class->hash_entry, hash_head);
+	hlist_add_head_rcu(&class->hash_entry, hash_head);
 	/*
 	 * Add it to the global list of classes:
 	 */
@@ -1822,7 +1793,7 @@
  */
 static int
 check_prev_add(struct task_struct *curr, struct held_lock *prev,
-	       struct held_lock *next, int distance, int trylock_loop)
+	       struct held_lock *next, int distance, int *stack_saved)
 {
 	struct lock_list *entry;
 	int ret;
@@ -1883,8 +1854,11 @@
 		}
 	}
 
-	if (!trylock_loop && !save_trace(&trace))
-		return 0;
+	if (!*stack_saved) {
+		if (!save_trace(&trace))
+			return 0;
+		*stack_saved = 1;
+	}
 
 	/*
 	 * Ok, all validations passed, add the new lock
@@ -1907,6 +1881,8 @@
 	 * Debugging printouts:
 	 */
 	if (verbose(hlock_class(prev)) || verbose(hlock_class(next))) {
+		/* We drop graph lock, so another thread can overwrite trace. */
+		*stack_saved = 0;
 		graph_unlock();
 		printk("\n new dependency: ");
 		print_lock_name(hlock_class(prev));
@@ -1929,7 +1905,7 @@
 check_prevs_add(struct task_struct *curr, struct held_lock *next)
 {
 	int depth = curr->lockdep_depth;
-	int trylock_loop = 0;
+	int stack_saved = 0;
 	struct held_lock *hlock;
 
 	/*
@@ -1956,7 +1932,7 @@
 		 */
 		if (hlock->read != 2 && hlock->check) {
 			if (!check_prev_add(curr, hlock, next,
-						distance, trylock_loop))
+						distance, &stack_saved))
 				return 0;
 			/*
 			 * Stop after the first non-trylock entry,
@@ -1979,7 +1955,6 @@
 		if (curr->held_locks[depth].irq_context !=
 				curr->held_locks[depth-1].irq_context)
 			break;
-		trylock_loop = 1;
 	}
 	return 1;
 out_bug:
@@ -2007,6 +1982,53 @@
 }
 
 /*
+ * Returns the index of the first held_lock of the current chain
+ */
+static inline int get_first_held_lock(struct task_struct *curr,
+					struct held_lock *hlock)
+{
+	int i;
+	struct held_lock *hlock_curr;
+
+	for (i = curr->lockdep_depth - 1; i >= 0; i--) {
+		hlock_curr = curr->held_locks + i;
+		if (hlock_curr->irq_context != hlock->irq_context)
+			break;
+
+	}
+
+	return ++i;
+}
+
+/*
+ * Checks whether the chain and the current held locks are consistent
+ * in depth and also in content. If they are not it most likely means
+ * that there was a collision during the calculation of the chain_key.
+ * Returns: 0 not passed, 1 passed
+ */
+static int check_no_collision(struct task_struct *curr,
+			struct held_lock *hlock,
+			struct lock_chain *chain)
+{
+#ifdef CONFIG_DEBUG_LOCKDEP
+	int i, j, id;
+
+	i = get_first_held_lock(curr, hlock);
+
+	if (DEBUG_LOCKS_WARN_ON(chain->depth != curr->lockdep_depth - (i - 1)))
+		return 0;
+
+	for (j = 0; j < chain->depth - 1; j++, i++) {
+		id = curr->held_locks[i].class_idx - 1;
+
+		if (DEBUG_LOCKS_WARN_ON(chain_hlocks[chain->base + j] != id))
+			return 0;
+	}
+#endif
+	return 1;
+}
+
+/*
  * Look up a dependency chain. If the key is not present yet then
  * add it and return 1 - in this case the new dependency chain is
  * validated. If the key is already hashed, return 0.
@@ -2017,9 +2039,8 @@
 				     u64 chain_key)
 {
 	struct lock_class *class = hlock_class(hlock);
-	struct list_head *hash_head = chainhashentry(chain_key);
+	struct hlist_head *hash_head = chainhashentry(chain_key);
 	struct lock_chain *chain;
-	struct held_lock *hlock_curr;
 	int i, j;
 
 	/*
@@ -2033,10 +2054,13 @@
 	 * We can walk it lock-free, because entries only get added
 	 * to the hash:
 	 */
-	list_for_each_entry_rcu(chain, hash_head, entry) {
+	hlist_for_each_entry_rcu(chain, hash_head, entry) {
 		if (chain->chain_key == chain_key) {
 cache_hit:
 			debug_atomic_inc(chain_lookup_hits);
+			if (!check_no_collision(curr, hlock, chain))
+				return 0;
+
 			if (very_verbose(class))
 				printk("\nhash chain already cached, key: "
 					"%016Lx tail class: [%p] %s\n",
@@ -2057,7 +2081,7 @@
 	/*
 	 * We have to walk the chain again locked - to avoid duplicates:
 	 */
-	list_for_each_entry(chain, hash_head, entry) {
+	hlist_for_each_entry(chain, hash_head, entry) {
 		if (chain->chain_key == chain_key) {
 			graph_unlock();
 			goto cache_hit;
@@ -2074,13 +2098,7 @@
 	chain = lock_chains + nr_lock_chains++;
 	chain->chain_key = chain_key;
 	chain->irq_context = hlock->irq_context;
-	/* Find the first held_lock of current chain */
-	for (i = curr->lockdep_depth - 1; i >= 0; i--) {
-		hlock_curr = curr->held_locks + i;
-		if (hlock_curr->irq_context != hlock->irq_context)
-			break;
-	}
-	i++;
+	i = get_first_held_lock(curr, hlock);
 	chain->depth = curr->lockdep_depth + 1 - i;
 	if (likely(nr_chain_hlocks + chain->depth <= MAX_LOCKDEP_CHAIN_HLOCKS)) {
 		chain->base = nr_chain_hlocks;
@@ -2091,7 +2109,7 @@
 		}
 		chain_hlocks[chain->base + j] = class - lock_classes;
 	}
-	list_add_tail_rcu(&chain->entry, hash_head);
+	hlist_add_head_rcu(&chain->entry, hash_head);
 	debug_atomic_inc(chain_lookup_misses);
 	inc_chains();
 
@@ -2168,7 +2186,7 @@
 {
 #ifdef CONFIG_DEBUG_LOCKDEP
 	struct held_lock *hlock, *prev_hlock = NULL;
-	unsigned int i, id;
+	unsigned int i;
 	u64 chain_key = 0;
 
 	for (i = 0; i < curr->lockdep_depth; i++) {
@@ -2185,17 +2203,16 @@
 				(unsigned long long)hlock->prev_chain_key);
 			return;
 		}
-		id = hlock->class_idx - 1;
 		/*
 		 * Whoops ran out of static storage again?
 		 */
-		if (DEBUG_LOCKS_WARN_ON(id >= MAX_LOCKDEP_KEYS))
+		if (DEBUG_LOCKS_WARN_ON(hlock->class_idx > MAX_LOCKDEP_KEYS))
 			return;
 
 		if (prev_hlock && (prev_hlock->irq_context !=
 							hlock->irq_context))
 			chain_key = 0;
-		chain_key = iterate_chain_key(chain_key, id);
+		chain_key = iterate_chain_key(chain_key, hlock->class_idx);
 		prev_hlock = hlock;
 	}
 	if (chain_key != curr->curr_chain_key) {
@@ -3073,7 +3090,7 @@
 	struct task_struct *curr = current;
 	struct lock_class *class = NULL;
 	struct held_lock *hlock;
-	unsigned int depth, id;
+	unsigned int depth;
 	int chain_head = 0;
 	int class_idx;
 	u64 chain_key;
@@ -3176,11 +3193,10 @@
 	 * The 'key ID' is what is the most compact key value to drive
 	 * the hash, not class->key.
 	 */
-	id = class - lock_classes;
 	/*
 	 * Whoops, we did it again.. ran straight out of our static allocation.
 	 */
-	if (DEBUG_LOCKS_WARN_ON(id >= MAX_LOCKDEP_KEYS))
+	if (DEBUG_LOCKS_WARN_ON(class_idx > MAX_LOCKDEP_KEYS))
 		return 0;
 
 	chain_key = curr->curr_chain_key;
@@ -3198,7 +3214,7 @@
 		chain_key = 0;
 		chain_head = 1;
 	}
-	chain_key = iterate_chain_key(chain_key, id);
+	chain_key = iterate_chain_key(chain_key, class_idx);
 
 	if (nest_lock && !__lock_is_held(nest_lock))
 		return print_lock_nested_lock_not_held(curr, hlock, ip);
@@ -3875,7 +3891,7 @@
 	nr_process_chains = 0;
 	debug_locks = 1;
 	for (i = 0; i < CHAINHASH_SIZE; i++)
-		INIT_LIST_HEAD(chainhash_table + i);
+		INIT_HLIST_HEAD(chainhash_table + i);
 	raw_local_irq_restore(flags);
 }
 
@@ -3894,7 +3910,7 @@
 	/*
 	 * Unhash the class and remove it from the all_lock_classes list:
 	 */
-	list_del_rcu(&class->hash_entry);
+	hlist_del_rcu(&class->hash_entry);
 	list_del_rcu(&class->lock_entry);
 
 	RCU_INIT_POINTER(class->key, NULL);
@@ -3917,7 +3933,7 @@
 void lockdep_free_key_range(void *start, unsigned long size)
 {
 	struct lock_class *class;
-	struct list_head *head;
+	struct hlist_head *head;
 	unsigned long flags;
 	int i;
 	int locked;
@@ -3930,9 +3946,7 @@
 	 */
 	for (i = 0; i < CLASSHASH_SIZE; i++) {
 		head = classhash_table + i;
-		if (list_empty(head))
-			continue;
-		list_for_each_entry_rcu(class, head, hash_entry) {
+		hlist_for_each_entry_rcu(class, head, hash_entry) {
 			if (within(class->key, start, size))
 				zap_class(class);
 			else if (within(class->name, start, size))
@@ -3962,7 +3976,7 @@
 void lockdep_reset_lock(struct lockdep_map *lock)
 {
 	struct lock_class *class;
-	struct list_head *head;
+	struct hlist_head *head;
 	unsigned long flags;
 	int i, j;
 	int locked;
@@ -3987,9 +4001,7 @@
 	locked = graph_lock();
 	for (i = 0; i < CLASSHASH_SIZE; i++) {
 		head = classhash_table + i;
-		if (list_empty(head))
-			continue;
-		list_for_each_entry_rcu(class, head, hash_entry) {
+		hlist_for_each_entry_rcu(class, head, hash_entry) {
 			int match = 0;
 
 			for (j = 0; j < NR_LOCKDEP_CACHING_CLASSES; j++)
@@ -4013,28 +4025,6 @@
 	raw_local_irq_restore(flags);
 }
 
-void lockdep_init(void)
-{
-	int i;
-
-	/*
-	 * Some architectures have their own start_kernel()
-	 * code which calls lockdep_init(), while we also
-	 * call lockdep_init() from the start_kernel() itself,
-	 * and we want to initialize the hashes only once:
-	 */
-	if (lockdep_initialized)
-		return;
-
-	for (i = 0; i < CLASSHASH_SIZE; i++)
-		INIT_LIST_HEAD(classhash_table + i);
-
-	for (i = 0; i < CHAINHASH_SIZE; i++)
-		INIT_LIST_HEAD(chainhash_table + i);
-
-	lockdep_initialized = 1;
-}
-
 void __init lockdep_info(void)
 {
 	printk("Lock dependency validator: Copyright (c) 2006 Red Hat, Inc., Ingo Molnar\n");
@@ -4061,14 +4051,6 @@
 
 	printk(" per task-struct memory footprint: %lu bytes\n",
 		sizeof(struct held_lock) * MAX_LOCK_DEPTH);
-
-#ifdef CONFIG_DEBUG_LOCKDEP
-	if (lockdep_init_error) {
-		printk("WARNING: lockdep init error: lock '%s' was acquired before lockdep_init().\n", lock_init_error);
-		printk("Call stack leading to lockdep invocation was:\n");
-		print_stack_trace(&lockdep_init_trace, 0);
-	}
-#endif
 }
 
 static void
diff --git a/kernel/locking/mcs_spinlock.h b/kernel/locking/mcs_spinlock.h
index 5b9102a..c835270 100644
--- a/kernel/locking/mcs_spinlock.h
+++ b/kernel/locking/mcs_spinlock.h
@@ -67,7 +67,13 @@
 	node->locked = 0;
 	node->next   = NULL;
 
-	prev = xchg_acquire(lock, node);
+	/*
+	 * We rely on the full barrier with global transitivity implied by the
+	 * below xchg() to order the initialization stores above against any
+	 * observation of @node. And to provide the ACQUIRE ordering associated
+	 * with a LOCK primitive.
+	 */
+	prev = xchg(lock, node);
 	if (likely(prev == NULL)) {
 		/*
 		 * Lock acquired, don't need to set node->locked to 1. Threads
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index 0551c21..e364b42 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -716,6 +716,7 @@
 __mutex_unlock_common_slowpath(struct mutex *lock, int nested)
 {
 	unsigned long flags;
+	WAKE_Q(wake_q);
 
 	/*
 	 * As a performance measurement, release the lock before doing other
@@ -743,11 +744,11 @@
 					   struct mutex_waiter, list);
 
 		debug_mutex_wake_waiter(lock, waiter);
-
-		wake_up_process(waiter->task);
+		wake_q_add(&wake_q, waiter->task);
 	}
 
 	spin_unlock_mutex(&lock->wait_lock, flags);
+	wake_up_q(&wake_q);
 }
 
 /*
diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index 393d187..ce2f75e 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -358,8 +358,7 @@
 	 * sequentiality; this is because not all clear_pending_set_locked()
 	 * implementations imply full barriers.
 	 */
-	while ((val = smp_load_acquire(&lock->val.counter)) & _Q_LOCKED_MASK)
-		cpu_relax();
+	smp_cond_acquire(!(atomic_read(&lock->val) & _Q_LOCKED_MASK));
 
 	/*
 	 * take ownership and clear the pending bit.
@@ -435,7 +434,7 @@
 	 *
 	 * The PV pv_wait_head_or_lock function, if active, will acquire
 	 * the lock and return a non-zero value. So we have to skip the
-	 * smp_load_acquire() call. As the next PV queue head hasn't been
+	 * smp_cond_acquire() call. As the next PV queue head hasn't been
 	 * designated yet, there is no way for the locked value to become
 	 * _Q_SLOW_VAL. So both the set_locked() and the
 	 * atomic_cmpxchg_relaxed() calls will be safe.
@@ -466,7 +465,7 @@
 			break;
 		}
 		/*
-		 * The smp_load_acquire() call above has provided the necessary
+		 * The smp_cond_acquire() call above has provided the necessary
 		 * acquire semantics required for locking. At most two
 		 * iterations of this loop may be ran.
 		 */
diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
index 87bb235..21ede57 100644
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -55,6 +55,11 @@
 };
 
 /*
+ * Include queued spinlock statistics code
+ */
+#include "qspinlock_stat.h"
+
+/*
  * By replacing the regular queued_spin_trylock() with the function below,
  * it will be called once when a lock waiter enter the PV slowpath before
  * being queued. By allowing one lock stealing attempt here when the pending
@@ -65,9 +70,11 @@
 static inline bool pv_queued_spin_steal_lock(struct qspinlock *lock)
 {
 	struct __qspinlock *l = (void *)lock;
+	int ret = !(atomic_read(&lock->val) & _Q_LOCKED_PENDING_MASK) &&
+		   (cmpxchg(&l->locked, 0, _Q_LOCKED_VAL) == 0);
 
-	return !(atomic_read(&lock->val) & _Q_LOCKED_PENDING_MASK) &&
-		(cmpxchg(&l->locked, 0, _Q_LOCKED_VAL) == 0);
+	qstat_inc(qstat_pv_lock_stealing, ret);
+	return ret;
 }
 
 /*
@@ -138,11 +145,6 @@
 #endif /* _Q_PENDING_BITS == 8 */
 
 /*
- * Include queued spinlock statistics code
- */
-#include "qspinlock_stat.h"
-
-/*
  * Lock and MCS node addresses hash table for fast lookup
  *
  * Hashing is done on a per-cacheline basis to minimize the need to access
@@ -398,6 +400,11 @@
 	if (READ_ONCE(pn->state) == vcpu_hashed)
 		lp = (struct qspinlock **)1;
 
+	/*
+	 * Tracking # of slowpath locking operations
+	 */
+	qstat_inc(qstat_pv_lock_slowpath, true);
+
 	for (;; waitcnt++) {
 		/*
 		 * Set correct vCPU state to be used by queue node wait-early
diff --git a/kernel/locking/qspinlock_stat.h b/kernel/locking/qspinlock_stat.h
index 640dcec..eb2a2c9 100644
--- a/kernel/locking/qspinlock_stat.h
+++ b/kernel/locking/qspinlock_stat.h
@@ -22,6 +22,7 @@
  *   pv_kick_wake	- # of vCPU kicks used for computing pv_latency_wake
  *   pv_latency_kick	- average latency (ns) of vCPU kick operation
  *   pv_latency_wake	- average latency (ns) from vCPU kick to wakeup
+ *   pv_lock_slowpath	- # of locking operations via the slowpath
  *   pv_lock_stealing	- # of lock stealing operations
  *   pv_spurious_wakeup	- # of spurious wakeups
  *   pv_wait_again	- # of vCPU wait's that happened after a vCPU kick
@@ -45,6 +46,7 @@
 	qstat_pv_kick_wake,
 	qstat_pv_latency_kick,
 	qstat_pv_latency_wake,
+	qstat_pv_lock_slowpath,
 	qstat_pv_lock_stealing,
 	qstat_pv_spurious_wakeup,
 	qstat_pv_wait_again,
@@ -70,6 +72,7 @@
 	[qstat_pv_spurious_wakeup] = "pv_spurious_wakeup",
 	[qstat_pv_latency_kick]	   = "pv_latency_kick",
 	[qstat_pv_latency_wake]    = "pv_latency_wake",
+	[qstat_pv_lock_slowpath]   = "pv_lock_slowpath",
 	[qstat_pv_lock_stealing]   = "pv_lock_stealing",
 	[qstat_pv_wait_again]      = "pv_wait_again",
 	[qstat_pv_wait_early]      = "pv_wait_early",
@@ -279,19 +282,6 @@
 #define pv_kick(c)	__pv_kick(c)
 #define pv_wait(p, v)	__pv_wait(p, v)
 
-/*
- * PV unfair trylock count tracking function
- */
-static inline int qstat_spin_steal_lock(struct qspinlock *lock)
-{
-	int ret = pv_queued_spin_steal_lock(lock);
-
-	qstat_inc(qstat_pv_lock_stealing, ret);
-	return ret;
-}
-#undef  queued_spin_trylock
-#define queued_spin_trylock(l)	qstat_spin_steal_lock(l)
-
 #else /* CONFIG_QUEUED_LOCK_STAT */
 
 static inline void qstat_inc(enum qlock_stats stat, bool cond)	{ }
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 8251e75..3e74660 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -99,13 +99,14 @@
  * 2) Drop lock->wait_lock
  * 3) Try to unlock the lock with cmpxchg
  */
-static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock)
+static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock,
+					unsigned long flags)
 	__releases(lock->wait_lock)
 {
 	struct task_struct *owner = rt_mutex_owner(lock);
 
 	clear_rt_mutex_waiters(lock);
-	raw_spin_unlock(&lock->wait_lock);
+	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
 	/*
 	 * If a new waiter comes in between the unlock and the cmpxchg
 	 * we have two situations:
@@ -147,11 +148,12 @@
 /*
  * Simple slow path only version: lock->owner is protected by lock->wait_lock.
  */
-static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock)
+static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock,
+					unsigned long flags)
 	__releases(lock->wait_lock)
 {
 	lock->owner = NULL;
-	raw_spin_unlock(&lock->wait_lock);
+	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
 	return true;
 }
 #endif
@@ -433,7 +435,6 @@
 	int ret = 0, depth = 0;
 	struct rt_mutex *lock;
 	bool detect_deadlock;
-	unsigned long flags;
 	bool requeue = true;
 
 	detect_deadlock = rt_mutex_cond_detect_deadlock(orig_waiter, chwalk);
@@ -476,7 +477,7 @@
 	/*
 	 * [1] Task cannot go away as we did a get_task() before !
 	 */
-	raw_spin_lock_irqsave(&task->pi_lock, flags);
+	raw_spin_lock_irq(&task->pi_lock);
 
 	/*
 	 * [2] Get the waiter on which @task is blocked on.
@@ -560,7 +561,7 @@
 	 * operations.
 	 */
 	if (!raw_spin_trylock(&lock->wait_lock)) {
-		raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+		raw_spin_unlock_irq(&task->pi_lock);
 		cpu_relax();
 		goto retry;
 	}
@@ -591,7 +592,7 @@
 		/*
 		 * No requeue[7] here. Just release @task [8]
 		 */
-		raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+		raw_spin_unlock(&task->pi_lock);
 		put_task_struct(task);
 
 		/*
@@ -599,14 +600,14 @@
 		 * If there is no owner of the lock, end of chain.
 		 */
 		if (!rt_mutex_owner(lock)) {
-			raw_spin_unlock(&lock->wait_lock);
+			raw_spin_unlock_irq(&lock->wait_lock);
 			return 0;
 		}
 
 		/* [10] Grab the next task, i.e. owner of @lock */
 		task = rt_mutex_owner(lock);
 		get_task_struct(task);
-		raw_spin_lock_irqsave(&task->pi_lock, flags);
+		raw_spin_lock(&task->pi_lock);
 
 		/*
 		 * No requeue [11] here. We just do deadlock detection.
@@ -621,8 +622,8 @@
 		top_waiter = rt_mutex_top_waiter(lock);
 
 		/* [13] Drop locks */
-		raw_spin_unlock_irqrestore(&task->pi_lock, flags);
-		raw_spin_unlock(&lock->wait_lock);
+		raw_spin_unlock(&task->pi_lock);
+		raw_spin_unlock_irq(&lock->wait_lock);
 
 		/* If owner is not blocked, end of chain. */
 		if (!next_lock)
@@ -643,7 +644,7 @@
 	rt_mutex_enqueue(lock, waiter);
 
 	/* [8] Release the task */
-	raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+	raw_spin_unlock(&task->pi_lock);
 	put_task_struct(task);
 
 	/*
@@ -661,14 +662,14 @@
 		 */
 		if (prerequeue_top_waiter != rt_mutex_top_waiter(lock))
 			wake_up_process(rt_mutex_top_waiter(lock)->task);
-		raw_spin_unlock(&lock->wait_lock);
+		raw_spin_unlock_irq(&lock->wait_lock);
 		return 0;
 	}
 
 	/* [10] Grab the next task, i.e. the owner of @lock */
 	task = rt_mutex_owner(lock);
 	get_task_struct(task);
-	raw_spin_lock_irqsave(&task->pi_lock, flags);
+	raw_spin_lock(&task->pi_lock);
 
 	/* [11] requeue the pi waiters if necessary */
 	if (waiter == rt_mutex_top_waiter(lock)) {
@@ -722,8 +723,8 @@
 	top_waiter = rt_mutex_top_waiter(lock);
 
 	/* [13] Drop the locks */
-	raw_spin_unlock_irqrestore(&task->pi_lock, flags);
-	raw_spin_unlock(&lock->wait_lock);
+	raw_spin_unlock(&task->pi_lock);
+	raw_spin_unlock_irq(&lock->wait_lock);
 
 	/*
 	 * Make the actual exit decisions [12], based on the stored
@@ -746,7 +747,7 @@
 	goto again;
 
  out_unlock_pi:
-	raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+	raw_spin_unlock_irq(&task->pi_lock);
  out_put_task:
 	put_task_struct(task);
 
@@ -756,7 +757,7 @@
 /*
  * Try to take an rt-mutex
  *
- * Must be called with lock->wait_lock held.
+ * Must be called with lock->wait_lock held and interrupts disabled
  *
  * @lock:   The lock to be acquired.
  * @task:   The task which wants to acquire the lock
@@ -766,8 +767,6 @@
 static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
 				struct rt_mutex_waiter *waiter)
 {
-	unsigned long flags;
-
 	/*
 	 * Before testing whether we can acquire @lock, we set the
 	 * RT_MUTEX_HAS_WAITERS bit in @lock->owner. This forces all
@@ -852,7 +851,7 @@
 	 * case, but conditionals are more expensive than a redundant
 	 * store.
 	 */
-	raw_spin_lock_irqsave(&task->pi_lock, flags);
+	raw_spin_lock(&task->pi_lock);
 	task->pi_blocked_on = NULL;
 	/*
 	 * Finish the lock acquisition. @task is the new owner. If
@@ -861,7 +860,7 @@
 	 */
 	if (rt_mutex_has_waiters(lock))
 		rt_mutex_enqueue_pi(task, rt_mutex_top_waiter(lock));
-	raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+	raw_spin_unlock(&task->pi_lock);
 
 takeit:
 	/* We got the lock. */
@@ -883,7 +882,7 @@
  *
  * Prepare waiter and propagate pi chain
  *
- * This must be called with lock->wait_lock held.
+ * This must be called with lock->wait_lock held and interrupts disabled
  */
 static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
 				   struct rt_mutex_waiter *waiter,
@@ -894,7 +893,6 @@
 	struct rt_mutex_waiter *top_waiter = waiter;
 	struct rt_mutex *next_lock;
 	int chain_walk = 0, res;
-	unsigned long flags;
 
 	/*
 	 * Early deadlock detection. We really don't want the task to
@@ -908,7 +906,7 @@
 	if (owner == task)
 		return -EDEADLK;
 
-	raw_spin_lock_irqsave(&task->pi_lock, flags);
+	raw_spin_lock(&task->pi_lock);
 	__rt_mutex_adjust_prio(task);
 	waiter->task = task;
 	waiter->lock = lock;
@@ -921,12 +919,12 @@
 
 	task->pi_blocked_on = waiter;
 
-	raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+	raw_spin_unlock(&task->pi_lock);
 
 	if (!owner)
 		return 0;
 
-	raw_spin_lock_irqsave(&owner->pi_lock, flags);
+	raw_spin_lock(&owner->pi_lock);
 	if (waiter == rt_mutex_top_waiter(lock)) {
 		rt_mutex_dequeue_pi(owner, top_waiter);
 		rt_mutex_enqueue_pi(owner, waiter);
@@ -941,7 +939,7 @@
 	/* Store the lock on which owner is blocked or NULL */
 	next_lock = task_blocked_on_lock(owner);
 
-	raw_spin_unlock_irqrestore(&owner->pi_lock, flags);
+	raw_spin_unlock(&owner->pi_lock);
 	/*
 	 * Even if full deadlock detection is on, if the owner is not
 	 * blocked itself, we can avoid finding this out in the chain
@@ -957,12 +955,12 @@
 	 */
 	get_task_struct(owner);
 
-	raw_spin_unlock(&lock->wait_lock);
+	raw_spin_unlock_irq(&lock->wait_lock);
 
 	res = rt_mutex_adjust_prio_chain(owner, chwalk, lock,
 					 next_lock, waiter, task);
 
-	raw_spin_lock(&lock->wait_lock);
+	raw_spin_lock_irq(&lock->wait_lock);
 
 	return res;
 }
@@ -971,15 +969,14 @@
  * Remove the top waiter from the current tasks pi waiter tree and
  * queue it up.
  *
- * Called with lock->wait_lock held.
+ * Called with lock->wait_lock held and interrupts disabled.
  */
 static void mark_wakeup_next_waiter(struct wake_q_head *wake_q,
 				    struct rt_mutex *lock)
 {
 	struct rt_mutex_waiter *waiter;
-	unsigned long flags;
 
-	raw_spin_lock_irqsave(&current->pi_lock, flags);
+	raw_spin_lock(&current->pi_lock);
 
 	waiter = rt_mutex_top_waiter(lock);
 
@@ -1001,7 +998,7 @@
 	 */
 	lock->owner = (void *) RT_MUTEX_HAS_WAITERS;
 
-	raw_spin_unlock_irqrestore(&current->pi_lock, flags);
+	raw_spin_unlock(&current->pi_lock);
 
 	wake_q_add(wake_q, waiter->task);
 }
@@ -1009,7 +1006,7 @@
 /*
  * Remove a waiter from a lock and give up
  *
- * Must be called with lock->wait_lock held and
+ * Must be called with lock->wait_lock held and interrupts disabled. I must
  * have just failed to try_to_take_rt_mutex().
  */
 static void remove_waiter(struct rt_mutex *lock,
@@ -1018,12 +1015,11 @@
 	bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock));
 	struct task_struct *owner = rt_mutex_owner(lock);
 	struct rt_mutex *next_lock;
-	unsigned long flags;
 
-	raw_spin_lock_irqsave(&current->pi_lock, flags);
+	raw_spin_lock(&current->pi_lock);
 	rt_mutex_dequeue(lock, waiter);
 	current->pi_blocked_on = NULL;
-	raw_spin_unlock_irqrestore(&current->pi_lock, flags);
+	raw_spin_unlock(&current->pi_lock);
 
 	/*
 	 * Only update priority if the waiter was the highest priority
@@ -1032,7 +1028,7 @@
 	if (!owner || !is_top_waiter)
 		return;
 
-	raw_spin_lock_irqsave(&owner->pi_lock, flags);
+	raw_spin_lock(&owner->pi_lock);
 
 	rt_mutex_dequeue_pi(owner, waiter);
 
@@ -1044,7 +1040,7 @@
 	/* Store the lock on which owner is blocked or NULL */
 	next_lock = task_blocked_on_lock(owner);
 
-	raw_spin_unlock_irqrestore(&owner->pi_lock, flags);
+	raw_spin_unlock(&owner->pi_lock);
 
 	/*
 	 * Don't walk the chain, if the owner task is not blocked
@@ -1056,12 +1052,12 @@
 	/* gets dropped in rt_mutex_adjust_prio_chain()! */
 	get_task_struct(owner);
 
-	raw_spin_unlock(&lock->wait_lock);
+	raw_spin_unlock_irq(&lock->wait_lock);
 
 	rt_mutex_adjust_prio_chain(owner, RT_MUTEX_MIN_CHAINWALK, lock,
 				   next_lock, NULL, current);
 
-	raw_spin_lock(&lock->wait_lock);
+	raw_spin_lock_irq(&lock->wait_lock);
 }
 
 /*
@@ -1097,11 +1093,11 @@
  * __rt_mutex_slowlock() - Perform the wait-wake-try-to-take loop
  * @lock:		 the rt_mutex to take
  * @state:		 the state the task should block in (TASK_INTERRUPTIBLE
- * 			 or TASK_UNINTERRUPTIBLE)
+ *			 or TASK_UNINTERRUPTIBLE)
  * @timeout:		 the pre-initialized and started timer, or NULL for none
  * @waiter:		 the pre-initialized rt_mutex_waiter
  *
- * lock->wait_lock must be held by the caller.
+ * Must be called with lock->wait_lock held and interrupts disabled
  */
 static int __sched
 __rt_mutex_slowlock(struct rt_mutex *lock, int state,
@@ -1129,13 +1125,13 @@
 				break;
 		}
 
-		raw_spin_unlock(&lock->wait_lock);
+		raw_spin_unlock_irq(&lock->wait_lock);
 
 		debug_rt_mutex_print_deadlock(waiter);
 
 		schedule();
 
-		raw_spin_lock(&lock->wait_lock);
+		raw_spin_lock_irq(&lock->wait_lock);
 		set_current_state(state);
 	}
 
@@ -1172,17 +1168,26 @@
 		  enum rtmutex_chainwalk chwalk)
 {
 	struct rt_mutex_waiter waiter;
+	unsigned long flags;
 	int ret = 0;
 
 	debug_rt_mutex_init_waiter(&waiter);
 	RB_CLEAR_NODE(&waiter.pi_tree_entry);
 	RB_CLEAR_NODE(&waiter.tree_entry);
 
-	raw_spin_lock(&lock->wait_lock);
+	/*
+	 * Technically we could use raw_spin_[un]lock_irq() here, but this can
+	 * be called in early boot if the cmpxchg() fast path is disabled
+	 * (debug, no architecture support). In this case we will acquire the
+	 * rtmutex with lock->wait_lock held. But we cannot unconditionally
+	 * enable interrupts in that early boot case. So we need to use the
+	 * irqsave/restore variants.
+	 */
+	raw_spin_lock_irqsave(&lock->wait_lock, flags);
 
 	/* Try to acquire the lock again: */
 	if (try_to_take_rt_mutex(lock, current, NULL)) {
-		raw_spin_unlock(&lock->wait_lock);
+		raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
 		return 0;
 	}
 
@@ -1211,7 +1216,7 @@
 	 */
 	fixup_rt_mutex_waiters(lock);
 
-	raw_spin_unlock(&lock->wait_lock);
+	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
 
 	/* Remove pending timer: */
 	if (unlikely(timeout))
@@ -1227,6 +1232,7 @@
  */
 static inline int rt_mutex_slowtrylock(struct rt_mutex *lock)
 {
+	unsigned long flags;
 	int ret;
 
 	/*
@@ -1238,10 +1244,10 @@
 		return 0;
 
 	/*
-	 * The mutex has currently no owner. Lock the wait lock and
-	 * try to acquire the lock.
+	 * The mutex has currently no owner. Lock the wait lock and try to
+	 * acquire the lock. We use irqsave here to support early boot calls.
 	 */
-	raw_spin_lock(&lock->wait_lock);
+	raw_spin_lock_irqsave(&lock->wait_lock, flags);
 
 	ret = try_to_take_rt_mutex(lock, current, NULL);
 
@@ -1251,7 +1257,7 @@
 	 */
 	fixup_rt_mutex_waiters(lock);
 
-	raw_spin_unlock(&lock->wait_lock);
+	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
 
 	return ret;
 }
@@ -1263,7 +1269,10 @@
 static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock,
 					struct wake_q_head *wake_q)
 {
-	raw_spin_lock(&lock->wait_lock);
+	unsigned long flags;
+
+	/* irqsave required to support early boot calls */
+	raw_spin_lock_irqsave(&lock->wait_lock, flags);
 
 	debug_rt_mutex_unlock(lock);
 
@@ -1302,10 +1311,10 @@
 	 */
 	while (!rt_mutex_has_waiters(lock)) {
 		/* Drops lock->wait_lock ! */
-		if (unlock_rt_mutex_safe(lock) == true)
+		if (unlock_rt_mutex_safe(lock, flags) == true)
 			return false;
 		/* Relock the rtmutex and try again */
-		raw_spin_lock(&lock->wait_lock);
+		raw_spin_lock_irqsave(&lock->wait_lock, flags);
 	}
 
 	/*
@@ -1316,7 +1325,7 @@
 	 */
 	mark_wakeup_next_waiter(wake_q, lock);
 
-	raw_spin_unlock(&lock->wait_lock);
+	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
 
 	/* check PI boosting */
 	return true;
@@ -1596,10 +1605,10 @@
 {
 	int ret;
 
-	raw_spin_lock(&lock->wait_lock);
+	raw_spin_lock_irq(&lock->wait_lock);
 
 	if (try_to_take_rt_mutex(lock, task, NULL)) {
-		raw_spin_unlock(&lock->wait_lock);
+		raw_spin_unlock_irq(&lock->wait_lock);
 		return 1;
 	}
 
@@ -1620,7 +1629,7 @@
 	if (unlikely(ret))
 		remove_waiter(lock, waiter);
 
-	raw_spin_unlock(&lock->wait_lock);
+	raw_spin_unlock_irq(&lock->wait_lock);
 
 	debug_rt_mutex_print_deadlock(waiter);
 
@@ -1668,7 +1677,7 @@
 {
 	int ret;
 
-	raw_spin_lock(&lock->wait_lock);
+	raw_spin_lock_irq(&lock->wait_lock);
 
 	set_current_state(TASK_INTERRUPTIBLE);
 
@@ -1684,7 +1693,7 @@
 	 */
 	fixup_rt_mutex_waiters(lock);
 
-	raw_spin_unlock(&lock->wait_lock);
+	raw_spin_unlock_irq(&lock->wait_lock);
 
 	return ret;
 }
diff --git a/kernel/memremap.c b/kernel/memremap.c
index e517a16..fb9b887 100644
--- a/kernel/memremap.c
+++ b/kernel/memremap.c
@@ -29,10 +29,10 @@
 
 static void *try_ram_remap(resource_size_t offset, size_t size)
 {
-	struct page *page = pfn_to_page(offset >> PAGE_SHIFT);
+	unsigned long pfn = PHYS_PFN(offset);
 
 	/* In the simple case just return the existing linear address */
-	if (!PageHighMem(page))
+	if (pfn_valid(pfn) && !PageHighMem(pfn_to_page(pfn)))
 		return __va(offset);
 	return NULL; /* fallback to ioremap_cache */
 }
@@ -47,7 +47,7 @@
  * being mapped does not have i/o side effects and the __iomem
  * annotation is not applicable.
  *
- * MEMREMAP_WB - matches the default mapping for "System RAM" on
+ * MEMREMAP_WB - matches the default mapping for System RAM on
  * the architecture.  This is usually a read-allocate write-back cache.
  * Morever, if MEMREMAP_WB is specified and the requested remap region is RAM
  * memremap() will bypass establishing a new mapping and instead return
@@ -56,11 +56,12 @@
  * MEMREMAP_WT - establish a mapping whereby writes either bypass the
  * cache or are written through to memory and never exist in a
  * cache-dirty state with respect to program visibility.  Attempts to
- * map "System RAM" with this mapping type will fail.
+ * map System RAM with this mapping type will fail.
  */
 void *memremap(resource_size_t offset, size_t size, unsigned long flags)
 {
-	int is_ram = region_intersects(offset, size, "System RAM");
+	int is_ram = region_intersects(offset, size,
+				       IORESOURCE_SYSTEM_RAM, IORES_DESC_NONE);
 	void *addr = NULL;
 
 	if (is_ram == REGION_MIXED) {
@@ -76,7 +77,7 @@
 		 * MEMREMAP_WB is special in that it can be satisifed
 		 * from the direct map.  Some archs depend on the
 		 * capability of memremap() to autodetect cases where
-		 * the requested range is potentially in "System RAM"
+		 * the requested range is potentially in System RAM.
 		 */
 		if (is_ram == REGION_INTERSECTS)
 			addr = try_ram_remap(offset, size);
@@ -88,7 +89,7 @@
 	 * If we don't have a mapping yet and more request flags are
 	 * pending then we will be attempting to establish a new virtual
 	 * address mapping.  Enforce that this mapping is not aliasing
-	 * "System RAM"
+	 * System RAM.
 	 */
 	if (!addr && is_ram == REGION_INTERSECTS && flags) {
 		WARN_ONCE(1, "memremap attempted on ram %pa size: %#lx\n",
@@ -114,7 +115,7 @@
 
 static void devm_memremap_release(struct device *dev, void *res)
 {
-	memunmap(res);
+	memunmap(*(void **)res);
 }
 
 static int devm_memremap_match(struct device *dev, void *res, void *match_data)
@@ -136,8 +137,10 @@
 	if (addr) {
 		*ptr = addr;
 		devres_add(dev, ptr);
-	} else
+	} else {
 		devres_free(ptr);
+		return ERR_PTR(-ENXIO);
+	}
 
 	return addr;
 }
@@ -150,7 +153,7 @@
 }
 EXPORT_SYMBOL(devm_memunmap);
 
-pfn_t phys_to_pfn_t(dma_addr_t addr, unsigned long flags)
+pfn_t phys_to_pfn_t(phys_addr_t addr, u64 flags)
 {
 	return __pfn_to_pfn_t(addr >> PAGE_SHIFT, flags);
 }
@@ -183,7 +186,11 @@
 
 static void pgmap_radix_release(struct resource *res)
 {
-	resource_size_t key;
+	resource_size_t key, align_start, align_size, align_end;
+
+	align_start = res->start & ~(SECTION_SIZE - 1);
+	align_size = ALIGN(resource_size(res), SECTION_SIZE);
+	align_end = align_start + align_size - 1;
 
 	mutex_lock(&pgmap_lock);
 	for (key = res->start; key <= res->end; key += SECTION_SIZE)
@@ -226,12 +233,11 @@
 		percpu_ref_put(pgmap->ref);
 	}
 
-	pgmap_radix_release(res);
-
 	/* pages are dead and unused, undo the arch mapping */
 	align_start = res->start & ~(SECTION_SIZE - 1);
 	align_size = ALIGN(resource_size(res), SECTION_SIZE);
 	arch_remove_memory(align_start, align_size);
+	pgmap_radix_release(res);
 	dev_WARN_ONCE(dev, pgmap->altmap && pgmap->altmap->alloc,
 			"%s: failed to free all reserved pages\n", __func__);
 }
@@ -265,13 +271,17 @@
 void *devm_memremap_pages(struct device *dev, struct resource *res,
 		struct percpu_ref *ref, struct vmem_altmap *altmap)
 {
-	int is_ram = region_intersects(res->start, resource_size(res),
-			"System RAM");
-	resource_size_t key, align_start, align_size;
+	resource_size_t key, align_start, align_size, align_end;
 	struct dev_pagemap *pgmap;
 	struct page_map *page_map;
+	int error, nid, is_ram;
 	unsigned long pfn;
-	int error, nid;
+
+	align_start = res->start & ~(SECTION_SIZE - 1);
+	align_size = ALIGN(res->start + resource_size(res), SECTION_SIZE)
+		- align_start;
+	is_ram = region_intersects(align_start, align_size,
+		IORESOURCE_SYSTEM_RAM, IORES_DESC_NONE);
 
 	if (is_ram == REGION_MIXED) {
 		WARN_ONCE(1, "%s attempted on mixed region %pr\n",
@@ -309,7 +319,8 @@
 
 	mutex_lock(&pgmap_lock);
 	error = 0;
-	for (key = res->start; key <= res->end; key += SECTION_SIZE) {
+	align_end = align_start + align_size - 1;
+	for (key = align_start; key <= align_end; key += SECTION_SIZE) {
 		struct dev_pagemap *dup;
 
 		rcu_read_lock();
@@ -336,8 +347,6 @@
 	if (nid < 0)
 		nid = numa_mem_id();
 
-	align_start = res->start & ~(SECTION_SIZE - 1);
-	align_size = ALIGN(resource_size(res), SECTION_SIZE);
 	error = arch_add_memory(nid, align_start, align_size, true);
 	if (error)
 		goto err_add_memory;
@@ -345,8 +354,13 @@
 	for_each_device_pfn(pfn, page_map) {
 		struct page *page = pfn_to_page(pfn);
 
-		/* ZONE_DEVICE pages must never appear on a slab lru */
-		list_force_poison(&page->lru);
+		/*
+		 * ZONE_DEVICE pages union ->lru with a ->pgmap back
+		 * pointer.  It is a bug if a ZONE_DEVICE page is ever
+		 * freed or placed on a driver-private list.  Seed the
+		 * storage with LIST_POISON* values.
+		 */
+		list_del(&page->lru);
 		page->pgmap = pgmap;
 	}
 	devres_add(dev, page_map);
diff --git a/kernel/module.c b/kernel/module.c
index 8358f46..794ebe8 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -303,6 +303,9 @@
 	struct _ddebug *debug;
 	unsigned int num_debug;
 	bool sig_ok;
+#ifdef CONFIG_KALLSYMS
+	unsigned long mod_kallsyms_init_off;
+#endif
 	struct {
 		unsigned int sym, str, mod, vers, info, pcpu;
 	} index;
@@ -981,6 +984,8 @@
 		mod->exit();
 	blocking_notifier_call_chain(&module_notify_list,
 				     MODULE_STATE_GOING, mod);
+	ftrace_release_mod(mod);
+
 	async_synchronize_full();
 
 	/* Store the name of the last unloaded module for diagnostic purposes */
@@ -2480,10 +2485,21 @@
 	strsect->sh_flags |= SHF_ALLOC;
 	strsect->sh_entsize = get_offset(mod, &mod->init_layout.size, strsect,
 					 info->index.str) | INIT_OFFSET_MASK;
-	mod->init_layout.size = debug_align(mod->init_layout.size);
 	pr_debug("\t%s\n", info->secstrings + strsect->sh_name);
+
+	/* We'll tack temporary mod_kallsyms on the end. */
+	mod->init_layout.size = ALIGN(mod->init_layout.size,
+				      __alignof__(struct mod_kallsyms));
+	info->mod_kallsyms_init_off = mod->init_layout.size;
+	mod->init_layout.size += sizeof(struct mod_kallsyms);
+	mod->init_layout.size = debug_align(mod->init_layout.size);
 }
 
+/*
+ * We use the full symtab and strtab which layout_symtab arranged to
+ * be appended to the init section.  Later we switch to the cut-down
+ * core-only ones.
+ */
 static void add_kallsyms(struct module *mod, const struct load_info *info)
 {
 	unsigned int i, ndst;
@@ -2492,29 +2508,34 @@
 	char *s;
 	Elf_Shdr *symsec = &info->sechdrs[info->index.sym];
 
-	mod->symtab = (void *)symsec->sh_addr;
-	mod->num_symtab = symsec->sh_size / sizeof(Elf_Sym);
+	/* Set up to point into init section. */
+	mod->kallsyms = mod->init_layout.base + info->mod_kallsyms_init_off;
+
+	mod->kallsyms->symtab = (void *)symsec->sh_addr;
+	mod->kallsyms->num_symtab = symsec->sh_size / sizeof(Elf_Sym);
 	/* Make sure we get permanent strtab: don't use info->strtab. */
-	mod->strtab = (void *)info->sechdrs[info->index.str].sh_addr;
+	mod->kallsyms->strtab = (void *)info->sechdrs[info->index.str].sh_addr;
 
 	/* Set types up while we still have access to sections. */
-	for (i = 0; i < mod->num_symtab; i++)
-		mod->symtab[i].st_info = elf_type(&mod->symtab[i], info);
+	for (i = 0; i < mod->kallsyms->num_symtab; i++)
+		mod->kallsyms->symtab[i].st_info
+			= elf_type(&mod->kallsyms->symtab[i], info);
 
-	mod->core_symtab = dst = mod->core_layout.base + info->symoffs;
-	mod->core_strtab = s = mod->core_layout.base + info->stroffs;
-	src = mod->symtab;
-	for (ndst = i = 0; i < mod->num_symtab; i++) {
+	/* Now populate the cut down core kallsyms for after init. */
+	mod->core_kallsyms.symtab = dst = mod->core_layout.base + info->symoffs;
+	mod->core_kallsyms.strtab = s = mod->core_layout.base + info->stroffs;
+	src = mod->kallsyms->symtab;
+	for (ndst = i = 0; i < mod->kallsyms->num_symtab; i++) {
 		if (i == 0 ||
 		    is_core_symbol(src+i, info->sechdrs, info->hdr->e_shnum,
 				   info->index.pcpu)) {
 			dst[ndst] = src[i];
-			dst[ndst++].st_name = s - mod->core_strtab;
-			s += strlcpy(s, &mod->strtab[src[i].st_name],
+			dst[ndst++].st_name = s - mod->core_kallsyms.strtab;
+			s += strlcpy(s, &mod->kallsyms->strtab[src[i].st_name],
 				     KSYM_NAME_LEN) + 1;
 		}
 	}
-	mod->core_num_syms = ndst;
+	mod->core_kallsyms.num_symtab = ndst;
 }
 #else
 static inline void layout_symtab(struct module *mod, struct load_info *info)
@@ -3263,9 +3284,8 @@
 	module_put(mod);
 	trim_init_extable(mod);
 #ifdef CONFIG_KALLSYMS
-	mod->num_symtab = mod->core_num_syms;
-	mod->symtab = mod->core_symtab;
-	mod->strtab = mod->core_strtab;
+	/* Switch to core kallsyms now init is done: kallsyms may be walking! */
+	rcu_assign_pointer(mod->kallsyms, &mod->core_kallsyms);
 #endif
 	mod_tree_remove_init(mod);
 	disable_ro_nx(&mod->init_layout);
@@ -3295,6 +3315,7 @@
 	module_put(mod);
 	blocking_notifier_call_chain(&module_notify_list,
 				     MODULE_STATE_GOING, mod);
+	ftrace_release_mod(mod);
 	free_module(mod);
 	wake_up_all(&module_wq);
 	return ret;
@@ -3371,6 +3392,7 @@
 	mod->state = MODULE_STATE_COMING;
 	mutex_unlock(&module_mutex);
 
+	ftrace_module_enable(mod);
 	blocking_notifier_call_chain(&module_notify_list,
 				     MODULE_STATE_COMING, mod);
 	return 0;
@@ -3496,7 +3518,7 @@
 
 	/* Module is ready to execute: parsing args may do that. */
 	after_dashes = parse_args(mod->name, mod->args, mod->kp, mod->num_kp,
-				  -32768, 32767, NULL,
+				  -32768, 32767, mod,
 				  unknown_module_param_cb);
 	if (IS_ERR(after_dashes)) {
 		err = PTR_ERR(after_dashes);
@@ -3627,6 +3649,11 @@
 	       && (str[2] == '\0' || str[2] == '.');
 }
 
+static const char *symname(struct mod_kallsyms *kallsyms, unsigned int symnum)
+{
+	return kallsyms->strtab + kallsyms->symtab[symnum].st_name;
+}
+
 static const char *get_ksymbol(struct module *mod,
 			       unsigned long addr,
 			       unsigned long *size,
@@ -3634,6 +3661,7 @@
 {
 	unsigned int i, best = 0;
 	unsigned long nextval;
+	struct mod_kallsyms *kallsyms = rcu_dereference_sched(mod->kallsyms);
 
 	/* At worse, next value is at end of module */
 	if (within_module_init(addr, mod))
@@ -3643,32 +3671,32 @@
 
 	/* Scan for closest preceding symbol, and next symbol. (ELF
 	   starts real symbols at 1). */
-	for (i = 1; i < mod->num_symtab; i++) {
-		if (mod->symtab[i].st_shndx == SHN_UNDEF)
+	for (i = 1; i < kallsyms->num_symtab; i++) {
+		if (kallsyms->symtab[i].st_shndx == SHN_UNDEF)
 			continue;
 
 		/* We ignore unnamed symbols: they're uninformative
 		 * and inserted at a whim. */
-		if (mod->symtab[i].st_value <= addr
-		    && mod->symtab[i].st_value > mod->symtab[best].st_value
-		    && *(mod->strtab + mod->symtab[i].st_name) != '\0'
-		    && !is_arm_mapping_symbol(mod->strtab + mod->symtab[i].st_name))
+		if (*symname(kallsyms, i) == '\0'
+		    || is_arm_mapping_symbol(symname(kallsyms, i)))
+			continue;
+
+		if (kallsyms->symtab[i].st_value <= addr
+		    && kallsyms->symtab[i].st_value > kallsyms->symtab[best].st_value)
 			best = i;
-		if (mod->symtab[i].st_value > addr
-		    && mod->symtab[i].st_value < nextval
-		    && *(mod->strtab + mod->symtab[i].st_name) != '\0'
-		    && !is_arm_mapping_symbol(mod->strtab + mod->symtab[i].st_name))
-			nextval = mod->symtab[i].st_value;
+		if (kallsyms->symtab[i].st_value > addr
+		    && kallsyms->symtab[i].st_value < nextval)
+			nextval = kallsyms->symtab[i].st_value;
 	}
 
 	if (!best)
 		return NULL;
 
 	if (size)
-		*size = nextval - mod->symtab[best].st_value;
+		*size = nextval - kallsyms->symtab[best].st_value;
 	if (offset)
-		*offset = addr - mod->symtab[best].st_value;
-	return mod->strtab + mod->symtab[best].st_name;
+		*offset = addr - kallsyms->symtab[best].st_value;
+	return symname(kallsyms, best);
 }
 
 /* For kallsyms to ask for address resolution.  NULL means not found.  Careful
@@ -3758,19 +3786,21 @@
 
 	preempt_disable();
 	list_for_each_entry_rcu(mod, &modules, list) {
+		struct mod_kallsyms *kallsyms;
+
 		if (mod->state == MODULE_STATE_UNFORMED)
 			continue;
-		if (symnum < mod->num_symtab) {
-			*value = mod->symtab[symnum].st_value;
-			*type = mod->symtab[symnum].st_info;
-			strlcpy(name, mod->strtab + mod->symtab[symnum].st_name,
-				KSYM_NAME_LEN);
+		kallsyms = rcu_dereference_sched(mod->kallsyms);
+		if (symnum < kallsyms->num_symtab) {
+			*value = kallsyms->symtab[symnum].st_value;
+			*type = kallsyms->symtab[symnum].st_info;
+			strlcpy(name, symname(kallsyms, symnum), KSYM_NAME_LEN);
 			strlcpy(module_name, mod->name, MODULE_NAME_LEN);
 			*exported = is_exported(name, *value, mod);
 			preempt_enable();
 			return 0;
 		}
-		symnum -= mod->num_symtab;
+		symnum -= kallsyms->num_symtab;
 	}
 	preempt_enable();
 	return -ERANGE;
@@ -3779,11 +3809,12 @@
 static unsigned long mod_find_symname(struct module *mod, const char *name)
 {
 	unsigned int i;
+	struct mod_kallsyms *kallsyms = rcu_dereference_sched(mod->kallsyms);
 
-	for (i = 0; i < mod->num_symtab; i++)
-		if (strcmp(name, mod->strtab+mod->symtab[i].st_name) == 0 &&
-		    mod->symtab[i].st_info != 'U')
-			return mod->symtab[i].st_value;
+	for (i = 0; i < kallsyms->num_symtab; i++)
+		if (strcmp(name, symname(kallsyms, i)) == 0 &&
+		    kallsyms->symtab[i].st_info != 'U')
+			return kallsyms->symtab[i].st_value;
 	return 0;
 }
 
@@ -3822,11 +3853,14 @@
 	module_assert_mutex();
 
 	list_for_each_entry(mod, &modules, list) {
+		/* We hold module_mutex: no need for rcu_dereference_sched */
+		struct mod_kallsyms *kallsyms = mod->kallsyms;
+
 		if (mod->state == MODULE_STATE_UNFORMED)
 			continue;
-		for (i = 0; i < mod->num_symtab; i++) {
-			ret = fn(data, mod->strtab + mod->symtab[i].st_name,
-				 mod, mod->symtab[i].st_value);
+		for (i = 0; i < kallsyms->num_symtab; i++) {
+			ret = fn(data, symname(kallsyms, i),
+				 mod, kallsyms->symtab[i].st_value);
 			if (ret != 0)
 				return ret;
 		}
diff --git a/kernel/pid.c b/kernel/pid.c
index f4ad91b..4d73a83 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -588,7 +588,7 @@
 
 void __init pidmap_init(void)
 {
-	/* Veryify no one has done anything silly */
+	/* Verify no one has done anything silly: */
 	BUILD_BUG_ON(PID_MAX_LIMIT >= PIDNS_HASH_ADDING);
 
 	/* bump default and minimum pid_max based on number of cpus */
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 02e8dfa..68d3ebc 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -235,7 +235,7 @@
 
 config APM_EMULATION
 	tristate "Advanced Power Management Emulation"
-	depends on PM && SYS_SUPPORTS_APM_EMULATION
+	depends on SYS_SUPPORTS_APM_EMULATION
 	help
 	  APM is a BIOS specification for saving power using several different
 	  techniques. This is mostly useful for battery powered laptops with
diff --git a/kernel/profile.c b/kernel/profile.c
index 99513e1..5136969 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -59,6 +59,7 @@
 
 	if (!strncmp(str, sleepstr, strlen(sleepstr))) {
 #ifdef CONFIG_SCHEDSTATS
+		force_schedstat_enabled();
 		prof_on = SLEEP_PROFILING;
 		if (str[strlen(sleepstr)] == ',')
 			str += strlen(sleepstr) + 1;
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index d2988d0..65ae0e5 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -932,12 +932,14 @@
 	int nsynctypes = 0;
 
 	VERBOSE_TOROUT_STRING("rcu_torture_writer task started");
-	pr_alert("%s" TORTURE_FLAG
-		 " Grace periods expedited from boot/sysfs for %s,\n",
-		 torture_type, cur_ops->name);
-	pr_alert("%s" TORTURE_FLAG
-		 " Testing of dynamic grace-period expediting diabled.\n",
-		 torture_type);
+	if (!can_expedite) {
+		pr_alert("%s" TORTURE_FLAG
+			 " Grace periods expedited from boot/sysfs for %s,\n",
+			 torture_type, cur_ops->name);
+		pr_alert("%s" TORTURE_FLAG
+			 " Disabled dynamic grace-period expediting.\n",
+			 torture_type);
+	}
 
 	/* Initialize synctype[] array.  If none set, take default. */
 	if (!gp_cond1 && !gp_exp1 && !gp_normal1 && !gp_sync1)
diff --git a/kernel/rcu/tiny_plugin.h b/kernel/rcu/tiny_plugin.h
index e492a52..196f030 100644
--- a/kernel/rcu/tiny_plugin.h
+++ b/kernel/rcu/tiny_plugin.h
@@ -23,7 +23,7 @@
  */
 
 #include <linux/kthread.h>
-#include <linux/module.h>
+#include <linux/init.h>
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
 
@@ -122,18 +122,7 @@
 	debugfs_remove_recursive(rcudir);
 	return 1;
 }
-
-static void __exit rcutiny_trace_cleanup(void)
-{
-	debugfs_remove_recursive(rcudir);
-}
-
-module_init(rcutiny_trace_init);
-module_exit(rcutiny_trace_cleanup);
-
-MODULE_AUTHOR("Paul E. McKenney");
-MODULE_DESCRIPTION("Read-Copy Update tracing for tiny implementation");
-MODULE_LICENSE("GPL");
+device_initcall(rcutiny_trace_init);
 
 static void check_cpu_stall(struct rcu_ctrlblk *rcp)
 {
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index e41dd413..55cea18 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -108,7 +108,6 @@
 RCU_STATE_INITIALIZER(rcu_bh, 'b', call_rcu_bh);
 
 static struct rcu_state *const rcu_state_p;
-static struct rcu_data __percpu *const rcu_data_p;
 LIST_HEAD(rcu_struct_flavors);
 
 /* Dump rcu_node combining tree at boot to verify correct setup. */
@@ -1083,13 +1082,12 @@
 	rcu_sysidle_check_cpu(rdp, isidle, maxj);
 	if ((rdp->dynticks_snap & 0x1) == 0) {
 		trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("dti"));
-		return 1;
-	} else {
 		if (ULONG_CMP_LT(READ_ONCE(rdp->gpnum) + ULONG_MAX / 4,
 				 rdp->mynode->gpnum))
 			WRITE_ONCE(rdp->gpwrap, true);
-		return 0;
+		return 1;
 	}
+	return 0;
 }
 
 /*
@@ -1173,15 +1171,16 @@
 			smp_mb(); /* ->cond_resched_completed before *rcrmp. */
 			WRITE_ONCE(*rcrmp,
 				   READ_ONCE(*rcrmp) + rdp->rsp->flavor_mask);
-			resched_cpu(rdp->cpu);  /* Force CPU into scheduler. */
-			rdp->rsp->jiffies_resched += 5; /* Enable beating. */
-		} else if (ULONG_CMP_GE(jiffies, rdp->rsp->jiffies_resched)) {
-			/* Time to beat on that CPU again! */
-			resched_cpu(rdp->cpu);  /* Force CPU into scheduler. */
-			rdp->rsp->jiffies_resched += 5; /* Re-enable beating. */
 		}
+		rdp->rsp->jiffies_resched += 5; /* Re-enable beating. */
 	}
 
+	/* And if it has been a really long time, kick the CPU as well. */
+	if (ULONG_CMP_GE(jiffies,
+			 rdp->rsp->gp_start + 2 * jiffies_till_sched_qs) ||
+	    ULONG_CMP_GE(jiffies, rdp->rsp->gp_start + jiffies_till_sched_qs))
+		resched_cpu(rdp->cpu);  /* Force CPU into scheduler. */
+
 	return 0;
 }
 
@@ -1246,7 +1245,7 @@
 				if (rnp->qsmask & (1UL << cpu))
 					dump_cpu_task(rnp->grplo + cpu);
 		}
-		raw_spin_unlock_irqrestore(&rnp->lock, flags);
+		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 	}
 }
 
@@ -1266,12 +1265,12 @@
 	raw_spin_lock_irqsave_rcu_node(rnp, flags);
 	delta = jiffies - READ_ONCE(rsp->jiffies_stall);
 	if (delta < RCU_STALL_RAT_DELAY || !rcu_gp_in_progress(rsp)) {
-		raw_spin_unlock_irqrestore(&rnp->lock, flags);
+		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 		return;
 	}
 	WRITE_ONCE(rsp->jiffies_stall,
 		   jiffies + 3 * rcu_jiffies_till_stall_check() + 3);
-	raw_spin_unlock_irqrestore(&rnp->lock, flags);
+	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 
 	/*
 	 * OK, time to rat on our buddy...
@@ -1292,7 +1291,7 @@
 					ndetected++;
 				}
 		}
-		raw_spin_unlock_irqrestore(&rnp->lock, flags);
+		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 	}
 
 	print_cpu_stall_info_end();
@@ -1357,7 +1356,7 @@
 	if (ULONG_CMP_GE(jiffies, READ_ONCE(rsp->jiffies_stall)))
 		WRITE_ONCE(rsp->jiffies_stall,
 			   jiffies + 3 * rcu_jiffies_till_stall_check() + 3);
-	raw_spin_unlock_irqrestore(&rnp->lock, flags);
+	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 
 	/*
 	 * Attempt to revive the RCU machinery by forcing a context switch.
@@ -1595,7 +1594,7 @@
 	}
 unlock_out:
 	if (rnp != rnp_root)
-		raw_spin_unlock(&rnp_root->lock);
+		raw_spin_unlock_rcu_node(rnp_root);
 out:
 	if (c_out != NULL)
 		*c_out = c;
@@ -1614,7 +1613,6 @@
 	int needmore;
 	struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
 
-	rcu_nocb_gp_cleanup(rsp, rnp);
 	rnp->need_future_gp[c & 0x1] = 0;
 	needmore = rnp->need_future_gp[(c + 1) & 0x1];
 	trace_rcu_future_gp(rnp, rdp, c,
@@ -1635,7 +1633,7 @@
 	    !READ_ONCE(rsp->gp_flags) ||
 	    !rsp->gp_kthread)
 		return;
-	wake_up(&rsp->gp_wq);
+	swake_up(&rsp->gp_wq);
 }
 
 /*
@@ -1815,7 +1813,7 @@
 		return;
 	}
 	needwake = __note_gp_changes(rsp, rnp, rdp);
-	raw_spin_unlock_irqrestore(&rnp->lock, flags);
+	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 	if (needwake)
 		rcu_gp_kthread_wake(rsp);
 }
@@ -1840,7 +1838,7 @@
 	raw_spin_lock_irq_rcu_node(rnp);
 	if (!READ_ONCE(rsp->gp_flags)) {
 		/* Spurious wakeup, tell caller to go back to sleep.  */
-		raw_spin_unlock_irq(&rnp->lock);
+		raw_spin_unlock_irq_rcu_node(rnp);
 		return false;
 	}
 	WRITE_ONCE(rsp->gp_flags, 0); /* Clear all flags: New grace period. */
@@ -1850,7 +1848,7 @@
 		 * Grace period already in progress, don't start another.
 		 * Not supposed to be able to happen.
 		 */
-		raw_spin_unlock_irq(&rnp->lock);
+		raw_spin_unlock_irq_rcu_node(rnp);
 		return false;
 	}
 
@@ -1859,7 +1857,7 @@
 	/* Record GP times before starting GP, hence smp_store_release(). */
 	smp_store_release(&rsp->gpnum, rsp->gpnum + 1);
 	trace_rcu_grace_period(rsp->name, rsp->gpnum, TPS("start"));
-	raw_spin_unlock_irq(&rnp->lock);
+	raw_spin_unlock_irq_rcu_node(rnp);
 
 	/*
 	 * Apply per-leaf buffered online and offline operations to the
@@ -1873,7 +1871,7 @@
 		if (rnp->qsmaskinit == rnp->qsmaskinitnext &&
 		    !rnp->wait_blkd_tasks) {
 			/* Nothing to do on this leaf rcu_node structure. */
-			raw_spin_unlock_irq(&rnp->lock);
+			raw_spin_unlock_irq_rcu_node(rnp);
 			continue;
 		}
 
@@ -1907,7 +1905,7 @@
 			rcu_cleanup_dead_rnp(rnp);
 		}
 
-		raw_spin_unlock_irq(&rnp->lock);
+		raw_spin_unlock_irq_rcu_node(rnp);
 	}
 
 	/*
@@ -1938,7 +1936,7 @@
 		trace_rcu_grace_period_init(rsp->name, rnp->gpnum,
 					    rnp->level, rnp->grplo,
 					    rnp->grphi, rnp->qsmask);
-		raw_spin_unlock_irq(&rnp->lock);
+		raw_spin_unlock_irq_rcu_node(rnp);
 		cond_resched_rcu_qs();
 		WRITE_ONCE(rsp->gp_activity, jiffies);
 	}
@@ -1996,7 +1994,7 @@
 		raw_spin_lock_irq_rcu_node(rnp);
 		WRITE_ONCE(rsp->gp_flags,
 			   READ_ONCE(rsp->gp_flags) & ~RCU_GP_FLAG_FQS);
-		raw_spin_unlock_irq(&rnp->lock);
+		raw_spin_unlock_irq_rcu_node(rnp);
 	}
 }
 
@@ -2010,6 +2008,7 @@
 	int nocb = 0;
 	struct rcu_data *rdp;
 	struct rcu_node *rnp = rcu_get_root(rsp);
+	struct swait_queue_head *sq;
 
 	WRITE_ONCE(rsp->gp_activity, jiffies);
 	raw_spin_lock_irq_rcu_node(rnp);
@@ -2025,7 +2024,7 @@
 	 * safe for us to drop the lock in order to mark the grace
 	 * period as completed in all of the rcu_node structures.
 	 */
-	raw_spin_unlock_irq(&rnp->lock);
+	raw_spin_unlock_irq_rcu_node(rnp);
 
 	/*
 	 * Propagate new ->completed value to rcu_node structures so
@@ -2046,7 +2045,9 @@
 			needgp = __note_gp_changes(rsp, rnp, rdp) || needgp;
 		/* smp_mb() provided by prior unlock-lock pair. */
 		nocb += rcu_future_gp_cleanup(rsp, rnp);
-		raw_spin_unlock_irq(&rnp->lock);
+		sq = rcu_nocb_gp_get(rnp);
+		raw_spin_unlock_irq_rcu_node(rnp);
+		rcu_nocb_gp_cleanup(sq);
 		cond_resched_rcu_qs();
 		WRITE_ONCE(rsp->gp_activity, jiffies);
 		rcu_gp_slow(rsp, gp_cleanup_delay);
@@ -2068,7 +2069,7 @@
 				       READ_ONCE(rsp->gpnum),
 				       TPS("newreq"));
 	}
-	raw_spin_unlock_irq(&rnp->lock);
+	raw_spin_unlock_irq_rcu_node(rnp);
 }
 
 /*
@@ -2092,7 +2093,7 @@
 					       READ_ONCE(rsp->gpnum),
 					       TPS("reqwait"));
 			rsp->gp_state = RCU_GP_WAIT_GPS;
-			wait_event_interruptible(rsp->gp_wq,
+			swait_event_interruptible(rsp->gp_wq,
 						 READ_ONCE(rsp->gp_flags) &
 						 RCU_GP_FLAG_INIT);
 			rsp->gp_state = RCU_GP_DONE_GPS;
@@ -2122,7 +2123,7 @@
 					       READ_ONCE(rsp->gpnum),
 					       TPS("fqswait"));
 			rsp->gp_state = RCU_GP_WAIT_FQS;
-			ret = wait_event_interruptible_timeout(rsp->gp_wq,
+			ret = swait_event_interruptible_timeout(rsp->gp_wq,
 					rcu_gp_fqs_check_wake(rsp, &gf), j);
 			rsp->gp_state = RCU_GP_DOING_FQS;
 			/* Locking provides needed memory barriers. */
@@ -2234,19 +2235,21 @@
 }
 
 /*
- * Report a full set of quiescent states to the specified rcu_state
- * data structure.  This involves cleaning up after the prior grace
- * period and letting rcu_start_gp() start up the next grace period
- * if one is needed.  Note that the caller must hold rnp->lock, which
- * is released before return.
+ * Report a full set of quiescent states to the specified rcu_state data
+ * structure.  Invoke rcu_gp_kthread_wake() to awaken the grace-period
+ * kthread if another grace period is required.  Whether we wake
+ * the grace-period kthread or it awakens itself for the next round
+ * of quiescent-state forcing, that kthread will clean up after the
+ * just-completed grace period.  Note that the caller must hold rnp->lock,
+ * which is released before return.
  */
 static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags)
 	__releases(rcu_get_root(rsp)->lock)
 {
 	WARN_ON_ONCE(!rcu_gp_in_progress(rsp));
 	WRITE_ONCE(rsp->gp_flags, READ_ONCE(rsp->gp_flags) | RCU_GP_FLAG_FQS);
-	raw_spin_unlock_irqrestore(&rcu_get_root(rsp)->lock, flags);
-	rcu_gp_kthread_wake(rsp);
+	raw_spin_unlock_irqrestore_rcu_node(rcu_get_root(rsp), flags);
+	swake_up(&rsp->gp_wq);  /* Memory barrier implied by swake_up() path. */
 }
 
 /*
@@ -2275,7 +2278,7 @@
 			 * Our bit has already been cleared, or the
 			 * relevant grace period is already over, so done.
 			 */
-			raw_spin_unlock_irqrestore(&rnp->lock, flags);
+			raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 			return;
 		}
 		WARN_ON_ONCE(oldmask); /* Any child must be all zeroed! */
@@ -2287,7 +2290,7 @@
 		if (rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) {
 
 			/* Other bits still set at this level, so done. */
-			raw_spin_unlock_irqrestore(&rnp->lock, flags);
+			raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 			return;
 		}
 		mask = rnp->grpmask;
@@ -2297,7 +2300,7 @@
 
 			break;
 		}
-		raw_spin_unlock_irqrestore(&rnp->lock, flags);
+		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 		rnp_c = rnp;
 		rnp = rnp->parent;
 		raw_spin_lock_irqsave_rcu_node(rnp, flags);
@@ -2329,7 +2332,7 @@
 
 	if (rcu_state_p == &rcu_sched_state || rsp != rcu_state_p ||
 	    rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) {
-		raw_spin_unlock_irqrestore(&rnp->lock, flags);
+		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 		return;  /* Still need more quiescent states! */
 	}
 
@@ -2346,19 +2349,14 @@
 	/* Report up the rest of the hierarchy, tracking current ->gpnum. */
 	gps = rnp->gpnum;
 	mask = rnp->grpmask;
-	raw_spin_unlock(&rnp->lock);	/* irqs remain disabled. */
+	raw_spin_unlock_rcu_node(rnp);	/* irqs remain disabled. */
 	raw_spin_lock_rcu_node(rnp_p);	/* irqs already disabled. */
 	rcu_report_qs_rnp(mask, rsp, rnp_p, gps, flags);
 }
 
 /*
  * Record a quiescent state for the specified CPU to that CPU's rcu_data
- * structure.  This must be either called from the specified CPU, or
- * called when the specified CPU is known to be offline (and when it is
- * also known that no other CPU is concurrently trying to help the offline
- * CPU).  The lastcomp argument is used to make sure we are still in the
- * grace period of interest.  We don't want to end the current grace period
- * based on quiescent states detected in an earlier grace period!
+ * structure.  This must be called from the specified CPU.
  */
 static void
 rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp)
@@ -2383,14 +2381,14 @@
 		 */
 		rdp->cpu_no_qs.b.norm = true;	/* need qs for new gp. */
 		rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_qs_ctr);
-		raw_spin_unlock_irqrestore(&rnp->lock, flags);
+		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 		return;
 	}
 	mask = rdp->grpmask;
 	if ((rnp->qsmask & mask) == 0) {
-		raw_spin_unlock_irqrestore(&rnp->lock, flags);
+		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 	} else {
-		rdp->core_needs_qs = 0;
+		rdp->core_needs_qs = false;
 
 		/*
 		 * This GP can't end until cpu checks in, so all of our
@@ -2599,10 +2597,11 @@
 		rnp->qsmaskinit &= ~mask;
 		rnp->qsmask &= ~mask;
 		if (rnp->qsmaskinit) {
-			raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
+			raw_spin_unlock_rcu_node(rnp);
+			/* irqs remain disabled. */
 			return;
 		}
-		raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
+		raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */
 	}
 }
 
@@ -2625,7 +2624,7 @@
 	mask = rdp->grpmask;
 	raw_spin_lock_irqsave_rcu_node(rnp, flags); /* Enforce GP memory-order guarantee. */
 	rnp->qsmaskinitnext &= ~mask;
-	raw_spin_unlock_irqrestore(&rnp->lock, flags);
+	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 }
 
 /*
@@ -2859,7 +2858,7 @@
 			rcu_report_qs_rnp(mask, rsp, rnp, rnp->gpnum, flags);
 		} else {
 			/* Nothing to do here, so just drop the lock. */
-			raw_spin_unlock_irqrestore(&rnp->lock, flags);
+			raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 		}
 	}
 }
@@ -2895,12 +2894,12 @@
 	raw_spin_unlock(&rnp_old->fqslock);
 	if (READ_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) {
 		rsp->n_force_qs_lh++;
-		raw_spin_unlock_irqrestore(&rnp_old->lock, flags);
+		raw_spin_unlock_irqrestore_rcu_node(rnp_old, flags);
 		return;  /* Someone beat us to it. */
 	}
 	WRITE_ONCE(rsp->gp_flags, READ_ONCE(rsp->gp_flags) | RCU_GP_FLAG_FQS);
-	raw_spin_unlock_irqrestore(&rnp_old->lock, flags);
-	rcu_gp_kthread_wake(rsp);
+	raw_spin_unlock_irqrestore_rcu_node(rnp_old, flags);
+	swake_up(&rsp->gp_wq); /* Memory barrier implied by swake_up() path. */
 }
 
 /*
@@ -2925,7 +2924,7 @@
 	if (cpu_needs_another_gp(rsp, rdp)) {
 		raw_spin_lock_rcu_node(rcu_get_root(rsp)); /* irqs disabled. */
 		needwake = rcu_start_gp(rsp);
-		raw_spin_unlock_irqrestore(&rcu_get_root(rsp)->lock, flags);
+		raw_spin_unlock_irqrestore_rcu_node(rcu_get_root(rsp), flags);
 		if (needwake)
 			rcu_gp_kthread_wake(rsp);
 	} else {
@@ -3016,7 +3015,7 @@
 
 			raw_spin_lock_rcu_node(rnp_root);
 			needwake = rcu_start_gp(rsp);
-			raw_spin_unlock(&rnp_root->lock);
+			raw_spin_unlock_rcu_node(rnp_root);
 			if (needwake)
 				rcu_gp_kthread_wake(rsp);
 		} else {
@@ -3436,14 +3435,14 @@
 	rcu_for_each_leaf_node(rsp, rnp) {
 		raw_spin_lock_irqsave_rcu_node(rnp, flags);
 		if (rnp->expmaskinit == rnp->expmaskinitnext) {
-			raw_spin_unlock_irqrestore(&rnp->lock, flags);
+			raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 			continue;  /* No new CPUs, nothing to do. */
 		}
 
 		/* Update this node's mask, track old value for propagation. */
 		oldmask = rnp->expmaskinit;
 		rnp->expmaskinit = rnp->expmaskinitnext;
-		raw_spin_unlock_irqrestore(&rnp->lock, flags);
+		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 
 		/* If was already nonzero, nothing to propagate. */
 		if (oldmask)
@@ -3458,7 +3457,7 @@
 			if (rnp_up->expmaskinit)
 				done = true;
 			rnp_up->expmaskinit |= mask;
-			raw_spin_unlock_irqrestore(&rnp_up->lock, flags);
+			raw_spin_unlock_irqrestore_rcu_node(rnp_up, flags);
 			if (done)
 				break;
 			mask = rnp_up->grpmask;
@@ -3481,7 +3480,7 @@
 		raw_spin_lock_irqsave_rcu_node(rnp, flags);
 		WARN_ON_ONCE(rnp->expmask);
 		rnp->expmask = rnp->expmaskinit;
-		raw_spin_unlock_irqrestore(&rnp->lock, flags);
+		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 	}
 }
 
@@ -3522,19 +3521,19 @@
 			if (!rnp->expmask)
 				rcu_initiate_boost(rnp, flags);
 			else
-				raw_spin_unlock_irqrestore(&rnp->lock, flags);
+				raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 			break;
 		}
 		if (rnp->parent == NULL) {
-			raw_spin_unlock_irqrestore(&rnp->lock, flags);
+			raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 			if (wake) {
 				smp_mb(); /* EGP done before wake_up(). */
-				wake_up(&rsp->expedited_wq);
+				swake_up(&rsp->expedited_wq);
 			}
 			break;
 		}
 		mask = rnp->grpmask;
-		raw_spin_unlock(&rnp->lock); /* irqs remain disabled */
+		raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled */
 		rnp = rnp->parent;
 		raw_spin_lock_rcu_node(rnp); /* irqs already disabled */
 		WARN_ON_ONCE(!(rnp->expmask & mask));
@@ -3569,7 +3568,7 @@
 
 	raw_spin_lock_irqsave_rcu_node(rnp, flags);
 	if (!(rnp->expmask & mask)) {
-		raw_spin_unlock_irqrestore(&rnp->lock, flags);
+		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 		return;
 	}
 	rnp->expmask &= ~mask;
@@ -3730,7 +3729,7 @@
 		 */
 		if (rcu_preempt_has_tasks(rnp))
 			rnp->exp_tasks = rnp->blkd_tasks.next;
-		raw_spin_unlock_irqrestore(&rnp->lock, flags);
+		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 
 		/* IPI the remaining CPUs for expedited quiescent state. */
 		mask = 1;
@@ -3747,7 +3746,7 @@
 			raw_spin_lock_irqsave_rcu_node(rnp, flags);
 			if (cpu_online(cpu) &&
 			    (rnp->expmask & mask)) {
-				raw_spin_unlock_irqrestore(&rnp->lock, flags);
+				raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 				schedule_timeout_uninterruptible(1);
 				if (cpu_online(cpu) &&
 				    (rnp->expmask & mask))
@@ -3756,7 +3755,7 @@
 			}
 			if (!(rnp->expmask & mask))
 				mask_ofl_ipi &= ~mask;
-			raw_spin_unlock_irqrestore(&rnp->lock, flags);
+			raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 		}
 		/* Report quiescent states for those that went offline. */
 		mask_ofl_test |= mask_ofl_ipi;
@@ -3780,7 +3779,7 @@
 	jiffies_start = jiffies;
 
 	for (;;) {
-		ret = wait_event_interruptible_timeout(
+		ret = swait_event_timeout(
 				rsp->expedited_wq,
 				sync_rcu_preempt_exp_done(rnp_root),
 				jiffies_stall);
@@ -3788,7 +3787,7 @@
 			return;
 		if (ret < 0) {
 			/* Hit a signal, disable CPU stall warnings. */
-			wait_event(rsp->expedited_wq,
+			swait_event(rsp->expedited_wq,
 				   sync_rcu_preempt_exp_done(rnp_root));
 			return;
 		}
@@ -4163,7 +4162,7 @@
 			return;
 		raw_spin_lock_rcu_node(rnp); /* Interrupts already disabled. */
 		rnp->qsmaskinit |= mask;
-		raw_spin_unlock(&rnp->lock); /* Interrupts remain disabled. */
+		raw_spin_unlock_rcu_node(rnp); /* Interrupts remain disabled. */
 	}
 }
 
@@ -4187,7 +4186,7 @@
 	rdp->rsp = rsp;
 	mutex_init(&rdp->exp_funnel_mutex);
 	rcu_boot_init_nocb_percpu_data(rdp);
-	raw_spin_unlock_irqrestore(&rnp->lock, flags);
+	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 }
 
 /*
@@ -4215,7 +4214,7 @@
 	rcu_sysidle_init_percpu_data(rdp->dynticks);
 	atomic_set(&rdp->dynticks->dynticks,
 		   (atomic_read(&rdp->dynticks->dynticks) & ~0x1) + 1);
-	raw_spin_unlock(&rnp->lock);		/* irqs remain disabled. */
+	raw_spin_unlock_rcu_node(rnp);		/* irqs remain disabled. */
 
 	/*
 	 * Add CPU to leaf rcu_node pending-online bitmask.  Any needed
@@ -4236,7 +4235,7 @@
 	rdp->rcu_qs_ctr_snap = per_cpu(rcu_qs_ctr, cpu);
 	rdp->core_needs_qs = false;
 	trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuonl"));
-	raw_spin_unlock_irqrestore(&rnp->lock, flags);
+	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 }
 
 static void rcu_prepare_cpu(int cpu)
@@ -4358,7 +4357,7 @@
 			sp.sched_priority = kthread_prio;
 			sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
 		}
-		raw_spin_unlock_irqrestore(&rnp->lock, flags);
+		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 		wake_up_process(t);
 	}
 	rcu_spawn_nocb_kthreads();
@@ -4449,8 +4448,8 @@
 		cpustride *= levelspread[i];
 		rnp = rsp->level[i];
 		for (j = 0; j < levelcnt[i]; j++, rnp++) {
-			raw_spin_lock_init(&rnp->lock);
-			lockdep_set_class_and_name(&rnp->lock,
+			raw_spin_lock_init(&ACCESS_PRIVATE(rnp, lock));
+			lockdep_set_class_and_name(&ACCESS_PRIVATE(rnp, lock),
 						   &rcu_node_class[i], buf[i]);
 			raw_spin_lock_init(&rnp->fqslock);
 			lockdep_set_class_and_name(&rnp->fqslock,
@@ -4482,8 +4481,8 @@
 		}
 	}
 
-	init_waitqueue_head(&rsp->gp_wq);
-	init_waitqueue_head(&rsp->expedited_wq);
+	init_swait_queue_head(&rsp->gp_wq);
+	init_swait_queue_head(&rsp->expedited_wq);
 	rnp = rsp->level[rcu_num_lvls - 1];
 	for_each_possible_cpu(i) {
 		while (i > rnp->grphi)
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 83360b4..df668c0 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -27,6 +27,7 @@
 #include <linux/threads.h>
 #include <linux/cpumask.h>
 #include <linux/seqlock.h>
+#include <linux/swait.h>
 #include <linux/stop_machine.h>
 
 /*
@@ -149,8 +150,9 @@
  * Definition for node within the RCU grace-period-detection hierarchy.
  */
 struct rcu_node {
-	raw_spinlock_t lock;	/* Root rcu_node's lock protects some */
-				/*  rcu_state fields as well as following. */
+	raw_spinlock_t __private lock;	/* Root rcu_node's lock protects */
+					/*  some rcu_state fields as well as */
+					/*  following. */
 	unsigned long gpnum;	/* Current grace period for this node. */
 				/*  This will either be equal to or one */
 				/*  behind the root rcu_node's gpnum. */
@@ -243,7 +245,7 @@
 				/* Refused to boost: not sure why, though. */
 				/*  This can happen due to race conditions. */
 #ifdef CONFIG_RCU_NOCB_CPU
-	wait_queue_head_t nocb_gp_wq[2];
+	struct swait_queue_head nocb_gp_wq[2];
 				/* Place for rcu_nocb_kthread() to wait GP. */
 #endif /* #ifdef CONFIG_RCU_NOCB_CPU */
 	int need_future_gp[2];
@@ -399,7 +401,7 @@
 	atomic_long_t nocb_q_count_lazy; /*  invocation (all stages). */
 	struct rcu_head *nocb_follower_head; /* CBs ready to invoke. */
 	struct rcu_head **nocb_follower_tail;
-	wait_queue_head_t nocb_wq;	/* For nocb kthreads to sleep on. */
+	struct swait_queue_head nocb_wq; /* For nocb kthreads to sleep on. */
 	struct task_struct *nocb_kthread;
 	int nocb_defer_wakeup;		/* Defer wakeup of nocb_kthread. */
 
@@ -478,7 +480,7 @@
 	unsigned long gpnum;			/* Current gp number. */
 	unsigned long completed;		/* # of last completed gp. */
 	struct task_struct *gp_kthread;		/* Task for grace periods. */
-	wait_queue_head_t gp_wq;		/* Where GP task waits. */
+	struct swait_queue_head gp_wq;		/* Where GP task waits. */
 	short gp_flags;				/* Commands for GP task. */
 	short gp_state;				/* GP kthread sleep state. */
 
@@ -506,7 +508,7 @@
 	unsigned long expedited_sequence;	/* Take a ticket. */
 	atomic_long_t expedited_normal;		/* # fallbacks to normal. */
 	atomic_t expedited_need_qs;		/* # CPUs left to check in. */
-	wait_queue_head_t expedited_wq;		/* Wait for check-ins. */
+	struct swait_queue_head expedited_wq;	/* Wait for check-ins. */
 	int ncpus_snap;				/* # CPUs seen last time. */
 
 	unsigned long jiffies_force_qs;		/* Time at which to invoke */
@@ -621,7 +623,8 @@
 static void increment_cpu_stall_ticks(void);
 static bool rcu_nocb_cpu_needs_barrier(struct rcu_state *rsp, int cpu);
 static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq);
-static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp);
+static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp);
+static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq);
 static void rcu_init_one_nocb(struct rcu_node *rnp);
 static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
 			    bool lazy, unsigned long flags);
@@ -680,7 +683,7 @@
 #endif /* #else #ifdef CONFIG_PPC */
 
 /*
- * Wrappers for the rcu_node::lock acquire.
+ * Wrappers for the rcu_node::lock acquire and release.
  *
  * Because the rcu_nodes form a tree, the tree traversal locking will observe
  * different lock values, this in turn means that an UNLOCK of one level
@@ -689,29 +692,48 @@
  *
  * In order to restore full ordering between tree levels, augment the regular
  * lock acquire functions with smp_mb__after_unlock_lock().
+ *
+ * As ->lock of struct rcu_node is a __private field, therefore one should use
+ * these wrappers rather than directly call raw_spin_{lock,unlock}* on ->lock.
  */
 static inline void raw_spin_lock_rcu_node(struct rcu_node *rnp)
 {
-	raw_spin_lock(&rnp->lock);
+	raw_spin_lock(&ACCESS_PRIVATE(rnp, lock));
 	smp_mb__after_unlock_lock();
 }
 
+static inline void raw_spin_unlock_rcu_node(struct rcu_node *rnp)
+{
+	raw_spin_unlock(&ACCESS_PRIVATE(rnp, lock));
+}
+
 static inline void raw_spin_lock_irq_rcu_node(struct rcu_node *rnp)
 {
-	raw_spin_lock_irq(&rnp->lock);
+	raw_spin_lock_irq(&ACCESS_PRIVATE(rnp, lock));
 	smp_mb__after_unlock_lock();
 }
 
-#define raw_spin_lock_irqsave_rcu_node(rnp, flags)	\
-do {							\
-	typecheck(unsigned long, flags);		\
-	raw_spin_lock_irqsave(&(rnp)->lock, flags);	\
-	smp_mb__after_unlock_lock();			\
+static inline void raw_spin_unlock_irq_rcu_node(struct rcu_node *rnp)
+{
+	raw_spin_unlock_irq(&ACCESS_PRIVATE(rnp, lock));
+}
+
+#define raw_spin_lock_irqsave_rcu_node(rnp, flags)			\
+do {									\
+	typecheck(unsigned long, flags);				\
+	raw_spin_lock_irqsave(&ACCESS_PRIVATE(rnp, lock), flags);	\
+	smp_mb__after_unlock_lock();					\
+} while (0)
+
+#define raw_spin_unlock_irqrestore_rcu_node(rnp, flags)			\
+do {									\
+	typecheck(unsigned long, flags);				\
+	raw_spin_unlock_irqrestore(&ACCESS_PRIVATE(rnp, lock), flags);	\
 } while (0)
 
 static inline bool raw_spin_trylock_rcu_node(struct rcu_node *rnp)
 {
-	bool locked = raw_spin_trylock(&rnp->lock);
+	bool locked = raw_spin_trylock(&ACCESS_PRIVATE(rnp, lock));
 
 	if (locked)
 		smp_mb__after_unlock_lock();
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 9467a8b..efdf7b6 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -235,7 +235,7 @@
 		rnp->gp_tasks = &t->rcu_node_entry;
 	if (!rnp->exp_tasks && (blkd_state & RCU_EXP_BLKD))
 		rnp->exp_tasks = &t->rcu_node_entry;
-	raw_spin_unlock(&rnp->lock); /* rrupts remain disabled. */
+	raw_spin_unlock_rcu_node(rnp); /* interrupts remain disabled. */
 
 	/*
 	 * Report the quiescent state for the expedited GP.  This expedited
@@ -489,7 +489,7 @@
 							 !!rnp->gp_tasks);
 			rcu_report_unblock_qs_rnp(rcu_state_p, rnp, flags);
 		} else {
-			raw_spin_unlock_irqrestore(&rnp->lock, flags);
+			raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 		}
 
 		/* Unboost if we were boosted. */
@@ -518,14 +518,14 @@
 
 	raw_spin_lock_irqsave_rcu_node(rnp, flags);
 	if (!rcu_preempt_blocked_readers_cgp(rnp)) {
-		raw_spin_unlock_irqrestore(&rnp->lock, flags);
+		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 		return;
 	}
 	t = list_entry(rnp->gp_tasks->prev,
 		       struct task_struct, rcu_node_entry);
 	list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry)
 		sched_show_task(t);
-	raw_spin_unlock_irqrestore(&rnp->lock, flags);
+	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 }
 
 /*
@@ -807,7 +807,6 @@
 #else /* #ifdef CONFIG_PREEMPT_RCU */
 
 static struct rcu_state *const rcu_state_p = &rcu_sched_state;
-static struct rcu_data __percpu *const rcu_data_p = &rcu_sched_data;
 
 /*
  * Tell them what RCU they are running.
@@ -991,7 +990,7 @@
 	 * might exit their RCU read-side critical sections on their own.
 	 */
 	if (rnp->exp_tasks == NULL && rnp->boost_tasks == NULL) {
-		raw_spin_unlock_irqrestore(&rnp->lock, flags);
+		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 		return 0;
 	}
 
@@ -1028,7 +1027,7 @@
 	 */
 	t = container_of(tb, struct task_struct, rcu_node_entry);
 	rt_mutex_init_proxy_locked(&rnp->boost_mtx, t);
-	raw_spin_unlock_irqrestore(&rnp->lock, flags);
+	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 	/* Lock only for side effect: boosts task t's priority. */
 	rt_mutex_lock(&rnp->boost_mtx);
 	rt_mutex_unlock(&rnp->boost_mtx);  /* Then keep lockdep happy. */
@@ -1088,7 +1087,7 @@
 
 	if (!rcu_preempt_blocked_readers_cgp(rnp) && rnp->exp_tasks == NULL) {
 		rnp->n_balk_exp_gp_tasks++;
-		raw_spin_unlock_irqrestore(&rnp->lock, flags);
+		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 		return;
 	}
 	if (rnp->exp_tasks != NULL ||
@@ -1098,13 +1097,13 @@
 	     ULONG_CMP_GE(jiffies, rnp->boost_time))) {
 		if (rnp->exp_tasks == NULL)
 			rnp->boost_tasks = rnp->gp_tasks;
-		raw_spin_unlock_irqrestore(&rnp->lock, flags);
+		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 		t = rnp->boost_kthread_task;
 		if (t)
 			rcu_wake_cond(t, rnp->boost_kthread_status);
 	} else {
 		rcu_initiate_boost_trace(rnp);
-		raw_spin_unlock_irqrestore(&rnp->lock, flags);
+		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 	}
 }
 
@@ -1172,7 +1171,7 @@
 		return PTR_ERR(t);
 	raw_spin_lock_irqsave_rcu_node(rnp, flags);
 	rnp->boost_kthread_task = t;
-	raw_spin_unlock_irqrestore(&rnp->lock, flags);
+	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 	sp.sched_priority = kthread_prio;
 	sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
 	wake_up_process(t); /* get to TASK_INTERRUPTIBLE quickly. */
@@ -1308,7 +1307,7 @@
 static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
 	__releases(rnp->lock)
 {
-	raw_spin_unlock_irqrestore(&rnp->lock, flags);
+	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 }
 
 static void invoke_rcu_callbacks_kthread(void)
@@ -1559,7 +1558,7 @@
 		rnp = rdp->mynode;
 		raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */
 		needwake = rcu_accelerate_cbs(rsp, rnp, rdp);
-		raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
+		raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */
 		if (needwake)
 			rcu_gp_kthread_wake(rsp);
 	}
@@ -1811,9 +1810,9 @@
  * Wake up any no-CBs CPUs' kthreads that were waiting on the just-ended
  * grace period.
  */
-static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
+static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq)
 {
-	wake_up_all(&rnp->nocb_gp_wq[rnp->completed & 0x1]);
+	swake_up_all(sq);
 }
 
 /*
@@ -1829,10 +1828,15 @@
 	rnp->need_future_gp[(rnp->completed + 1) & 0x1] += nrq;
 }
 
+static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp)
+{
+	return &rnp->nocb_gp_wq[rnp->completed & 0x1];
+}
+
 static void rcu_init_one_nocb(struct rcu_node *rnp)
 {
-	init_waitqueue_head(&rnp->nocb_gp_wq[0]);
-	init_waitqueue_head(&rnp->nocb_gp_wq[1]);
+	init_swait_queue_head(&rnp->nocb_gp_wq[0]);
+	init_swait_queue_head(&rnp->nocb_gp_wq[1]);
 }
 
 #ifndef CONFIG_RCU_NOCB_CPU_ALL
@@ -1857,7 +1861,7 @@
 	if (READ_ONCE(rdp_leader->nocb_leader_sleep) || force) {
 		/* Prior smp_mb__after_atomic() orders against prior enqueue. */
 		WRITE_ONCE(rdp_leader->nocb_leader_sleep, false);
-		wake_up(&rdp_leader->nocb_wq);
+		swake_up(&rdp_leader->nocb_wq);
 	}
 }
 
@@ -2059,7 +2063,7 @@
 
 	raw_spin_lock_irqsave_rcu_node(rnp, flags);
 	needwake = rcu_start_future_gp(rnp, rdp, &c);
-	raw_spin_unlock_irqrestore(&rnp->lock, flags);
+	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 	if (needwake)
 		rcu_gp_kthread_wake(rdp->rsp);
 
@@ -2069,7 +2073,7 @@
 	 */
 	trace_rcu_future_gp(rnp, rdp, c, TPS("StartWait"));
 	for (;;) {
-		wait_event_interruptible(
+		swait_event_interruptible(
 			rnp->nocb_gp_wq[c & 0x1],
 			(d = ULONG_CMP_GE(READ_ONCE(rnp->completed), c)));
 		if (likely(d))
@@ -2097,7 +2101,7 @@
 	/* Wait for callbacks to appear. */
 	if (!rcu_nocb_poll) {
 		trace_rcu_nocb_wake(my_rdp->rsp->name, my_rdp->cpu, "Sleep");
-		wait_event_interruptible(my_rdp->nocb_wq,
+		swait_event_interruptible(my_rdp->nocb_wq,
 				!READ_ONCE(my_rdp->nocb_leader_sleep));
 		/* Memory barrier handled by smp_mb() calls below and repoll. */
 	} else if (firsttime) {
@@ -2172,7 +2176,7 @@
 			 * List was empty, wake up the follower.
 			 * Memory barriers supplied by atomic_long_add().
 			 */
-			wake_up(&rdp->nocb_wq);
+			swake_up(&rdp->nocb_wq);
 		}
 	}
 
@@ -2193,7 +2197,7 @@
 		if (!rcu_nocb_poll) {
 			trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
 					    "FollowerSleep");
-			wait_event_interruptible(rdp->nocb_wq,
+			swait_event_interruptible(rdp->nocb_wq,
 						 READ_ONCE(rdp->nocb_follower_head));
 		} else if (firsttime) {
 			/* Don't drown trace log with "Poll"! */
@@ -2352,7 +2356,7 @@
 static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp)
 {
 	rdp->nocb_tail = &rdp->nocb_head;
-	init_waitqueue_head(&rdp->nocb_wq);
+	init_swait_queue_head(&rdp->nocb_wq);
 	rdp->nocb_follower_tail = &rdp->nocb_follower_head;
 }
 
@@ -2502,7 +2506,7 @@
 	return false;
 }
 
-static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
+static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq)
 {
 }
 
@@ -2510,6 +2514,11 @@
 {
 }
 
+static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp)
+{
+	return NULL;
+}
+
 static void rcu_init_one_nocb(struct rcu_node *rnp)
 {
 }
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index 76b94e1..ca828b4 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -128,6 +128,7 @@
 {
 	return READ_ONCE(rcu_normal);
 }
+EXPORT_SYMBOL_GPL(rcu_gp_is_normal);
 
 static atomic_t rcu_expedited_nesting =
 	ATOMIC_INIT(IS_ENABLED(CONFIG_RCU_EXPEDITE_BOOT) ? 1 : 0);
diff --git a/kernel/resource.c b/kernel/resource.c
index 09c0597..4d46605 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -333,13 +333,13 @@
 EXPORT_SYMBOL(release_resource);
 
 /*
- * Finds the lowest iomem reosurce exists with-in [res->start.res->end)
- * the caller must specify res->start, res->end, res->flags and "name".
- * If found, returns 0, res is overwritten, if not found, returns -1.
- * This walks through whole tree and not just first level children
- * until and unless first_level_children_only is true.
+ * Finds the lowest iomem resource existing within [res->start.res->end).
+ * The caller must specify res->start, res->end, res->flags, and optionally
+ * desc.  If found, returns 0, res is overwritten, if not found, returns -1.
+ * This function walks the whole tree and not just first level children until
+ * and unless first_level_children_only is true.
  */
-static int find_next_iomem_res(struct resource *res, char *name,
+static int find_next_iomem_res(struct resource *res, unsigned long desc,
 			       bool first_level_children_only)
 {
 	resource_size_t start, end;
@@ -358,9 +358,9 @@
 	read_lock(&resource_lock);
 
 	for (p = iomem_resource.child; p; p = next_resource(p, sibling_only)) {
-		if (p->flags != res->flags)
+		if ((p->flags & res->flags) != res->flags)
 			continue;
-		if (name && strcmp(p->name, name))
+		if ((desc != IORES_DESC_NONE) && (desc != p->desc))
 			continue;
 		if (p->start > end) {
 			p = NULL;
@@ -385,15 +385,18 @@
  * Walks through iomem resources and calls func() with matching resource
  * ranges. This walks through whole tree and not just first level children.
  * All the memory ranges which overlap start,end and also match flags and
- * name are valid candidates.
+ * desc are valid candidates.
  *
- * @name: name of resource
- * @flags: resource flags
+ * @desc: I/O resource descriptor. Use IORES_DESC_NONE to skip @desc check.
+ * @flags: I/O resource flags
  * @start: start addr
  * @end: end addr
+ *
+ * NOTE: For a new descriptor search, define a new IORES_DESC in
+ * <linux/ioport.h> and set it in 'desc' of a target resource entry.
  */
-int walk_iomem_res(char *name, unsigned long flags, u64 start, u64 end,
-		void *arg, int (*func)(u64, u64, void *))
+int walk_iomem_res_desc(unsigned long desc, unsigned long flags, u64 start,
+		u64 end, void *arg, int (*func)(u64, u64, void *))
 {
 	struct resource res;
 	u64 orig_end;
@@ -403,23 +406,27 @@
 	res.end = end;
 	res.flags = flags;
 	orig_end = res.end;
+
 	while ((res.start < res.end) &&
-		(!find_next_iomem_res(&res, name, false))) {
+		(!find_next_iomem_res(&res, desc, false))) {
+
 		ret = (*func)(res.start, res.end, arg);
 		if (ret)
 			break;
+
 		res.start = res.end + 1;
 		res.end = orig_end;
 	}
+
 	return ret;
 }
 
 /*
- * This function calls callback against all memory range of "System RAM"
- * which are marked as IORESOURCE_MEM and IORESOUCE_BUSY.
- * Now, this function is only for "System RAM". This function deals with
- * full ranges and not pfn. If resources are not pfn aligned, dealing
- * with pfn can truncate ranges.
+ * This function calls the @func callback against all memory ranges of type
+ * System RAM which are marked as IORESOURCE_SYSTEM_RAM and IORESOUCE_BUSY.
+ * Now, this function is only for System RAM, it deals with full ranges and
+ * not PFNs. If resources are not PFN-aligned, dealing with PFNs can truncate
+ * ranges.
  */
 int walk_system_ram_res(u64 start, u64 end, void *arg,
 				int (*func)(u64, u64, void *))
@@ -430,10 +437,10 @@
 
 	res.start = start;
 	res.end = end;
-	res.flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+	res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
 	orig_end = res.end;
 	while ((res.start < res.end) &&
-		(!find_next_iomem_res(&res, "System RAM", true))) {
+		(!find_next_iomem_res(&res, IORES_DESC_NONE, true))) {
 		ret = (*func)(res.start, res.end, arg);
 		if (ret)
 			break;
@@ -446,9 +453,9 @@
 #if !defined(CONFIG_ARCH_HAS_WALK_MEMORY)
 
 /*
- * This function calls callback against all memory range of "System RAM"
- * which are marked as IORESOURCE_MEM and IORESOUCE_BUSY.
- * Now, this function is only for "System RAM".
+ * This function calls the @func callback against all memory ranges of type
+ * System RAM which are marked as IORESOURCE_SYSTEM_RAM and IORESOUCE_BUSY.
+ * It is to be used only for System RAM.
  */
 int walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages,
 		void *arg, int (*func)(unsigned long, unsigned long, void *))
@@ -460,10 +467,10 @@
 
 	res.start = (u64) start_pfn << PAGE_SHIFT;
 	res.end = ((u64)(start_pfn + nr_pages) << PAGE_SHIFT) - 1;
-	res.flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+	res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
 	orig_end = res.end;
 	while ((res.start < res.end) &&
-		(find_next_iomem_res(&res, "System RAM", true) >= 0)) {
+		(find_next_iomem_res(&res, IORES_DESC_NONE, true) >= 0)) {
 		pfn = (res.start + PAGE_SIZE - 1) >> PAGE_SHIFT;
 		end_pfn = (res.end + 1) >> PAGE_SHIFT;
 		if (end_pfn > pfn)
@@ -484,7 +491,7 @@
 }
 /*
  * This generic page_is_ram() returns true if specified address is
- * registered as "System RAM" in iomem_resource list.
+ * registered as System RAM in iomem_resource list.
  */
 int __weak page_is_ram(unsigned long pfn)
 {
@@ -496,30 +503,34 @@
  * region_intersects() - determine intersection of region with known resources
  * @start: region start address
  * @size: size of region
- * @name: name of resource (in iomem_resource)
+ * @flags: flags of resource (in iomem_resource)
+ * @desc: descriptor of resource (in iomem_resource) or IORES_DESC_NONE
  *
  * Check if the specified region partially overlaps or fully eclipses a
- * resource identified by @name.  Return REGION_DISJOINT if the region
- * does not overlap @name, return REGION_MIXED if the region overlaps
- * @type and another resource, and return REGION_INTERSECTS if the
- * region overlaps @type and no other defined resource. Note, that
- * REGION_INTERSECTS is also returned in the case when the specified
- * region overlaps RAM and undefined memory holes.
+ * resource identified by @flags and @desc (optional with IORES_DESC_NONE).
+ * Return REGION_DISJOINT if the region does not overlap @flags/@desc,
+ * return REGION_MIXED if the region overlaps @flags/@desc and another
+ * resource, and return REGION_INTERSECTS if the region overlaps @flags/@desc
+ * and no other defined resource. Note that REGION_INTERSECTS is also
+ * returned in the case when the specified region overlaps RAM and undefined
+ * memory holes.
  *
  * region_intersect() is used by memory remapping functions to ensure
  * the user is not remapping RAM and is a vast speed up over walking
  * through the resource table page by page.
  */
-int region_intersects(resource_size_t start, size_t size, const char *name)
+int region_intersects(resource_size_t start, size_t size, unsigned long flags,
+		      unsigned long desc)
 {
-	unsigned long flags = IORESOURCE_MEM | IORESOURCE_BUSY;
 	resource_size_t end = start + size - 1;
 	int type = 0; int other = 0;
 	struct resource *p;
 
 	read_lock(&resource_lock);
 	for (p = iomem_resource.child; p ; p = p->sibling) {
-		bool is_type = strcmp(p->name, name) == 0 && p->flags == flags;
+		bool is_type = (((p->flags & flags) == flags) &&
+				((desc == IORES_DESC_NONE) ||
+				 (desc == p->desc)));
 
 		if (start >= p->start && start <= p->end)
 			is_type ? type++ : other++;
@@ -538,6 +549,7 @@
 
 	return REGION_DISJOINT;
 }
+EXPORT_SYMBOL_GPL(region_intersects);
 
 void __weak arch_remove_reservations(struct resource *avail)
 {
@@ -948,6 +960,7 @@
 	res->start = start;
 	res->end = end;
 	res->flags = IORESOURCE_BUSY;
+	res->desc = IORES_DESC_NONE;
 
 	while (1) {
 
@@ -982,6 +995,7 @@
 				next_res->start = conflict->end + 1;
 				next_res->end = end;
 				next_res->flags = IORESOURCE_BUSY;
+				next_res->desc = IORES_DESC_NONE;
 			}
 		} else {
 			res->start = conflict->end + 1;
@@ -1071,8 +1085,9 @@
 	res->name = name;
 	res->start = start;
 	res->end = start + n - 1;
-	res->flags = resource_type(parent);
+	res->flags = resource_type(parent) | resource_ext_type(parent);
 	res->flags |= IORESOURCE_BUSY | flags;
+	res->desc = IORES_DESC_NONE;
 
 	write_lock(&resource_lock);
 
@@ -1083,9 +1098,10 @@
 		if (!conflict)
 			break;
 		if (conflict != parent) {
-			parent = conflict;
-			if (!(conflict->flags & IORESOURCE_BUSY))
+			if (!(conflict->flags & IORESOURCE_BUSY)) {
+				parent = conflict;
 				continue;
+			}
 		}
 		if (conflict->flags & flags & IORESOURCE_MUXED) {
 			add_wait_queue(&muxed_resource_wait, &wait);
@@ -1237,6 +1253,7 @@
 			new_res->start = end + 1;
 			new_res->end = res->end;
 			new_res->flags = res->flags;
+			new_res->desc = res->desc;
 			new_res->parent = res->parent;
 			new_res->sibling = res->sibling;
 			new_res->child = NULL;
@@ -1412,6 +1429,7 @@
 			res->start = io_start;
 			res->end = io_start + io_num - 1;
 			res->flags = IORESOURCE_BUSY;
+			res->desc = IORES_DESC_NONE;
 			res->child = NULL;
 			if (request_resource(res->start >= 0x10000 ? &iomem_resource : &ioport_resource, res) == 0)
 				reserved = x+1;
diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile
index 6768797..7d4cba2 100644
--- a/kernel/sched/Makefile
+++ b/kernel/sched/Makefile
@@ -13,7 +13,7 @@
 
 obj-y += core.o loadavg.o clock.o cputime.o
 obj-y += idle_task.o fair.o rt.o deadline.o stop_task.o
-obj-y += wait.o completion.o idle.o
+obj-y += wait.o swait.o completion.o idle.o
 obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o
 obj-$(CONFIG_SCHED_AUTOGROUP) += auto_group.o
 obj-$(CONFIG_SCHEDSTATS) += stats.o
diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c
index bc54e84..fedb967 100644
--- a/kernel/sched/clock.c
+++ b/kernel/sched/clock.c
@@ -61,6 +61,7 @@
 #include <linux/static_key.h>
 #include <linux/workqueue.h>
 #include <linux/compiler.h>
+#include <linux/tick.h>
 
 /*
  * Scheduler clock - returns current time in nanosec units.
@@ -89,6 +90,8 @@
 {
 	if (!sched_clock_stable())
 		static_key_slow_inc(&__sched_clock_stable);
+
+	tick_dep_clear(TICK_DEP_BIT_CLOCK_UNSTABLE);
 }
 
 void set_sched_clock_stable(void)
@@ -108,6 +111,8 @@
 	/* XXX worry about clock continuity */
 	if (sched_clock_stable())
 		static_key_slow_dec(&__sched_clock_stable);
+
+	tick_dep_set(TICK_DEP_BIT_CLOCK_UNSTABLE);
 }
 
 static DECLARE_WORK(sched_clock_work, __clear_sched_clock_stable);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 63d3a24..e5725b9 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -26,6 +26,7 @@
  *              Thomas Gleixner, Mike Kravetz
  */
 
+#include <linux/kasan.h>
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/nmi.h>
@@ -66,12 +67,10 @@
 #include <linux/pagemap.h>
 #include <linux/hrtimer.h>
 #include <linux/tick.h>
-#include <linux/debugfs.h>
 #include <linux/ctype.h>
 #include <linux/ftrace.h>
 #include <linux/slab.h>
 #include <linux/init_task.h>
-#include <linux/binfmts.h>
 #include <linux/context_tracking.h>
 #include <linux/compiler.h>
 
@@ -124,138 +123,6 @@
 
 #undef SCHED_FEAT
 
-#ifdef CONFIG_SCHED_DEBUG
-#define SCHED_FEAT(name, enabled)	\
-	#name ,
-
-static const char * const sched_feat_names[] = {
-#include "features.h"
-};
-
-#undef SCHED_FEAT
-
-static int sched_feat_show(struct seq_file *m, void *v)
-{
-	int i;
-
-	for (i = 0; i < __SCHED_FEAT_NR; i++) {
-		if (!(sysctl_sched_features & (1UL << i)))
-			seq_puts(m, "NO_");
-		seq_printf(m, "%s ", sched_feat_names[i]);
-	}
-	seq_puts(m, "\n");
-
-	return 0;
-}
-
-#ifdef HAVE_JUMP_LABEL
-
-#define jump_label_key__true  STATIC_KEY_INIT_TRUE
-#define jump_label_key__false STATIC_KEY_INIT_FALSE
-
-#define SCHED_FEAT(name, enabled)	\
-	jump_label_key__##enabled ,
-
-struct static_key sched_feat_keys[__SCHED_FEAT_NR] = {
-#include "features.h"
-};
-
-#undef SCHED_FEAT
-
-static void sched_feat_disable(int i)
-{
-	static_key_disable(&sched_feat_keys[i]);
-}
-
-static void sched_feat_enable(int i)
-{
-	static_key_enable(&sched_feat_keys[i]);
-}
-#else
-static void sched_feat_disable(int i) { };
-static void sched_feat_enable(int i) { };
-#endif /* HAVE_JUMP_LABEL */
-
-static int sched_feat_set(char *cmp)
-{
-	int i;
-	int neg = 0;
-
-	if (strncmp(cmp, "NO_", 3) == 0) {
-		neg = 1;
-		cmp += 3;
-	}
-
-	for (i = 0; i < __SCHED_FEAT_NR; i++) {
-		if (strcmp(cmp, sched_feat_names[i]) == 0) {
-			if (neg) {
-				sysctl_sched_features &= ~(1UL << i);
-				sched_feat_disable(i);
-			} else {
-				sysctl_sched_features |= (1UL << i);
-				sched_feat_enable(i);
-			}
-			break;
-		}
-	}
-
-	return i;
-}
-
-static ssize_t
-sched_feat_write(struct file *filp, const char __user *ubuf,
-		size_t cnt, loff_t *ppos)
-{
-	char buf[64];
-	char *cmp;
-	int i;
-	struct inode *inode;
-
-	if (cnt > 63)
-		cnt = 63;
-
-	if (copy_from_user(&buf, ubuf, cnt))
-		return -EFAULT;
-
-	buf[cnt] = 0;
-	cmp = strstrip(buf);
-
-	/* Ensure the static_key remains in a consistent state */
-	inode = file_inode(filp);
-	inode_lock(inode);
-	i = sched_feat_set(cmp);
-	inode_unlock(inode);
-	if (i == __SCHED_FEAT_NR)
-		return -EINVAL;
-
-	*ppos += cnt;
-
-	return cnt;
-}
-
-static int sched_feat_open(struct inode *inode, struct file *filp)
-{
-	return single_open(filp, sched_feat_show, NULL);
-}
-
-static const struct file_operations sched_feat_fops = {
-	.open		= sched_feat_open,
-	.write		= sched_feat_write,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
-static __init int sched_init_debug(void)
-{
-	debugfs_create_file("sched_features", 0644, NULL, NULL,
-			&sched_feat_fops);
-
-	return 0;
-}
-late_initcall(sched_init_debug);
-#endif /* CONFIG_SCHED_DEBUG */
-
 /*
  * Number of tasks to iterate in a single balance run.
  * Limited because this is done with IRQs disabled.
@@ -453,20 +320,6 @@
 }
 #endif	/* CONFIG_SCHED_HRTICK */
 
-/*
- * cmpxchg based fetch_or, macro so it works for different integer types
- */
-#define fetch_or(ptr, val)						\
-({	typeof(*(ptr)) __old, __val = *(ptr);				\
- 	for (;;) {							\
- 		__old = cmpxchg((ptr), __val, __val | (val));		\
- 		if (__old == __val)					\
- 			break;						\
- 		__val = __old;						\
- 	}								\
- 	__old;								\
-})
-
 #if defined(CONFIG_SMP) && defined(TIF_POLLING_NRFLAG)
 /*
  * Atomically set TIF_NEED_RESCHED and test for TIF_POLLING_NRFLAG,
@@ -715,31 +568,36 @@
 #endif /* CONFIG_NO_HZ_COMMON */
 
 #ifdef CONFIG_NO_HZ_FULL
-bool sched_can_stop_tick(void)
+bool sched_can_stop_tick(struct rq *rq)
 {
+	int fifo_nr_running;
+
+	/* Deadline tasks, even if single, need the tick */
+	if (rq->dl.dl_nr_running)
+		return false;
+
 	/*
-	 * FIFO realtime policy runs the highest priority task. Other runnable
-	 * tasks are of a lower priority. The scheduler tick does nothing.
+	 * FIFO realtime policy runs the highest priority task (after DEADLINE).
+	 * Other runnable tasks are of a lower priority. The scheduler tick
+	 * isn't needed.
 	 */
-	if (current->policy == SCHED_FIFO)
+	fifo_nr_running = rq->rt.rt_nr_running - rq->rt.rr_nr_running;
+	if (fifo_nr_running)
 		return true;
 
 	/*
 	 * Round-robin realtime tasks time slice with other tasks at the same
-	 * realtime priority. Is this task the only one at this priority?
+	 * realtime priority.
 	 */
-	if (current->policy == SCHED_RR) {
-		struct sched_rt_entity *rt_se = &current->rt;
-
-		return list_is_singular(&rt_se->run_list);
+	if (rq->rt.rr_nr_running) {
+		if (rq->rt.rr_nr_running == 1)
+			return true;
+		else
+			return false;
 	}
 
-	/*
-	 * More than one running task need preemption.
-	 * nr_running update is assumed to be visible
-	 * after IPI is sent from wakers.
-	 */
-	if (this_rq()->nr_running > 1)
+	/* Normal multitasking need periodic preemption checks */
+	if (rq->cfs.nr_running > 1)
 		return false;
 
 	return true;
@@ -2093,7 +1951,8 @@
 
 	ttwu_queue(p, cpu);
 stat:
-	ttwu_stat(p, cpu, wake_flags);
+	if (schedstat_enabled())
+		ttwu_stat(p, cpu, wake_flags);
 out:
 	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
 
@@ -2141,7 +2000,8 @@
 		ttwu_activate(rq, p, ENQUEUE_WAKEUP);
 
 	ttwu_do_wakeup(rq, p, 0);
-	ttwu_stat(p, smp_processor_id(), 0);
+	if (schedstat_enabled())
+		ttwu_stat(p, smp_processor_id(), 0);
 out:
 	raw_spin_unlock(&p->pi_lock);
 }
@@ -2183,7 +2043,6 @@
 	dl_se->dl_bw = 0;
 
 	dl_se->dl_throttled = 0;
-	dl_se->dl_new = 1;
 	dl_se->dl_yielded = 0;
 }
 
@@ -2210,6 +2069,7 @@
 #endif
 
 #ifdef CONFIG_SCHEDSTATS
+	/* Even if schedstat is disabled, there should not be garbage */
 	memset(&p->se.statistics, 0, sizeof(p->se.statistics));
 #endif
 
@@ -2218,6 +2078,10 @@
 	__dl_clear_params(p);
 
 	INIT_LIST_HEAD(&p->rt.run_list);
+	p->rt.timeout		= 0;
+	p->rt.time_slice	= sched_rr_timeslice;
+	p->rt.on_rq		= 0;
+	p->rt.on_list		= 0;
 
 #ifdef CONFIG_PREEMPT_NOTIFIERS
 	INIT_HLIST_HEAD(&p->preempt_notifiers);
@@ -2281,6 +2145,69 @@
 #endif
 #endif
 
+DEFINE_STATIC_KEY_FALSE(sched_schedstats);
+
+#ifdef CONFIG_SCHEDSTATS
+static void set_schedstats(bool enabled)
+{
+	if (enabled)
+		static_branch_enable(&sched_schedstats);
+	else
+		static_branch_disable(&sched_schedstats);
+}
+
+void force_schedstat_enabled(void)
+{
+	if (!schedstat_enabled()) {
+		pr_info("kernel profiling enabled schedstats, disable via kernel.sched_schedstats.\n");
+		static_branch_enable(&sched_schedstats);
+	}
+}
+
+static int __init setup_schedstats(char *str)
+{
+	int ret = 0;
+	if (!str)
+		goto out;
+
+	if (!strcmp(str, "enable")) {
+		set_schedstats(true);
+		ret = 1;
+	} else if (!strcmp(str, "disable")) {
+		set_schedstats(false);
+		ret = 1;
+	}
+out:
+	if (!ret)
+		pr_warn("Unable to parse schedstats=\n");
+
+	return ret;
+}
+__setup("schedstats=", setup_schedstats);
+
+#ifdef CONFIG_PROC_SYSCTL
+int sysctl_schedstats(struct ctl_table *table, int write,
+			 void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	struct ctl_table t;
+	int err;
+	int state = static_branch_likely(&sched_schedstats);
+
+	if (write && !capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	t = *table;
+	t.data = &state;
+	err = proc_dointvec_minmax(&t, write, buffer, lenp, ppos);
+	if (err < 0)
+		return err;
+	if (write)
+		set_schedstats(state);
+	return err;
+}
+#endif
+#endif
+
 /*
  * fork()/clone()-time setup:
  */
@@ -3010,16 +2937,6 @@
 }
 #endif
 
-notrace unsigned long get_parent_ip(unsigned long addr)
-{
-	if (in_lock_functions(addr)) {
-		addr = CALLER_ADDR2;
-		if (in_lock_functions(addr))
-			addr = CALLER_ADDR3;
-	}
-	return addr;
-}
-
 #if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \
 				defined(CONFIG_PREEMPT_TRACER))
 
@@ -3041,7 +2958,7 @@
 				PREEMPT_MASK - 10);
 #endif
 	if (preempt_count() == val) {
-		unsigned long ip = get_parent_ip(CALLER_ADDR1);
+		unsigned long ip = get_lock_parent_ip();
 #ifdef CONFIG_DEBUG_PREEMPT
 		current->preempt_disable_ip = ip;
 #endif
@@ -3068,7 +2985,7 @@
 #endif
 
 	if (preempt_count() == val)
-		trace_preempt_on(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
+		trace_preempt_on(CALLER_ADDR0, get_lock_parent_ip());
 	__preempt_count_sub(val);
 }
 EXPORT_SYMBOL(preempt_count_sub);
@@ -3280,7 +3197,6 @@
 
 		trace_sched_switch(preempt, prev, next);
 		rq = context_switch(rq, prev, next); /* unlocks the rq */
-		cpu = cpu_of(rq);
 	} else {
 		lockdep_unpin_lock(&rq->lock);
 		raw_spin_unlock_irq(&rq->lock);
@@ -3466,7 +3382,7 @@
  */
 void rt_mutex_setprio(struct task_struct *p, int prio)
 {
-	int oldprio, queued, running, enqueue_flag = ENQUEUE_RESTORE;
+	int oldprio, queued, running, queue_flag = DEQUEUE_SAVE | DEQUEUE_MOVE;
 	struct rq *rq;
 	const struct sched_class *prev_class;
 
@@ -3494,11 +3410,15 @@
 
 	trace_sched_pi_setprio(p, prio);
 	oldprio = p->prio;
+
+	if (oldprio == prio)
+		queue_flag &= ~DEQUEUE_MOVE;
+
 	prev_class = p->sched_class;
 	queued = task_on_rq_queued(p);
 	running = task_current(rq, p);
 	if (queued)
-		dequeue_task(rq, p, DEQUEUE_SAVE);
+		dequeue_task(rq, p, queue_flag);
 	if (running)
 		put_prev_task(rq, p);
 
@@ -3516,7 +3436,7 @@
 		if (!dl_prio(p->normal_prio) ||
 		    (pi_task && dl_entity_preempt(&pi_task->dl, &p->dl))) {
 			p->dl.dl_boosted = 1;
-			enqueue_flag |= ENQUEUE_REPLENISH;
+			queue_flag |= ENQUEUE_REPLENISH;
 		} else
 			p->dl.dl_boosted = 0;
 		p->sched_class = &dl_sched_class;
@@ -3524,7 +3444,7 @@
 		if (dl_prio(oldprio))
 			p->dl.dl_boosted = 0;
 		if (oldprio < prio)
-			enqueue_flag |= ENQUEUE_HEAD;
+			queue_flag |= ENQUEUE_HEAD;
 		p->sched_class = &rt_sched_class;
 	} else {
 		if (dl_prio(oldprio))
@@ -3539,7 +3459,7 @@
 	if (running)
 		p->sched_class->set_curr_task(rq);
 	if (queued)
-		enqueue_task(rq, p, enqueue_flag);
+		enqueue_task(rq, p, queue_flag);
 
 	check_class_changed(rq, p, prev_class, oldprio);
 out_unlock:
@@ -3895,6 +3815,7 @@
 	const struct sched_class *prev_class;
 	struct rq *rq;
 	int reset_on_fork;
+	int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE;
 
 	/* may grab non-irq protected spin_locks */
 	BUG_ON(in_interrupt());
@@ -4077,17 +3998,14 @@
 		 * itself.
 		 */
 		new_effective_prio = rt_mutex_get_effective_prio(p, newprio);
-		if (new_effective_prio == oldprio) {
-			__setscheduler_params(p, attr);
-			task_rq_unlock(rq, p, &flags);
-			return 0;
-		}
+		if (new_effective_prio == oldprio)
+			queue_flags &= ~DEQUEUE_MOVE;
 	}
 
 	queued = task_on_rq_queued(p);
 	running = task_current(rq, p);
 	if (queued)
-		dequeue_task(rq, p, DEQUEUE_SAVE);
+		dequeue_task(rq, p, queue_flags);
 	if (running)
 		put_prev_task(rq, p);
 
@@ -4097,15 +4015,14 @@
 	if (running)
 		p->sched_class->set_curr_task(rq);
 	if (queued) {
-		int enqueue_flags = ENQUEUE_RESTORE;
 		/*
 		 * We enqueue to tail when the priority of a task is
 		 * increased (user space view).
 		 */
-		if (oldprio <= p->prio)
-			enqueue_flags |= ENQUEUE_HEAD;
+		if (oldprio < p->prio)
+			queue_flags |= ENQUEUE_HEAD;
 
-		enqueue_task(rq, p, enqueue_flags);
+		enqueue_task(rq, p, queue_flags);
 	}
 
 	check_class_changed(rq, p, prev_class, oldprio);
@@ -5096,6 +5013,8 @@
 	idle->state = TASK_RUNNING;
 	idle->se.exec_start = sched_clock();
 
+	kasan_unpoison_task_stack(idle);
+
 #ifdef CONFIG_SMP
 	/*
 	 * Its possible that init_idle() gets called multiple times on a task,
@@ -5405,183 +5324,6 @@
 }
 #endif /* CONFIG_HOTPLUG_CPU */
 
-#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL)
-
-static struct ctl_table sd_ctl_dir[] = {
-	{
-		.procname	= "sched_domain",
-		.mode		= 0555,
-	},
-	{}
-};
-
-static struct ctl_table sd_ctl_root[] = {
-	{
-		.procname	= "kernel",
-		.mode		= 0555,
-		.child		= sd_ctl_dir,
-	},
-	{}
-};
-
-static struct ctl_table *sd_alloc_ctl_entry(int n)
-{
-	struct ctl_table *entry =
-		kcalloc(n, sizeof(struct ctl_table), GFP_KERNEL);
-
-	return entry;
-}
-
-static void sd_free_ctl_entry(struct ctl_table **tablep)
-{
-	struct ctl_table *entry;
-
-	/*
-	 * In the intermediate directories, both the child directory and
-	 * procname are dynamically allocated and could fail but the mode
-	 * will always be set. In the lowest directory the names are
-	 * static strings and all have proc handlers.
-	 */
-	for (entry = *tablep; entry->mode; entry++) {
-		if (entry->child)
-			sd_free_ctl_entry(&entry->child);
-		if (entry->proc_handler == NULL)
-			kfree(entry->procname);
-	}
-
-	kfree(*tablep);
-	*tablep = NULL;
-}
-
-static int min_load_idx = 0;
-static int max_load_idx = CPU_LOAD_IDX_MAX-1;
-
-static void
-set_table_entry(struct ctl_table *entry,
-		const char *procname, void *data, int maxlen,
-		umode_t mode, proc_handler *proc_handler,
-		bool load_idx)
-{
-	entry->procname = procname;
-	entry->data = data;
-	entry->maxlen = maxlen;
-	entry->mode = mode;
-	entry->proc_handler = proc_handler;
-
-	if (load_idx) {
-		entry->extra1 = &min_load_idx;
-		entry->extra2 = &max_load_idx;
-	}
-}
-
-static struct ctl_table *
-sd_alloc_ctl_domain_table(struct sched_domain *sd)
-{
-	struct ctl_table *table = sd_alloc_ctl_entry(14);
-
-	if (table == NULL)
-		return NULL;
-
-	set_table_entry(&table[0], "min_interval", &sd->min_interval,
-		sizeof(long), 0644, proc_doulongvec_minmax, false);
-	set_table_entry(&table[1], "max_interval", &sd->max_interval,
-		sizeof(long), 0644, proc_doulongvec_minmax, false);
-	set_table_entry(&table[2], "busy_idx", &sd->busy_idx,
-		sizeof(int), 0644, proc_dointvec_minmax, true);
-	set_table_entry(&table[3], "idle_idx", &sd->idle_idx,
-		sizeof(int), 0644, proc_dointvec_minmax, true);
-	set_table_entry(&table[4], "newidle_idx", &sd->newidle_idx,
-		sizeof(int), 0644, proc_dointvec_minmax, true);
-	set_table_entry(&table[5], "wake_idx", &sd->wake_idx,
-		sizeof(int), 0644, proc_dointvec_minmax, true);
-	set_table_entry(&table[6], "forkexec_idx", &sd->forkexec_idx,
-		sizeof(int), 0644, proc_dointvec_minmax, true);
-	set_table_entry(&table[7], "busy_factor", &sd->busy_factor,
-		sizeof(int), 0644, proc_dointvec_minmax, false);
-	set_table_entry(&table[8], "imbalance_pct", &sd->imbalance_pct,
-		sizeof(int), 0644, proc_dointvec_minmax, false);
-	set_table_entry(&table[9], "cache_nice_tries",
-		&sd->cache_nice_tries,
-		sizeof(int), 0644, proc_dointvec_minmax, false);
-	set_table_entry(&table[10], "flags", &sd->flags,
-		sizeof(int), 0644, proc_dointvec_minmax, false);
-	set_table_entry(&table[11], "max_newidle_lb_cost",
-		&sd->max_newidle_lb_cost,
-		sizeof(long), 0644, proc_doulongvec_minmax, false);
-	set_table_entry(&table[12], "name", sd->name,
-		CORENAME_MAX_SIZE, 0444, proc_dostring, false);
-	/* &table[13] is terminator */
-
-	return table;
-}
-
-static struct ctl_table *sd_alloc_ctl_cpu_table(int cpu)
-{
-	struct ctl_table *entry, *table;
-	struct sched_domain *sd;
-	int domain_num = 0, i;
-	char buf[32];
-
-	for_each_domain(cpu, sd)
-		domain_num++;
-	entry = table = sd_alloc_ctl_entry(domain_num + 1);
-	if (table == NULL)
-		return NULL;
-
-	i = 0;
-	for_each_domain(cpu, sd) {
-		snprintf(buf, 32, "domain%d", i);
-		entry->procname = kstrdup(buf, GFP_KERNEL);
-		entry->mode = 0555;
-		entry->child = sd_alloc_ctl_domain_table(sd);
-		entry++;
-		i++;
-	}
-	return table;
-}
-
-static struct ctl_table_header *sd_sysctl_header;
-static void register_sched_domain_sysctl(void)
-{
-	int i, cpu_num = num_possible_cpus();
-	struct ctl_table *entry = sd_alloc_ctl_entry(cpu_num + 1);
-	char buf[32];
-
-	WARN_ON(sd_ctl_dir[0].child);
-	sd_ctl_dir[0].child = entry;
-
-	if (entry == NULL)
-		return;
-
-	for_each_possible_cpu(i) {
-		snprintf(buf, 32, "cpu%d", i);
-		entry->procname = kstrdup(buf, GFP_KERNEL);
-		entry->mode = 0555;
-		entry->child = sd_alloc_ctl_cpu_table(i);
-		entry++;
-	}
-
-	WARN_ON(sd_sysctl_header);
-	sd_sysctl_header = register_sysctl_table(sd_ctl_root);
-}
-
-/* may be called multiple times per register */
-static void unregister_sched_domain_sysctl(void)
-{
-	unregister_sysctl_table(sd_sysctl_header);
-	sd_sysctl_header = NULL;
-	if (sd_ctl_dir[0].child)
-		sd_free_ctl_entry(&sd_ctl_dir[0].child);
-}
-#else
-static void register_sched_domain_sysctl(void)
-{
-}
-static void unregister_sched_domain_sysctl(void)
-{
-}
-#endif /* CONFIG_SCHED_DEBUG && CONFIG_SYSCTL */
-
 static void set_rq_online(struct rq *rq)
 {
 	if (!rq->online) {
@@ -6173,11 +5915,16 @@
 /* Setup the mask of cpus configured for isolated domains */
 static int __init isolated_cpu_setup(char *str)
 {
+	int ret;
+
 	alloc_bootmem_cpumask_var(&cpu_isolated_map);
-	cpulist_parse(str, cpu_isolated_map);
+	ret = cpulist_parse(str, cpu_isolated_map);
+	if (ret) {
+		pr_err("sched: Error, all isolcpus= values must be between 0 and %d\n", nr_cpu_ids);
+		return 0;
+	}
 	return 1;
 }
-
 __setup("isolcpus=", isolated_cpu_setup);
 
 struct s_data {
@@ -6840,7 +6587,7 @@
 
 			sched_domains_numa_masks[i][j] = mask;
 
-			for (k = 0; k < nr_node_ids; k++) {
+			for_each_node(k) {
 				if (node_distance(j, k) > sched_domains_numa_distance[i])
 					continue;
 
@@ -7860,11 +7607,9 @@
 void sched_offline_group(struct task_group *tg)
 {
 	unsigned long flags;
-	int i;
 
 	/* end participation in shares distribution */
-	for_each_possible_cpu(i)
-		unregister_fair_sched_group(tg, i);
+	unregister_fair_sched_group(tg);
 
 	spin_lock_irqsave(&task_group_lock, flags);
 	list_del_rcu(&tg->list);
@@ -7890,7 +7635,7 @@
 	queued = task_on_rq_queued(tsk);
 
 	if (queued)
-		dequeue_task(rq, tsk, DEQUEUE_SAVE);
+		dequeue_task(rq, tsk, DEQUEUE_SAVE | DEQUEUE_MOVE);
 	if (unlikely(running))
 		put_prev_task(rq, tsk);
 
@@ -7914,7 +7659,7 @@
 	if (unlikely(running))
 		tsk->sched_class->set_curr_task(rq);
 	if (queued)
-		enqueue_task(rq, tsk, ENQUEUE_RESTORE);
+		enqueue_task(rq, tsk, ENQUEUE_RESTORE | ENQUEUE_MOVE);
 
 	task_rq_unlock(rq, tsk, &flags);
 }
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index b2ab2ff..75f98c5 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -262,21 +262,21 @@
 #ifdef CONFIG_PARAVIRT
 	if (static_key_false(&paravirt_steal_enabled)) {
 		u64 steal;
-		cputime_t steal_ct;
+		unsigned long steal_jiffies;
 
 		steal = paravirt_steal_clock(smp_processor_id());
 		steal -= this_rq()->prev_steal_time;
 
 		/*
-		 * cputime_t may be less precise than nsecs (eg: if it's
-		 * based on jiffies). Lets cast the result to cputime
+		 * steal is in nsecs but our caller is expecting steal
+		 * time in jiffies. Lets cast the result to jiffies
 		 * granularity and account the rest on the next rounds.
 		 */
-		steal_ct = nsecs_to_cputime(steal);
-		this_rq()->prev_steal_time += cputime_to_nsecs(steal_ct);
+		steal_jiffies = nsecs_to_jiffies(steal);
+		this_rq()->prev_steal_time += jiffies_to_nsecs(steal_jiffies);
 
-		account_steal_time(steal_ct);
-		return steal_ct;
+		account_steal_time(jiffies_to_cputime(steal_jiffies));
+		return steal_jiffies;
 	}
 #endif
 	return false;
@@ -668,26 +668,25 @@
 #endif /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
-static unsigned long long vtime_delta(struct task_struct *tsk)
+static cputime_t vtime_delta(struct task_struct *tsk)
 {
-	unsigned long long clock;
+	unsigned long now = READ_ONCE(jiffies);
 
-	clock = local_clock();
-	if (clock < tsk->vtime_snap)
+	if (time_before(now, (unsigned long)tsk->vtime_snap))
 		return 0;
 
-	return clock - tsk->vtime_snap;
+	return jiffies_to_cputime(now - tsk->vtime_snap);
 }
 
 static cputime_t get_vtime_delta(struct task_struct *tsk)
 {
-	unsigned long long delta = vtime_delta(tsk);
+	unsigned long now = READ_ONCE(jiffies);
+	unsigned long delta = now - tsk->vtime_snap;
 
 	WARN_ON_ONCE(tsk->vtime_snap_whence == VTIME_INACTIVE);
-	tsk->vtime_snap += delta;
+	tsk->vtime_snap = now;
 
-	/* CHECKME: always safe to convert nsecs to cputime? */
-	return nsecs_to_cputime(delta);
+	return jiffies_to_cputime(delta);
 }
 
 static void __vtime_account_system(struct task_struct *tsk)
@@ -699,6 +698,9 @@
 
 void vtime_account_system(struct task_struct *tsk)
 {
+	if (!vtime_delta(tsk))
+		return;
+
 	write_seqcount_begin(&tsk->vtime_seqcount);
 	__vtime_account_system(tsk);
 	write_seqcount_end(&tsk->vtime_seqcount);
@@ -707,7 +709,8 @@
 void vtime_gen_account_irq_exit(struct task_struct *tsk)
 {
 	write_seqcount_begin(&tsk->vtime_seqcount);
-	__vtime_account_system(tsk);
+	if (vtime_delta(tsk))
+		__vtime_account_system(tsk);
 	if (context_tracking_in_user())
 		tsk->vtime_snap_whence = VTIME_USER;
 	write_seqcount_end(&tsk->vtime_seqcount);
@@ -718,16 +721,19 @@
 	cputime_t delta_cpu;
 
 	write_seqcount_begin(&tsk->vtime_seqcount);
-	delta_cpu = get_vtime_delta(tsk);
 	tsk->vtime_snap_whence = VTIME_SYS;
-	account_user_time(tsk, delta_cpu, cputime_to_scaled(delta_cpu));
+	if (vtime_delta(tsk)) {
+		delta_cpu = get_vtime_delta(tsk);
+		account_user_time(tsk, delta_cpu, cputime_to_scaled(delta_cpu));
+	}
 	write_seqcount_end(&tsk->vtime_seqcount);
 }
 
 void vtime_user_enter(struct task_struct *tsk)
 {
 	write_seqcount_begin(&tsk->vtime_seqcount);
-	__vtime_account_system(tsk);
+	if (vtime_delta(tsk))
+		__vtime_account_system(tsk);
 	tsk->vtime_snap_whence = VTIME_USER;
 	write_seqcount_end(&tsk->vtime_seqcount);
 }
@@ -742,7 +748,8 @@
 	 * that can thus safely catch up with a tickless delta.
 	 */
 	write_seqcount_begin(&tsk->vtime_seqcount);
-	__vtime_account_system(tsk);
+	if (vtime_delta(tsk))
+		__vtime_account_system(tsk);
 	current->flags |= PF_VCPU;
 	write_seqcount_end(&tsk->vtime_seqcount);
 }
@@ -772,7 +779,7 @@
 
 	write_seqcount_begin(&current->vtime_seqcount);
 	current->vtime_snap_whence = VTIME_SYS;
-	current->vtime_snap = sched_clock_cpu(smp_processor_id());
+	current->vtime_snap = jiffies;
 	write_seqcount_end(&current->vtime_seqcount);
 }
 
@@ -783,7 +790,7 @@
 	local_irq_save(flags);
 	write_seqcount_begin(&t->vtime_seqcount);
 	t->vtime_snap_whence = VTIME_SYS;
-	t->vtime_snap = sched_clock_cpu(cpu);
+	t->vtime_snap = jiffies;
 	write_seqcount_end(&t->vtime_seqcount);
 	local_irq_restore(flags);
 }
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index cd64c97..c7a036f 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -352,7 +352,15 @@
 	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
 	struct rq *rq = rq_of_dl_rq(dl_rq);
 
-	WARN_ON(!dl_se->dl_new || dl_se->dl_throttled);
+	WARN_ON(dl_time_before(rq_clock(rq), dl_se->deadline));
+
+	/*
+	 * We are racing with the deadline timer. So, do nothing because
+	 * the deadline timer handler will take care of properly recharging
+	 * the runtime and postponing the deadline
+	 */
+	if (dl_se->dl_throttled)
+		return;
 
 	/*
 	 * We use the regular wall clock time to set deadlines in the
@@ -361,7 +369,6 @@
 	 */
 	dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
 	dl_se->runtime = pi_se->dl_runtime;
-	dl_se->dl_new = 0;
 }
 
 /*
@@ -399,6 +406,9 @@
 		dl_se->runtime = pi_se->dl_runtime;
 	}
 
+	if (dl_se->dl_yielded && dl_se->runtime > 0)
+		dl_se->runtime = 0;
+
 	/*
 	 * We keep moving the deadline away until we get some
 	 * available runtime for the entity. This ensures correct
@@ -420,7 +430,7 @@
 	 * entity.
 	 */
 	if (dl_time_before(dl_se->deadline, rq_clock(rq))) {
-		printk_deferred_once("sched: DL replenish lagged to much\n");
+		printk_deferred_once("sched: DL replenish lagged too much\n");
 		dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
 		dl_se->runtime = pi_se->dl_runtime;
 	}
@@ -500,15 +510,6 @@
 	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
 	struct rq *rq = rq_of_dl_rq(dl_rq);
 
-	/*
-	 * The arrival of a new instance needs special treatment, i.e.,
-	 * the actual scheduling parameters have to be "renewed".
-	 */
-	if (dl_se->dl_new) {
-		setup_new_dl_entity(dl_se, pi_se);
-		return;
-	}
-
 	if (dl_time_before(dl_se->deadline, rq_clock(rq)) ||
 	    dl_entity_overflow(dl_se, pi_se, rq_clock(rq))) {
 		dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
@@ -605,16 +606,6 @@
 	}
 
 	/*
-	 * This is possible if switched_from_dl() raced against a running
-	 * callback that took the above !dl_task() path and we've since then
-	 * switched back into SCHED_DEADLINE.
-	 *
-	 * There's nothing to do except drop our task reference.
-	 */
-	if (dl_se->dl_new)
-		goto unlock;
-
-	/*
 	 * The task might have been boosted by someone else and might be in the
 	 * boosting/deboosting path, its not throttled.
 	 */
@@ -735,8 +726,11 @@
 	 * approach need further study.
 	 */
 	delta_exec = rq_clock_task(rq) - curr->se.exec_start;
-	if (unlikely((s64)delta_exec <= 0))
+	if (unlikely((s64)delta_exec <= 0)) {
+		if (unlikely(dl_se->dl_yielded))
+			goto throttle;
 		return;
+	}
 
 	schedstat_set(curr->se.statistics.exec_max,
 		      max(curr->se.statistics.exec_max, delta_exec));
@@ -749,8 +743,10 @@
 
 	sched_rt_avg_update(rq, delta_exec);
 
-	dl_se->runtime -= dl_se->dl_yielded ? 0 : delta_exec;
-	if (dl_runtime_exceeded(dl_se)) {
+	dl_se->runtime -= delta_exec;
+
+throttle:
+	if (dl_runtime_exceeded(dl_se) || dl_se->dl_yielded) {
 		dl_se->dl_throttled = 1;
 		__dequeue_task_dl(rq, curr, 0);
 		if (unlikely(dl_se->dl_boosted || !start_dl_timer(curr)))
@@ -917,7 +913,7 @@
 	 * parameters of the task might need updating. Otherwise,
 	 * we want a replenishment of its runtime.
 	 */
-	if (dl_se->dl_new || flags & ENQUEUE_WAKEUP)
+	if (flags & ENQUEUE_WAKEUP)
 		update_dl_entity(dl_se, pi_se);
 	else if (flags & ENQUEUE_REPLENISH)
 		replenish_dl_entity(dl_se, pi_se);
@@ -994,18 +990,14 @@
  */
 static void yield_task_dl(struct rq *rq)
 {
-	struct task_struct *p = rq->curr;
-
 	/*
 	 * We make the task go to sleep until its current deadline by
 	 * forcing its runtime to zero. This way, update_curr_dl() stops
 	 * it and the bandwidth timer will wake it up and will give it
 	 * new scheduling parameters (thanks to dl_yielded=1).
 	 */
-	if (p->dl.runtime > 0) {
-		rq->curr->dl.dl_yielded = 1;
-		p->dl.runtime = 0;
-	}
+	rq->curr->dl.dl_yielded = 1;
+
 	update_rq_clock(rq);
 	update_curr_dl(rq);
 	/*
@@ -1722,6 +1714,9 @@
  */
 static void switched_to_dl(struct rq *rq, struct task_struct *p)
 {
+	if (dl_time_before(p->dl.deadline, rq_clock(rq)))
+		setup_new_dl_entity(&p->dl, &p->dl);
+
 	if (task_on_rq_queued(p) && rq->curr != p) {
 #ifdef CONFIG_SMP
 		if (p->nr_cpus_allowed > 1 && rq->dl.overloaded)
@@ -1768,8 +1763,7 @@
 		 */
 		resched_curr(rq);
 #endif /* CONFIG_SMP */
-	} else
-		switched_to_dl(rq, p);
+	}
 }
 
 const struct sched_class dl_sched_class = {
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 6415117..4fbc3bd 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -16,6 +16,7 @@
 #include <linux/kallsyms.h>
 #include <linux/utsname.h>
 #include <linux/mempolicy.h>
+#include <linux/debugfs.h>
 
 #include "sched.h"
 
@@ -58,6 +59,309 @@
 
 #define SPLIT_NS(x) nsec_high(x), nsec_low(x)
 
+#define SCHED_FEAT(name, enabled)	\
+	#name ,
+
+static const char * const sched_feat_names[] = {
+#include "features.h"
+};
+
+#undef SCHED_FEAT
+
+static int sched_feat_show(struct seq_file *m, void *v)
+{
+	int i;
+
+	for (i = 0; i < __SCHED_FEAT_NR; i++) {
+		if (!(sysctl_sched_features & (1UL << i)))
+			seq_puts(m, "NO_");
+		seq_printf(m, "%s ", sched_feat_names[i]);
+	}
+	seq_puts(m, "\n");
+
+	return 0;
+}
+
+#ifdef HAVE_JUMP_LABEL
+
+#define jump_label_key__true  STATIC_KEY_INIT_TRUE
+#define jump_label_key__false STATIC_KEY_INIT_FALSE
+
+#define SCHED_FEAT(name, enabled)	\
+	jump_label_key__##enabled ,
+
+struct static_key sched_feat_keys[__SCHED_FEAT_NR] = {
+#include "features.h"
+};
+
+#undef SCHED_FEAT
+
+static void sched_feat_disable(int i)
+{
+	static_key_disable(&sched_feat_keys[i]);
+}
+
+static void sched_feat_enable(int i)
+{
+	static_key_enable(&sched_feat_keys[i]);
+}
+#else
+static void sched_feat_disable(int i) { };
+static void sched_feat_enable(int i) { };
+#endif /* HAVE_JUMP_LABEL */
+
+static int sched_feat_set(char *cmp)
+{
+	int i;
+	int neg = 0;
+
+	if (strncmp(cmp, "NO_", 3) == 0) {
+		neg = 1;
+		cmp += 3;
+	}
+
+	for (i = 0; i < __SCHED_FEAT_NR; i++) {
+		if (strcmp(cmp, sched_feat_names[i]) == 0) {
+			if (neg) {
+				sysctl_sched_features &= ~(1UL << i);
+				sched_feat_disable(i);
+			} else {
+				sysctl_sched_features |= (1UL << i);
+				sched_feat_enable(i);
+			}
+			break;
+		}
+	}
+
+	return i;
+}
+
+static ssize_t
+sched_feat_write(struct file *filp, const char __user *ubuf,
+		size_t cnt, loff_t *ppos)
+{
+	char buf[64];
+	char *cmp;
+	int i;
+	struct inode *inode;
+
+	if (cnt > 63)
+		cnt = 63;
+
+	if (copy_from_user(&buf, ubuf, cnt))
+		return -EFAULT;
+
+	buf[cnt] = 0;
+	cmp = strstrip(buf);
+
+	/* Ensure the static_key remains in a consistent state */
+	inode = file_inode(filp);
+	inode_lock(inode);
+	i = sched_feat_set(cmp);
+	inode_unlock(inode);
+	if (i == __SCHED_FEAT_NR)
+		return -EINVAL;
+
+	*ppos += cnt;
+
+	return cnt;
+}
+
+static int sched_feat_open(struct inode *inode, struct file *filp)
+{
+	return single_open(filp, sched_feat_show, NULL);
+}
+
+static const struct file_operations sched_feat_fops = {
+	.open		= sched_feat_open,
+	.write		= sched_feat_write,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static __init int sched_init_debug(void)
+{
+	debugfs_create_file("sched_features", 0644, NULL, NULL,
+			&sched_feat_fops);
+
+	return 0;
+}
+late_initcall(sched_init_debug);
+
+#ifdef CONFIG_SMP
+
+#ifdef CONFIG_SYSCTL
+
+static struct ctl_table sd_ctl_dir[] = {
+	{
+		.procname	= "sched_domain",
+		.mode		= 0555,
+	},
+	{}
+};
+
+static struct ctl_table sd_ctl_root[] = {
+	{
+		.procname	= "kernel",
+		.mode		= 0555,
+		.child		= sd_ctl_dir,
+	},
+	{}
+};
+
+static struct ctl_table *sd_alloc_ctl_entry(int n)
+{
+	struct ctl_table *entry =
+		kcalloc(n, sizeof(struct ctl_table), GFP_KERNEL);
+
+	return entry;
+}
+
+static void sd_free_ctl_entry(struct ctl_table **tablep)
+{
+	struct ctl_table *entry;
+
+	/*
+	 * In the intermediate directories, both the child directory and
+	 * procname are dynamically allocated and could fail but the mode
+	 * will always be set. In the lowest directory the names are
+	 * static strings and all have proc handlers.
+	 */
+	for (entry = *tablep; entry->mode; entry++) {
+		if (entry->child)
+			sd_free_ctl_entry(&entry->child);
+		if (entry->proc_handler == NULL)
+			kfree(entry->procname);
+	}
+
+	kfree(*tablep);
+	*tablep = NULL;
+}
+
+static int min_load_idx = 0;
+static int max_load_idx = CPU_LOAD_IDX_MAX-1;
+
+static void
+set_table_entry(struct ctl_table *entry,
+		const char *procname, void *data, int maxlen,
+		umode_t mode, proc_handler *proc_handler,
+		bool load_idx)
+{
+	entry->procname = procname;
+	entry->data = data;
+	entry->maxlen = maxlen;
+	entry->mode = mode;
+	entry->proc_handler = proc_handler;
+
+	if (load_idx) {
+		entry->extra1 = &min_load_idx;
+		entry->extra2 = &max_load_idx;
+	}
+}
+
+static struct ctl_table *
+sd_alloc_ctl_domain_table(struct sched_domain *sd)
+{
+	struct ctl_table *table = sd_alloc_ctl_entry(14);
+
+	if (table == NULL)
+		return NULL;
+
+	set_table_entry(&table[0], "min_interval", &sd->min_interval,
+		sizeof(long), 0644, proc_doulongvec_minmax, false);
+	set_table_entry(&table[1], "max_interval", &sd->max_interval,
+		sizeof(long), 0644, proc_doulongvec_minmax, false);
+	set_table_entry(&table[2], "busy_idx", &sd->busy_idx,
+		sizeof(int), 0644, proc_dointvec_minmax, true);
+	set_table_entry(&table[3], "idle_idx", &sd->idle_idx,
+		sizeof(int), 0644, proc_dointvec_minmax, true);
+	set_table_entry(&table[4], "newidle_idx", &sd->newidle_idx,
+		sizeof(int), 0644, proc_dointvec_minmax, true);
+	set_table_entry(&table[5], "wake_idx", &sd->wake_idx,
+		sizeof(int), 0644, proc_dointvec_minmax, true);
+	set_table_entry(&table[6], "forkexec_idx", &sd->forkexec_idx,
+		sizeof(int), 0644, proc_dointvec_minmax, true);
+	set_table_entry(&table[7], "busy_factor", &sd->busy_factor,
+		sizeof(int), 0644, proc_dointvec_minmax, false);
+	set_table_entry(&table[8], "imbalance_pct", &sd->imbalance_pct,
+		sizeof(int), 0644, proc_dointvec_minmax, false);
+	set_table_entry(&table[9], "cache_nice_tries",
+		&sd->cache_nice_tries,
+		sizeof(int), 0644, proc_dointvec_minmax, false);
+	set_table_entry(&table[10], "flags", &sd->flags,
+		sizeof(int), 0644, proc_dointvec_minmax, false);
+	set_table_entry(&table[11], "max_newidle_lb_cost",
+		&sd->max_newidle_lb_cost,
+		sizeof(long), 0644, proc_doulongvec_minmax, false);
+	set_table_entry(&table[12], "name", sd->name,
+		CORENAME_MAX_SIZE, 0444, proc_dostring, false);
+	/* &table[13] is terminator */
+
+	return table;
+}
+
+static struct ctl_table *sd_alloc_ctl_cpu_table(int cpu)
+{
+	struct ctl_table *entry, *table;
+	struct sched_domain *sd;
+	int domain_num = 0, i;
+	char buf[32];
+
+	for_each_domain(cpu, sd)
+		domain_num++;
+	entry = table = sd_alloc_ctl_entry(domain_num + 1);
+	if (table == NULL)
+		return NULL;
+
+	i = 0;
+	for_each_domain(cpu, sd) {
+		snprintf(buf, 32, "domain%d", i);
+		entry->procname = kstrdup(buf, GFP_KERNEL);
+		entry->mode = 0555;
+		entry->child = sd_alloc_ctl_domain_table(sd);
+		entry++;
+		i++;
+	}
+	return table;
+}
+
+static struct ctl_table_header *sd_sysctl_header;
+void register_sched_domain_sysctl(void)
+{
+	int i, cpu_num = num_possible_cpus();
+	struct ctl_table *entry = sd_alloc_ctl_entry(cpu_num + 1);
+	char buf[32];
+
+	WARN_ON(sd_ctl_dir[0].child);
+	sd_ctl_dir[0].child = entry;
+
+	if (entry == NULL)
+		return;
+
+	for_each_possible_cpu(i) {
+		snprintf(buf, 32, "cpu%d", i);
+		entry->procname = kstrdup(buf, GFP_KERNEL);
+		entry->mode = 0555;
+		entry->child = sd_alloc_ctl_cpu_table(i);
+		entry++;
+	}
+
+	WARN_ON(sd_sysctl_header);
+	sd_sysctl_header = register_sysctl_table(sd_ctl_root);
+}
+
+/* may be called multiple times per register */
+void unregister_sched_domain_sysctl(void)
+{
+	unregister_sysctl_table(sd_sysctl_header);
+	sd_sysctl_header = NULL;
+	if (sd_ctl_dir[0].child)
+		sd_free_ctl_entry(&sd_ctl_dir[0].child);
+}
+#endif /* CONFIG_SYSCTL */
+#endif /* CONFIG_SMP */
+
 #ifdef CONFIG_FAIR_GROUP_SCHED
 static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group *tg)
 {
@@ -75,16 +379,18 @@
 	PN(se->vruntime);
 	PN(se->sum_exec_runtime);
 #ifdef CONFIG_SCHEDSTATS
-	PN(se->statistics.wait_start);
-	PN(se->statistics.sleep_start);
-	PN(se->statistics.block_start);
-	PN(se->statistics.sleep_max);
-	PN(se->statistics.block_max);
-	PN(se->statistics.exec_max);
-	PN(se->statistics.slice_max);
-	PN(se->statistics.wait_max);
-	PN(se->statistics.wait_sum);
-	P(se->statistics.wait_count);
+	if (schedstat_enabled()) {
+		PN(se->statistics.wait_start);
+		PN(se->statistics.sleep_start);
+		PN(se->statistics.block_start);
+		PN(se->statistics.sleep_max);
+		PN(se->statistics.block_max);
+		PN(se->statistics.exec_max);
+		PN(se->statistics.slice_max);
+		PN(se->statistics.wait_max);
+		PN(se->statistics.wait_sum);
+		P(se->statistics.wait_count);
+	}
 #endif
 	P(se->load.weight);
 #ifdef CONFIG_SMP
@@ -122,10 +428,12 @@
 		(long long)(p->nvcsw + p->nivcsw),
 		p->prio);
 #ifdef CONFIG_SCHEDSTATS
-	SEQ_printf(m, "%9Ld.%06ld %9Ld.%06ld %9Ld.%06ld",
-		SPLIT_NS(p->se.statistics.wait_sum),
-		SPLIT_NS(p->se.sum_exec_runtime),
-		SPLIT_NS(p->se.statistics.sum_sleep_runtime));
+	if (schedstat_enabled()) {
+		SEQ_printf(m, "%9Ld.%06ld %9Ld.%06ld %9Ld.%06ld",
+			SPLIT_NS(p->se.statistics.wait_sum),
+			SPLIT_NS(p->se.sum_exec_runtime),
+			SPLIT_NS(p->se.statistics.sum_sleep_runtime));
+	}
 #else
 	SEQ_printf(m, "%9Ld.%06ld %9Ld.%06ld %9Ld.%06ld",
 		0LL, 0L,
@@ -258,8 +566,17 @@
 
 void print_dl_rq(struct seq_file *m, int cpu, struct dl_rq *dl_rq)
 {
+	struct dl_bw *dl_bw;
+
 	SEQ_printf(m, "\ndl_rq[%d]:\n", cpu);
 	SEQ_printf(m, "  .%-30s: %ld\n", "dl_nr_running", dl_rq->dl_nr_running);
+#ifdef CONFIG_SMP
+	dl_bw = &cpu_rq(cpu)->rd->dl_bw;
+#else
+	dl_bw = &dl_rq->dl_bw;
+#endif
+	SEQ_printf(m, "  .%-30s: %lld\n", "dl_bw->bw", dl_bw->bw);
+	SEQ_printf(m, "  .%-30s: %lld\n", "dl_bw->total_bw", dl_bw->total_bw);
 }
 
 extern __read_mostly int sched_clock_running;
@@ -313,17 +630,18 @@
 #define P(n) SEQ_printf(m, "  .%-30s: %d\n", #n, rq->n);
 #define P64(n) SEQ_printf(m, "  .%-30s: %Ld\n", #n, rq->n);
 
-	P(yld_count);
-
-	P(sched_count);
-	P(sched_goidle);
 #ifdef CONFIG_SMP
 	P64(avg_idle);
 	P64(max_idle_balance_cost);
 #endif
 
-	P(ttwu_count);
-	P(ttwu_local);
+	if (schedstat_enabled()) {
+		P(yld_count);
+		P(sched_count);
+		P(sched_goidle);
+		P(ttwu_count);
+		P(ttwu_local);
+	}
 
 #undef P
 #undef P64
@@ -569,38 +887,39 @@
 	nr_switches = p->nvcsw + p->nivcsw;
 
 #ifdef CONFIG_SCHEDSTATS
-	PN(se.statistics.sum_sleep_runtime);
-	PN(se.statistics.wait_start);
-	PN(se.statistics.sleep_start);
-	PN(se.statistics.block_start);
-	PN(se.statistics.sleep_max);
-	PN(se.statistics.block_max);
-	PN(se.statistics.exec_max);
-	PN(se.statistics.slice_max);
-	PN(se.statistics.wait_max);
-	PN(se.statistics.wait_sum);
-	P(se.statistics.wait_count);
-	PN(se.statistics.iowait_sum);
-	P(se.statistics.iowait_count);
 	P(se.nr_migrations);
-	P(se.statistics.nr_migrations_cold);
-	P(se.statistics.nr_failed_migrations_affine);
-	P(se.statistics.nr_failed_migrations_running);
-	P(se.statistics.nr_failed_migrations_hot);
-	P(se.statistics.nr_forced_migrations);
-	P(se.statistics.nr_wakeups);
-	P(se.statistics.nr_wakeups_sync);
-	P(se.statistics.nr_wakeups_migrate);
-	P(se.statistics.nr_wakeups_local);
-	P(se.statistics.nr_wakeups_remote);
-	P(se.statistics.nr_wakeups_affine);
-	P(se.statistics.nr_wakeups_affine_attempts);
-	P(se.statistics.nr_wakeups_passive);
-	P(se.statistics.nr_wakeups_idle);
 
-	{
+	if (schedstat_enabled()) {
 		u64 avg_atom, avg_per_cpu;
 
+		PN(se.statistics.sum_sleep_runtime);
+		PN(se.statistics.wait_start);
+		PN(se.statistics.sleep_start);
+		PN(se.statistics.block_start);
+		PN(se.statistics.sleep_max);
+		PN(se.statistics.block_max);
+		PN(se.statistics.exec_max);
+		PN(se.statistics.slice_max);
+		PN(se.statistics.wait_max);
+		PN(se.statistics.wait_sum);
+		P(se.statistics.wait_count);
+		PN(se.statistics.iowait_sum);
+		P(se.statistics.iowait_count);
+		P(se.statistics.nr_migrations_cold);
+		P(se.statistics.nr_failed_migrations_affine);
+		P(se.statistics.nr_failed_migrations_running);
+		P(se.statistics.nr_failed_migrations_hot);
+		P(se.statistics.nr_forced_migrations);
+		P(se.statistics.nr_wakeups);
+		P(se.statistics.nr_wakeups_sync);
+		P(se.statistics.nr_wakeups_migrate);
+		P(se.statistics.nr_wakeups_local);
+		P(se.statistics.nr_wakeups_remote);
+		P(se.statistics.nr_wakeups_affine);
+		P(se.statistics.nr_wakeups_affine_attempts);
+		P(se.statistics.nr_wakeups_passive);
+		P(se.statistics.nr_wakeups_idle);
+
 		avg_atom = p->se.sum_exec_runtime;
 		if (nr_switches)
 			avg_atom = div64_ul(avg_atom, nr_switches);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 1926606..3313052 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -20,8 +20,8 @@
  *  Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
  */
 
-#include <linux/latencytop.h>
 #include <linux/sched.h>
+#include <linux/latencytop.h>
 #include <linux/cpumask.h>
 #include <linux/cpuidle.h>
 #include <linux/slab.h>
@@ -755,7 +755,9 @@
 update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
 	struct task_struct *p;
-	u64 delta = rq_clock(rq_of(cfs_rq)) - se->statistics.wait_start;
+	u64 delta;
+
+	delta = rq_clock(rq_of(cfs_rq)) - se->statistics.wait_start;
 
 	if (entity_is_task(se)) {
 		p = task_of(se);
@@ -776,22 +778,12 @@
 	se->statistics.wait_sum += delta;
 	se->statistics.wait_start = 0;
 }
-#else
-static inline void
-update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
-{
-}
-
-static inline void
-update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
-{
-}
-#endif
 
 /*
  * Task is being enqueued - update stats:
  */
-static void update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
+static inline void
+update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
 	/*
 	 * Are we enqueueing a waiting task? (for current tasks
@@ -802,7 +794,7 @@
 }
 
 static inline void
-update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
+update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 {
 	/*
 	 * Mark the end of the wait period if dequeueing a
@@ -810,7 +802,40 @@
 	 */
 	if (se != cfs_rq->curr)
 		update_stats_wait_end(cfs_rq, se);
+
+	if (flags & DEQUEUE_SLEEP) {
+		if (entity_is_task(se)) {
+			struct task_struct *tsk = task_of(se);
+
+			if (tsk->state & TASK_INTERRUPTIBLE)
+				se->statistics.sleep_start = rq_clock(rq_of(cfs_rq));
+			if (tsk->state & TASK_UNINTERRUPTIBLE)
+				se->statistics.block_start = rq_clock(rq_of(cfs_rq));
+		}
+	}
+
 }
+#else
+static inline void
+update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+}
+
+static inline void
+update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+}
+
+static inline void
+update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+}
+
+static inline void
+update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
+{
+}
+#endif
 
 /*
  * We are picking a new current task - update its stats:
@@ -907,10 +932,11 @@
 	spinlock_t lock; /* nr_tasks, tasks */
 	int nr_tasks;
 	pid_t gid;
+	int active_nodes;
 
 	struct rcu_head rcu;
-	nodemask_t active_nodes;
 	unsigned long total_faults;
+	unsigned long max_faults_cpu;
 	/*
 	 * Faults_cpu is used to decide whether memory should move
 	 * towards the CPU. As a consequence, these stats are weighted
@@ -969,6 +995,18 @@
 		group->faults_cpu[task_faults_idx(NUMA_MEM, nid, 1)];
 }
 
+/*
+ * A node triggering more than 1/3 as many NUMA faults as the maximum is
+ * considered part of a numa group's pseudo-interleaving set. Migrations
+ * between these nodes are slowed down, to allow things to settle down.
+ */
+#define ACTIVE_NODE_FRACTION 3
+
+static bool numa_is_active_node(int nid, struct numa_group *ng)
+{
+	return group_faults_cpu(ng, nid) * ACTIVE_NODE_FRACTION > ng->max_faults_cpu;
+}
+
 /* Handle placement on systems where not all nodes are directly connected. */
 static unsigned long score_nearby_nodes(struct task_struct *p, int nid,
 					int maxdist, bool task)
@@ -1118,27 +1156,23 @@
 		return true;
 
 	/*
-	 * Do not migrate if the destination is not a node that
-	 * is actively used by this numa group.
+	 * Destination node is much more heavily used than the source
+	 * node? Allow migration.
 	 */
-	if (!node_isset(dst_nid, ng->active_nodes))
-		return false;
-
-	/*
-	 * Source is a node that is not actively used by this
-	 * numa group, while the destination is. Migrate.
-	 */
-	if (!node_isset(src_nid, ng->active_nodes))
+	if (group_faults_cpu(ng, dst_nid) > group_faults_cpu(ng, src_nid) *
+					ACTIVE_NODE_FRACTION)
 		return true;
 
 	/*
-	 * Both source and destination are nodes in active
-	 * use by this numa group. Maximize memory bandwidth
-	 * by migrating from more heavily used groups, to less
-	 * heavily used ones, spreading the load around.
-	 * Use a 1/4 hysteresis to avoid spurious page movement.
+	 * Distribute memory according to CPU & memory use on each node,
+	 * with 3/4 hysteresis to avoid unnecessary memory migrations:
+	 *
+	 * faults_cpu(dst)   3   faults_cpu(src)
+	 * --------------- * - > ---------------
+	 * faults_mem(dst)   4   faults_mem(src)
 	 */
-	return group_faults(p, dst_nid) < (group_faults(p, src_nid) * 3 / 4);
+	return group_faults_cpu(ng, dst_nid) * group_faults(p, src_nid) * 3 >
+	       group_faults_cpu(ng, src_nid) * group_faults(p, dst_nid) * 4;
 }
 
 static unsigned long weighted_cpuload(const int cpu);
@@ -1220,8 +1254,6 @@
 {
 	if (env->best_task)
 		put_task_struct(env->best_task);
-	if (p)
-		get_task_struct(p);
 
 	env->best_task = p;
 	env->best_imp = imp;
@@ -1289,20 +1321,30 @@
 	long imp = env->p->numa_group ? groupimp : taskimp;
 	long moveimp = imp;
 	int dist = env->dist;
+	bool assigned = false;
 
 	rcu_read_lock();
 
 	raw_spin_lock_irq(&dst_rq->lock);
 	cur = dst_rq->curr;
 	/*
-	 * No need to move the exiting task, and this ensures that ->curr
-	 * wasn't reaped and thus get_task_struct() in task_numa_assign()
-	 * is safe under RCU read lock.
-	 * Note that rcu_read_lock() itself can't protect from the final
-	 * put_task_struct() after the last schedule().
+	 * No need to move the exiting task or idle task.
 	 */
 	if ((cur->flags & PF_EXITING) || is_idle_task(cur))
 		cur = NULL;
+	else {
+		/*
+		 * The task_struct must be protected here to protect the
+		 * p->numa_faults access in the task_weight since the
+		 * numa_faults could already be freed in the following path:
+		 * finish_task_switch()
+		 *     --> put_task_struct()
+		 *         --> __put_task_struct()
+		 *             --> task_numa_free()
+		 */
+		get_task_struct(cur);
+	}
+
 	raw_spin_unlock_irq(&dst_rq->lock);
 
 	/*
@@ -1386,6 +1428,7 @@
 		 */
 		if (!load_too_imbalanced(src_load, dst_load, env)) {
 			imp = moveimp - 1;
+			put_task_struct(cur);
 			cur = NULL;
 			goto assign;
 		}
@@ -1411,9 +1454,16 @@
 		env->dst_cpu = select_idle_sibling(env->p, env->dst_cpu);
 
 assign:
+	assigned = true;
 	task_numa_assign(env, cur, imp);
 unlock:
 	rcu_read_unlock();
+	/*
+	 * The dst_rq->curr isn't assigned. The protection for task_struct is
+	 * finished.
+	 */
+	if (cur && !assigned)
+		put_task_struct(cur);
 }
 
 static void task_numa_find_cpu(struct task_numa_env *env,
@@ -1468,7 +1518,7 @@
 
 		.best_task = NULL,
 		.best_imp = 0,
-		.best_cpu = -1
+		.best_cpu = -1,
 	};
 	struct sched_domain *sd;
 	unsigned long taskweight, groupweight;
@@ -1520,8 +1570,7 @@
 	 *   multiple NUMA nodes; in order to better consolidate the group,
 	 *   we need to check other locations.
 	 */
-	if (env.best_cpu == -1 || (p->numa_group &&
-			nodes_weight(p->numa_group->active_nodes) > 1)) {
+	if (env.best_cpu == -1 || (p->numa_group && p->numa_group->active_nodes > 1)) {
 		for_each_online_node(nid) {
 			if (nid == env.src_nid || nid == p->numa_preferred_nid)
 				continue;
@@ -1556,12 +1605,14 @@
 	 * trying for a better one later. Do not set the preferred node here.
 	 */
 	if (p->numa_group) {
+		struct numa_group *ng = p->numa_group;
+
 		if (env.best_cpu == -1)
 			nid = env.src_nid;
 		else
 			nid = env.dst_nid;
 
-		if (node_isset(nid, p->numa_group->active_nodes))
+		if (ng->active_nodes > 1 && numa_is_active_node(env.dst_nid, ng))
 			sched_setnuma(p, env.dst_nid);
 	}
 
@@ -1611,20 +1662,15 @@
 }
 
 /*
- * Find the nodes on which the workload is actively running. We do this by
+ * Find out how many nodes on the workload is actively running on. Do this by
  * tracking the nodes from which NUMA hinting faults are triggered. This can
  * be different from the set of nodes where the workload's memory is currently
  * located.
- *
- * The bitmask is used to make smarter decisions on when to do NUMA page
- * migrations, To prevent flip-flopping, and excessive page migrations, nodes
- * are added when they cause over 6/16 of the maximum number of faults, but
- * only removed when they drop below 3/16.
  */
-static void update_numa_active_node_mask(struct numa_group *numa_group)
+static void numa_group_count_active_nodes(struct numa_group *numa_group)
 {
 	unsigned long faults, max_faults = 0;
-	int nid;
+	int nid, active_nodes = 0;
 
 	for_each_online_node(nid) {
 		faults = group_faults_cpu(numa_group, nid);
@@ -1634,12 +1680,12 @@
 
 	for_each_online_node(nid) {
 		faults = group_faults_cpu(numa_group, nid);
-		if (!node_isset(nid, numa_group->active_nodes)) {
-			if (faults > max_faults * 6 / 16)
-				node_set(nid, numa_group->active_nodes);
-		} else if (faults < max_faults * 3 / 16)
-			node_clear(nid, numa_group->active_nodes);
+		if (faults * ACTIVE_NODE_FRACTION > max_faults)
+			active_nodes++;
 	}
+
+	numa_group->max_faults_cpu = max_faults;
+	numa_group->active_nodes = active_nodes;
 }
 
 /*
@@ -1930,7 +1976,7 @@
 	update_task_scan_period(p, fault_types[0], fault_types[1]);
 
 	if (p->numa_group) {
-		update_numa_active_node_mask(p->numa_group);
+		numa_group_count_active_nodes(p->numa_group);
 		spin_unlock_irq(group_lock);
 		max_nid = preferred_group_nid(p, max_group_nid);
 	}
@@ -1974,14 +2020,14 @@
 			return;
 
 		atomic_set(&grp->refcount, 1);
+		grp->active_nodes = 1;
+		grp->max_faults_cpu = 0;
 		spin_lock_init(&grp->lock);
 		grp->gid = p->pid;
 		/* Second half of the array tracks nids where faults happen */
 		grp->faults_cpu = grp->faults + NR_NUMA_HINT_FAULT_TYPES *
 						nr_node_ids;
 
-		node_set(task_node(current), grp->active_nodes);
-
 		for (i = 0; i < NR_NUMA_HINT_FAULT_STATS * nr_node_ids; i++)
 			grp->faults[i] = p->numa_faults[i];
 
@@ -2095,6 +2141,7 @@
 	bool migrated = flags & TNF_MIGRATED;
 	int cpu_node = task_node(current);
 	int local = !!(flags & TNF_FAULT_LOCAL);
+	struct numa_group *ng;
 	int priv;
 
 	if (!static_branch_likely(&sched_numa_balancing))
@@ -2135,9 +2182,10 @@
 	 * actively using should be counted as local. This allows the
 	 * scan rate to slow down when a workload has settled down.
 	 */
-	if (!priv && !local && p->numa_group &&
-			node_isset(cpu_node, p->numa_group->active_nodes) &&
-			node_isset(mem_node, p->numa_group->active_nodes))
+	ng = p->numa_group;
+	if (!priv && !local && ng && ng->active_nodes > 1 &&
+				numa_is_active_node(cpu_node, ng) &&
+				numa_is_active_node(mem_node, ng))
 		local = 1;
 
 	task_numa_placement(p);
@@ -3086,6 +3134,26 @@
 
 static void check_enqueue_throttle(struct cfs_rq *cfs_rq);
 
+static inline void check_schedstat_required(void)
+{
+#ifdef CONFIG_SCHEDSTATS
+	if (schedstat_enabled())
+		return;
+
+	/* Force schedstat enabled if a dependent tracepoint is active */
+	if (trace_sched_stat_wait_enabled()    ||
+			trace_sched_stat_sleep_enabled()   ||
+			trace_sched_stat_iowait_enabled()  ||
+			trace_sched_stat_blocked_enabled() ||
+			trace_sched_stat_runtime_enabled())  {
+		pr_warn_once("Scheduler tracepoints stat_sleep, stat_iowait, "
+			     "stat_blocked and stat_runtime require the "
+			     "kernel parameter schedstats=enabled or "
+			     "kernel.sched_schedstats=1\n");
+	}
+#endif
+}
+
 static void
 enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 {
@@ -3106,11 +3174,15 @@
 
 	if (flags & ENQUEUE_WAKEUP) {
 		place_entity(cfs_rq, se, 0);
-		enqueue_sleeper(cfs_rq, se);
+		if (schedstat_enabled())
+			enqueue_sleeper(cfs_rq, se);
 	}
 
-	update_stats_enqueue(cfs_rq, se);
-	check_spread(cfs_rq, se);
+	check_schedstat_required();
+	if (schedstat_enabled()) {
+		update_stats_enqueue(cfs_rq, se);
+		check_spread(cfs_rq, se);
+	}
 	if (se != cfs_rq->curr)
 		__enqueue_entity(cfs_rq, se);
 	se->on_rq = 1;
@@ -3177,19 +3249,8 @@
 	update_curr(cfs_rq);
 	dequeue_entity_load_avg(cfs_rq, se);
 
-	update_stats_dequeue(cfs_rq, se);
-	if (flags & DEQUEUE_SLEEP) {
-#ifdef CONFIG_SCHEDSTATS
-		if (entity_is_task(se)) {
-			struct task_struct *tsk = task_of(se);
-
-			if (tsk->state & TASK_INTERRUPTIBLE)
-				se->statistics.sleep_start = rq_clock(rq_of(cfs_rq));
-			if (tsk->state & TASK_UNINTERRUPTIBLE)
-				se->statistics.block_start = rq_clock(rq_of(cfs_rq));
-		}
-#endif
-	}
+	if (schedstat_enabled())
+		update_stats_dequeue(cfs_rq, se, flags);
 
 	clear_buddies(cfs_rq, se);
 
@@ -3263,7 +3324,8 @@
 		 * a CPU. So account for the time it spent waiting on the
 		 * runqueue.
 		 */
-		update_stats_wait_end(cfs_rq, se);
+		if (schedstat_enabled())
+			update_stats_wait_end(cfs_rq, se);
 		__dequeue_entity(cfs_rq, se);
 		update_load_avg(se, 1);
 	}
@@ -3276,7 +3338,7 @@
 	 * least twice that of our own weight (i.e. dont track it
 	 * when there are only lesser-weight tasks around):
 	 */
-	if (rq_of(cfs_rq)->load.weight >= 2*se->load.weight) {
+	if (schedstat_enabled() && rq_of(cfs_rq)->load.weight >= 2*se->load.weight) {
 		se->statistics.slice_max = max(se->statistics.slice_max,
 			se->sum_exec_runtime - se->prev_sum_exec_runtime);
 	}
@@ -3359,9 +3421,13 @@
 	/* throttle cfs_rqs exceeding runtime */
 	check_cfs_rq_runtime(cfs_rq);
 
-	check_spread(cfs_rq, prev);
+	if (schedstat_enabled()) {
+		check_spread(cfs_rq, prev);
+		if (prev->on_rq)
+			update_stats_wait_start(cfs_rq, prev);
+	}
+
 	if (prev->on_rq) {
-		update_stats_wait_start(cfs_rq, prev);
 		/* Put 'current' back into the tree. */
 		__enqueue_entity(cfs_rq, prev);
 		/* in !on_rq case, update occurred at dequeue */
@@ -4443,9 +4509,17 @@
 
 		/* scale is effectively 1 << i now, and >> i divides by scale */
 
-		old_load = this_rq->cpu_load[i] - tickless_load;
+		old_load = this_rq->cpu_load[i];
 		old_load = decay_load_missed(old_load, pending_updates - 1, i);
-		old_load += tickless_load;
+		if (tickless_load) {
+			old_load -= decay_load_missed(tickless_load, pending_updates - 1, i);
+			/*
+			 * old_load can never be a negative value because a
+			 * decayed tickless_load cannot be greater than the
+			 * original tickless_load.
+			 */
+			old_load += tickless_load;
+		}
 		new_load = this_load;
 		/*
 		 * Round up the averaging division if load is increasing. This
@@ -4468,6 +4542,25 @@
 }
 
 #ifdef CONFIG_NO_HZ_COMMON
+static void __update_cpu_load_nohz(struct rq *this_rq,
+				   unsigned long curr_jiffies,
+				   unsigned long load,
+				   int active)
+{
+	unsigned long pending_updates;
+
+	pending_updates = curr_jiffies - this_rq->last_load_update_tick;
+	if (pending_updates) {
+		this_rq->last_load_update_tick = curr_jiffies;
+		/*
+		 * In the regular NOHZ case, we were idle, this means load 0.
+		 * In the NOHZ_FULL case, we were non-idle, we should consider
+		 * its weighted load.
+		 */
+		__update_cpu_load(this_rq, load, pending_updates, active);
+	}
+}
+
 /*
  * There is no sane way to deal with nohz on smp when using jiffies because the
  * cpu doing the jiffies update might drift wrt the cpu doing the jiffy reading
@@ -4485,22 +4578,15 @@
  * Called from nohz_idle_balance() to update the load ratings before doing the
  * idle balance.
  */
-static void update_idle_cpu_load(struct rq *this_rq)
+static void update_cpu_load_idle(struct rq *this_rq)
 {
-	unsigned long curr_jiffies = READ_ONCE(jiffies);
-	unsigned long load = weighted_cpuload(cpu_of(this_rq));
-	unsigned long pending_updates;
-
 	/*
 	 * bail if there's load or we're actually up-to-date.
 	 */
-	if (load || curr_jiffies == this_rq->last_load_update_tick)
+	if (weighted_cpuload(cpu_of(this_rq)))
 		return;
 
-	pending_updates = curr_jiffies - this_rq->last_load_update_tick;
-	this_rq->last_load_update_tick = curr_jiffies;
-
-	__update_cpu_load(this_rq, load, pending_updates, 0);
+	__update_cpu_load_nohz(this_rq, READ_ONCE(jiffies), 0, 0);
 }
 
 /*
@@ -4511,22 +4597,12 @@
 	struct rq *this_rq = this_rq();
 	unsigned long curr_jiffies = READ_ONCE(jiffies);
 	unsigned long load = active ? weighted_cpuload(cpu_of(this_rq)) : 0;
-	unsigned long pending_updates;
 
 	if (curr_jiffies == this_rq->last_load_update_tick)
 		return;
 
 	raw_spin_lock(&this_rq->lock);
-	pending_updates = curr_jiffies - this_rq->last_load_update_tick;
-	if (pending_updates) {
-		this_rq->last_load_update_tick = curr_jiffies;
-		/*
-		 * In the regular NOHZ case, we were idle, this means load 0.
-		 * In the NOHZ_FULL case, we were non-idle, we should consider
-		 * its weighted load.
-		 */
-		__update_cpu_load(this_rq, load, pending_updates, active);
-	}
+	__update_cpu_load_nohz(this_rq, curr_jiffies, load, active);
 	raw_spin_unlock(&this_rq->lock);
 }
 #endif /* CONFIG_NO_HZ */
@@ -4538,7 +4614,7 @@
 {
 	unsigned long load = weighted_cpuload(cpu_of(this_rq));
 	/*
-	 * See the mess around update_idle_cpu_load() / update_cpu_load_nohz().
+	 * See the mess around update_cpu_load_idle() / update_cpu_load_nohz().
 	 */
 	this_rq->last_load_update_tick = jiffies;
 	__update_cpu_load(this_rq, load, 1, 1);
@@ -7832,7 +7908,7 @@
 		if (time_after_eq(jiffies, rq->next_balance)) {
 			raw_spin_lock_irq(&rq->lock);
 			update_rq_clock(rq);
-			update_idle_cpu_load(rq);
+			update_cpu_load_idle(rq);
 			raw_spin_unlock_irq(&rq->lock);
 			rebalance_domains(rq, CPU_IDLE);
 		}
@@ -8218,11 +8294,8 @@
 	for_each_possible_cpu(i) {
 		if (tg->cfs_rq)
 			kfree(tg->cfs_rq[i]);
-		if (tg->se) {
-			if (tg->se[i])
-				remove_entity_load_avg(tg->se[i]);
+		if (tg->se)
 			kfree(tg->se[i]);
-		}
 	}
 
 	kfree(tg->cfs_rq);
@@ -8270,21 +8343,29 @@
 	return 0;
 }
 
-void unregister_fair_sched_group(struct task_group *tg, int cpu)
+void unregister_fair_sched_group(struct task_group *tg)
 {
-	struct rq *rq = cpu_rq(cpu);
 	unsigned long flags;
+	struct rq *rq;
+	int cpu;
 
-	/*
-	* Only empty task groups can be destroyed; so we can speculatively
-	* check on_list without danger of it being re-added.
-	*/
-	if (!tg->cfs_rq[cpu]->on_list)
-		return;
+	for_each_possible_cpu(cpu) {
+		if (tg->se[cpu])
+			remove_entity_load_avg(tg->se[cpu]);
 
-	raw_spin_lock_irqsave(&rq->lock, flags);
-	list_del_leaf_cfs_rq(tg->cfs_rq[cpu]);
-	raw_spin_unlock_irqrestore(&rq->lock, flags);
+		/*
+		 * Only empty task groups can be destroyed; so we can speculatively
+		 * check on_list without danger of it being re-added.
+		 */
+		if (!tg->cfs_rq[cpu]->on_list)
+			continue;
+
+		rq = cpu_rq(cpu);
+
+		raw_spin_lock_irqsave(&rq->lock, flags);
+		list_del_leaf_cfs_rq(tg->cfs_rq[cpu]);
+		raw_spin_unlock_irqrestore(&rq->lock, flags);
+	}
 }
 
 void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
@@ -8366,7 +8447,7 @@
 	return 1;
 }
 
-void unregister_fair_sched_group(struct task_group *tg, int cpu) { }
+void unregister_fair_sched_group(struct task_group *tg) { }
 
 #endif /* CONFIG_FAIR_GROUP_SCHED */
 
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index de0e786..544a713 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -162,7 +162,7 @@
 	 */
 	if (idle_should_freeze()) {
 		entered_state = cpuidle_enter_freeze(drv, dev);
-		if (entered_state >= 0) {
+		if (entered_state > 0) {
 			local_irq_enable();
 			goto exit_idle;
 		}
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 8ec86ab..5624713 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -58,7 +58,15 @@
 	raw_spin_lock(&rt_b->rt_runtime_lock);
 	if (!rt_b->rt_period_active) {
 		rt_b->rt_period_active = 1;
-		hrtimer_forward_now(&rt_b->rt_period_timer, rt_b->rt_period);
+		/*
+		 * SCHED_DEADLINE updates the bandwidth, as a run away
+		 * RT task with a DL task could hog a CPU. But DL does
+		 * not reset the period. If a deadline task was running
+		 * without an RT task running, it can cause RT tasks to
+		 * throttle when they start up. Kick the timer right away
+		 * to update the period.
+		 */
+		hrtimer_forward_now(&rt_b->rt_period_timer, ns_to_ktime(0));
 		hrtimer_start_expires(&rt_b->rt_period_timer, HRTIMER_MODE_ABS_PINNED);
 	}
 	raw_spin_unlock(&rt_b->rt_runtime_lock);
@@ -436,7 +444,7 @@
 
 static inline int on_rt_rq(struct sched_rt_entity *rt_se)
 {
-	return !list_empty(&rt_se->run_list);
+	return rt_se->on_rq;
 }
 
 #ifdef CONFIG_RT_GROUP_SCHED
@@ -482,8 +490,8 @@
 	return rt_se->my_q;
 }
 
-static void enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head);
-static void dequeue_rt_entity(struct sched_rt_entity *rt_se);
+static void enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags);
+static void dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags);
 
 static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
 {
@@ -499,7 +507,7 @@
 		if (!rt_se)
 			enqueue_top_rt_rq(rt_rq);
 		else if (!on_rt_rq(rt_se))
-			enqueue_rt_entity(rt_se, false);
+			enqueue_rt_entity(rt_se, 0);
 
 		if (rt_rq->highest_prio.curr < curr->prio)
 			resched_curr(rq);
@@ -516,7 +524,7 @@
 	if (!rt_se)
 		dequeue_top_rt_rq(rt_rq);
 	else if (on_rt_rq(rt_se))
-		dequeue_rt_entity(rt_se);
+		dequeue_rt_entity(rt_se, 0);
 }
 
 static inline int rt_rq_throttled(struct rt_rq *rt_rq)
@@ -1142,12 +1150,27 @@
 }
 
 static inline
+unsigned int rt_se_rr_nr_running(struct sched_rt_entity *rt_se)
+{
+	struct rt_rq *group_rq = group_rt_rq(rt_se);
+	struct task_struct *tsk;
+
+	if (group_rq)
+		return group_rq->rr_nr_running;
+
+	tsk = rt_task_of(rt_se);
+
+	return (tsk->policy == SCHED_RR) ? 1 : 0;
+}
+
+static inline
 void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
 {
 	int prio = rt_se_prio(rt_se);
 
 	WARN_ON(!rt_prio(prio));
 	rt_rq->rt_nr_running += rt_se_nr_running(rt_se);
+	rt_rq->rr_nr_running += rt_se_rr_nr_running(rt_se);
 
 	inc_rt_prio(rt_rq, prio);
 	inc_rt_migration(rt_se, rt_rq);
@@ -1160,13 +1183,37 @@
 	WARN_ON(!rt_prio(rt_se_prio(rt_se)));
 	WARN_ON(!rt_rq->rt_nr_running);
 	rt_rq->rt_nr_running -= rt_se_nr_running(rt_se);
+	rt_rq->rr_nr_running -= rt_se_rr_nr_running(rt_se);
 
 	dec_rt_prio(rt_rq, rt_se_prio(rt_se));
 	dec_rt_migration(rt_se, rt_rq);
 	dec_rt_group(rt_se, rt_rq);
 }
 
-static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head)
+/*
+ * Change rt_se->run_list location unless SAVE && !MOVE
+ *
+ * assumes ENQUEUE/DEQUEUE flags match
+ */
+static inline bool move_entity(unsigned int flags)
+{
+	if ((flags & (DEQUEUE_SAVE | DEQUEUE_MOVE)) == DEQUEUE_SAVE)
+		return false;
+
+	return true;
+}
+
+static void __delist_rt_entity(struct sched_rt_entity *rt_se, struct rt_prio_array *array)
+{
+	list_del_init(&rt_se->run_list);
+
+	if (list_empty(array->queue + rt_se_prio(rt_se)))
+		__clear_bit(rt_se_prio(rt_se), array->bitmap);
+
+	rt_se->on_list = 0;
+}
+
+static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
 {
 	struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
 	struct rt_prio_array *array = &rt_rq->active;
@@ -1179,26 +1226,37 @@
 	 * get throttled and the current group doesn't have any other
 	 * active members.
 	 */
-	if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running))
+	if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running)) {
+		if (rt_se->on_list)
+			__delist_rt_entity(rt_se, array);
 		return;
+	}
 
-	if (head)
-		list_add(&rt_se->run_list, queue);
-	else
-		list_add_tail(&rt_se->run_list, queue);
-	__set_bit(rt_se_prio(rt_se), array->bitmap);
+	if (move_entity(flags)) {
+		WARN_ON_ONCE(rt_se->on_list);
+		if (flags & ENQUEUE_HEAD)
+			list_add(&rt_se->run_list, queue);
+		else
+			list_add_tail(&rt_se->run_list, queue);
+
+		__set_bit(rt_se_prio(rt_se), array->bitmap);
+		rt_se->on_list = 1;
+	}
+	rt_se->on_rq = 1;
 
 	inc_rt_tasks(rt_se, rt_rq);
 }
 
-static void __dequeue_rt_entity(struct sched_rt_entity *rt_se)
+static void __dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
 {
 	struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
 	struct rt_prio_array *array = &rt_rq->active;
 
-	list_del_init(&rt_se->run_list);
-	if (list_empty(array->queue + rt_se_prio(rt_se)))
-		__clear_bit(rt_se_prio(rt_se), array->bitmap);
+	if (move_entity(flags)) {
+		WARN_ON_ONCE(!rt_se->on_list);
+		__delist_rt_entity(rt_se, array);
+	}
+	rt_se->on_rq = 0;
 
 	dec_rt_tasks(rt_se, rt_rq);
 }
@@ -1207,7 +1265,7 @@
  * Because the prio of an upper entry depends on the lower
  * entries, we must remove entries top - down.
  */
-static void dequeue_rt_stack(struct sched_rt_entity *rt_se)
+static void dequeue_rt_stack(struct sched_rt_entity *rt_se, unsigned int flags)
 {
 	struct sched_rt_entity *back = NULL;
 
@@ -1220,31 +1278,31 @@
 
 	for (rt_se = back; rt_se; rt_se = rt_se->back) {
 		if (on_rt_rq(rt_se))
-			__dequeue_rt_entity(rt_se);
+			__dequeue_rt_entity(rt_se, flags);
 	}
 }
 
-static void enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head)
+static void enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
 {
 	struct rq *rq = rq_of_rt_se(rt_se);
 
-	dequeue_rt_stack(rt_se);
+	dequeue_rt_stack(rt_se, flags);
 	for_each_sched_rt_entity(rt_se)
-		__enqueue_rt_entity(rt_se, head);
+		__enqueue_rt_entity(rt_se, flags);
 	enqueue_top_rt_rq(&rq->rt);
 }
 
-static void dequeue_rt_entity(struct sched_rt_entity *rt_se)
+static void dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
 {
 	struct rq *rq = rq_of_rt_se(rt_se);
 
-	dequeue_rt_stack(rt_se);
+	dequeue_rt_stack(rt_se, flags);
 
 	for_each_sched_rt_entity(rt_se) {
 		struct rt_rq *rt_rq = group_rt_rq(rt_se);
 
 		if (rt_rq && rt_rq->rt_nr_running)
-			__enqueue_rt_entity(rt_se, false);
+			__enqueue_rt_entity(rt_se, flags);
 	}
 	enqueue_top_rt_rq(&rq->rt);
 }
@@ -1260,7 +1318,7 @@
 	if (flags & ENQUEUE_WAKEUP)
 		rt_se->timeout = 0;
 
-	enqueue_rt_entity(rt_se, flags & ENQUEUE_HEAD);
+	enqueue_rt_entity(rt_se, flags);
 
 	if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
 		enqueue_pushable_task(rq, p);
@@ -1271,7 +1329,7 @@
 	struct sched_rt_entity *rt_se = &p->rt;
 
 	update_curr_rt(rq);
-	dequeue_rt_entity(rt_se);
+	dequeue_rt_entity(rt_se, flags);
 
 	dequeue_pushable_task(rq, p);
 }
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 10f1637..b2ff5a2 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -3,6 +3,7 @@
 #include <linux/sched/sysctl.h>
 #include <linux/sched/rt.h>
 #include <linux/sched/deadline.h>
+#include <linux/binfmts.h>
 #include <linux/mutex.h>
 #include <linux/spinlock.h>
 #include <linux/stop_machine.h>
@@ -313,12 +314,11 @@
 
 extern void free_fair_sched_group(struct task_group *tg);
 extern int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent);
-extern void unregister_fair_sched_group(struct task_group *tg, int cpu);
+extern void unregister_fair_sched_group(struct task_group *tg);
 extern void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
 			struct sched_entity *se, int cpu,
 			struct sched_entity *parent);
 extern void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b);
-extern int sched_group_set_shares(struct task_group *tg, unsigned long shares);
 
 extern void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b);
 extern void start_cfs_bandwidth(struct cfs_bandwidth *cfs_b);
@@ -450,6 +450,7 @@
 struct rt_rq {
 	struct rt_prio_array active;
 	unsigned int rt_nr_running;
+	unsigned int rr_nr_running;
 #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
 	struct {
 		int curr; /* highest queued rt task prio */
@@ -909,6 +910,18 @@
 
 extern int group_balance_cpu(struct sched_group *sg);
 
+#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL)
+void register_sched_domain_sysctl(void);
+void unregister_sched_domain_sysctl(void);
+#else
+static inline void register_sched_domain_sysctl(void)
+{
+}
+static inline void unregister_sched_domain_sysctl(void)
+{
+}
+#endif
+
 #else
 
 static inline void sched_ttwu_pending(void) { }
@@ -1022,6 +1035,7 @@
 #endif /* SCHED_DEBUG && HAVE_JUMP_LABEL */
 
 extern struct static_key_false sched_numa_balancing;
+extern struct static_key_false sched_schedstats;
 
 static inline u64 global_rt_period(void)
 {
@@ -1130,18 +1144,40 @@
 extern const int sched_prio_to_weight[40];
 extern const u32 sched_prio_to_wmult[40];
 
+/*
+ * {de,en}queue flags:
+ *
+ * DEQUEUE_SLEEP  - task is no longer runnable
+ * ENQUEUE_WAKEUP - task just became runnable
+ *
+ * SAVE/RESTORE - an otherwise spurious dequeue/enqueue, done to ensure tasks
+ *                are in a known state which allows modification. Such pairs
+ *                should preserve as much state as possible.
+ *
+ * MOVE - paired with SAVE/RESTORE, explicitly does not preserve the location
+ *        in the runqueue.
+ *
+ * ENQUEUE_HEAD      - place at front of runqueue (tail if not specified)
+ * ENQUEUE_REPLENISH - CBS (replenish runtime and postpone deadline)
+ * ENQUEUE_WAKING    - sched_class::task_waking was called
+ *
+ */
+
+#define DEQUEUE_SLEEP		0x01
+#define DEQUEUE_SAVE		0x02 /* matches ENQUEUE_RESTORE */
+#define DEQUEUE_MOVE		0x04 /* matches ENQUEUE_MOVE */
+
 #define ENQUEUE_WAKEUP		0x01
-#define ENQUEUE_HEAD		0x02
+#define ENQUEUE_RESTORE		0x02
+#define ENQUEUE_MOVE		0x04
+
+#define ENQUEUE_HEAD		0x08
+#define ENQUEUE_REPLENISH	0x10
 #ifdef CONFIG_SMP
-#define ENQUEUE_WAKING		0x04	/* sched_class::task_waking was called */
+#define ENQUEUE_WAKING		0x20
 #else
 #define ENQUEUE_WAKING		0x00
 #endif
-#define ENQUEUE_REPLENISH	0x08
-#define ENQUEUE_RESTORE	0x10
-
-#define DEQUEUE_SLEEP		0x01
-#define DEQUEUE_SAVE		0x02
 
 #define RETRY_TASK		((void *)-1UL)
 
@@ -1278,6 +1314,35 @@
 
 extern void init_entity_runnable_average(struct sched_entity *se);
 
+#ifdef CONFIG_NO_HZ_FULL
+extern bool sched_can_stop_tick(struct rq *rq);
+
+/*
+ * Tick may be needed by tasks in the runqueue depending on their policy and
+ * requirements. If tick is needed, lets send the target an IPI to kick it out of
+ * nohz mode if necessary.
+ */
+static inline void sched_update_tick_dependency(struct rq *rq)
+{
+	int cpu;
+
+	if (!tick_nohz_full_enabled())
+		return;
+
+	cpu = cpu_of(rq);
+
+	if (!tick_nohz_full_cpu(cpu))
+		return;
+
+	if (sched_can_stop_tick(rq))
+		tick_nohz_dep_clear_cpu(cpu, TICK_DEP_BIT_SCHED);
+	else
+		tick_nohz_dep_set_cpu(cpu, TICK_DEP_BIT_SCHED);
+}
+#else
+static inline void sched_update_tick_dependency(struct rq *rq) { }
+#endif
+
 static inline void add_nr_running(struct rq *rq, unsigned count)
 {
 	unsigned prev_nr = rq->nr_running;
@@ -1289,26 +1354,16 @@
 		if (!rq->rd->overload)
 			rq->rd->overload = true;
 #endif
-
-#ifdef CONFIG_NO_HZ_FULL
-		if (tick_nohz_full_cpu(rq->cpu)) {
-			/*
-			 * Tick is needed if more than one task runs on a CPU.
-			 * Send the target an IPI to kick it out of nohz mode.
-			 *
-			 * We assume that IPI implies full memory barrier and the
-			 * new value of rq->nr_running is visible on reception
-			 * from the target.
-			 */
-			tick_nohz_full_kick_cpu(rq->cpu);
-		}
-#endif
 	}
+
+	sched_update_tick_dependency(rq);
 }
 
 static inline void sub_nr_running(struct rq *rq, unsigned count)
 {
 	rq->nr_running -= count;
+	/* Check if we still need preemption */
+	sched_update_tick_dependency(rq);
 }
 
 static inline void rq_last_tick_reset(struct rq *rq)
diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h
index b0fbc76..70b3b6a 100644
--- a/kernel/sched/stats.h
+++ b/kernel/sched/stats.h
@@ -29,9 +29,10 @@
 	if (rq)
 		rq->rq_sched_info.run_delay += delta;
 }
-# define schedstat_inc(rq, field)	do { (rq)->field++; } while (0)
-# define schedstat_add(rq, field, amt)	do { (rq)->field += (amt); } while (0)
-# define schedstat_set(var, val)	do { var = (val); } while (0)
+# define schedstat_enabled()		static_branch_unlikely(&sched_schedstats)
+# define schedstat_inc(rq, field)	do { if (schedstat_enabled()) { (rq)->field++; } } while (0)
+# define schedstat_add(rq, field, amt)	do { if (schedstat_enabled()) { (rq)->field += (amt); } } while (0)
+# define schedstat_set(var, val)	do { if (schedstat_enabled()) { var = (val); } } while (0)
 #else /* !CONFIG_SCHEDSTATS */
 static inline void
 rq_sched_info_arrive(struct rq *rq, unsigned long long delta)
@@ -42,6 +43,7 @@
 static inline void
 rq_sched_info_depart(struct rq *rq, unsigned long long delta)
 {}
+# define schedstat_enabled()		0
 # define schedstat_inc(rq, field)	do { } while (0)
 # define schedstat_add(rq, field, amt)	do { } while (0)
 # define schedstat_set(var, val)	do { } while (0)
diff --git a/kernel/sched/swait.c b/kernel/sched/swait.c
new file mode 100644
index 0000000..82f0dff
--- /dev/null
+++ b/kernel/sched/swait.c
@@ -0,0 +1,123 @@
+#include <linux/sched.h>
+#include <linux/swait.h>
+
+void __init_swait_queue_head(struct swait_queue_head *q, const char *name,
+			     struct lock_class_key *key)
+{
+	raw_spin_lock_init(&q->lock);
+	lockdep_set_class_and_name(&q->lock, key, name);
+	INIT_LIST_HEAD(&q->task_list);
+}
+EXPORT_SYMBOL(__init_swait_queue_head);
+
+/*
+ * The thing about the wake_up_state() return value; I think we can ignore it.
+ *
+ * If for some reason it would return 0, that means the previously waiting
+ * task is already running, so it will observe condition true (or has already).
+ */
+void swake_up_locked(struct swait_queue_head *q)
+{
+	struct swait_queue *curr;
+
+	if (list_empty(&q->task_list))
+		return;
+
+	curr = list_first_entry(&q->task_list, typeof(*curr), task_list);
+	wake_up_process(curr->task);
+	list_del_init(&curr->task_list);
+}
+EXPORT_SYMBOL(swake_up_locked);
+
+void swake_up(struct swait_queue_head *q)
+{
+	unsigned long flags;
+
+	if (!swait_active(q))
+		return;
+
+	raw_spin_lock_irqsave(&q->lock, flags);
+	swake_up_locked(q);
+	raw_spin_unlock_irqrestore(&q->lock, flags);
+}
+EXPORT_SYMBOL(swake_up);
+
+/*
+ * Does not allow usage from IRQ disabled, since we must be able to
+ * release IRQs to guarantee bounded hold time.
+ */
+void swake_up_all(struct swait_queue_head *q)
+{
+	struct swait_queue *curr;
+	LIST_HEAD(tmp);
+
+	if (!swait_active(q))
+		return;
+
+	raw_spin_lock_irq(&q->lock);
+	list_splice_init(&q->task_list, &tmp);
+	while (!list_empty(&tmp)) {
+		curr = list_first_entry(&tmp, typeof(*curr), task_list);
+
+		wake_up_state(curr->task, TASK_NORMAL);
+		list_del_init(&curr->task_list);
+
+		if (list_empty(&tmp))
+			break;
+
+		raw_spin_unlock_irq(&q->lock);
+		raw_spin_lock_irq(&q->lock);
+	}
+	raw_spin_unlock_irq(&q->lock);
+}
+EXPORT_SYMBOL(swake_up_all);
+
+void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait)
+{
+	wait->task = current;
+	if (list_empty(&wait->task_list))
+		list_add(&wait->task_list, &q->task_list);
+}
+
+void prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait, int state)
+{
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&q->lock, flags);
+	__prepare_to_swait(q, wait);
+	set_current_state(state);
+	raw_spin_unlock_irqrestore(&q->lock, flags);
+}
+EXPORT_SYMBOL(prepare_to_swait);
+
+long prepare_to_swait_event(struct swait_queue_head *q, struct swait_queue *wait, int state)
+{
+	if (signal_pending_state(state, current))
+		return -ERESTARTSYS;
+
+	prepare_to_swait(q, wait, state);
+
+	return 0;
+}
+EXPORT_SYMBOL(prepare_to_swait_event);
+
+void __finish_swait(struct swait_queue_head *q, struct swait_queue *wait)
+{
+	__set_current_state(TASK_RUNNING);
+	if (!list_empty(&wait->task_list))
+		list_del_init(&wait->task_list);
+}
+
+void finish_swait(struct swait_queue_head *q, struct swait_queue *wait)
+{
+	unsigned long flags;
+
+	__set_current_state(TASK_RUNNING);
+
+	if (!list_empty_careful(&wait->task_list)) {
+		raw_spin_lock_irqsave(&q->lock, flags);
+		list_del_init(&wait->task_list);
+		raw_spin_unlock_irqrestore(&q->lock, flags);
+	}
+}
+EXPORT_SYMBOL(finish_swait);
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 580ac2d..15a1795 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -316,24 +316,24 @@
 		put_seccomp_filter(thread);
 		smp_store_release(&thread->seccomp.filter,
 				  caller->seccomp.filter);
+
+		/*
+		 * Don't let an unprivileged task work around
+		 * the no_new_privs restriction by creating
+		 * a thread that sets it up, enters seccomp,
+		 * then dies.
+		 */
+		if (task_no_new_privs(caller))
+			task_set_no_new_privs(thread);
+
 		/*
 		 * Opt the other thread into seccomp if needed.
 		 * As threads are considered to be trust-realm
 		 * equivalent (see ptrace_may_access), it is safe to
 		 * allow one thread to transition the other.
 		 */
-		if (thread->seccomp.mode == SECCOMP_MODE_DISABLED) {
-			/*
-			 * Don't let an unprivileged task work around
-			 * the no_new_privs restriction by creating
-			 * a thread that sets it up, enters seccomp,
-			 * then dies.
-			 */
-			if (task_no_new_privs(caller))
-				task_set_no_new_privs(thread);
-
+		if (thread->seccomp.mode == SECCOMP_MODE_DISABLED)
 			seccomp_assign_mode(thread, SECCOMP_MODE_FILTER);
-		}
 	}
 }
 
diff --git a/kernel/signal.c b/kernel/signal.c
index f3f1f7a..0508544 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -3508,8 +3508,10 @@
 	current->saved_sigmask = current->blocked;
 	set_current_blocked(set);
 
-	__set_current_state(TASK_INTERRUPTIBLE);
-	schedule();
+	while (!signal_pending(current)) {
+		__set_current_state(TASK_INTERRUPTIBLE);
+		schedule();
+	}
 	set_restore_sigmask();
 	return -ERESTARTNOHAND;
 }
diff --git a/kernel/smp.c b/kernel/smp.c
index d903c02..300d293 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -105,13 +105,12 @@
  * previous function call. For multi-cpu calls its even more interesting
  * as we'll have to ensure no other cpu is observing our csd.
  */
-static void csd_lock_wait(struct call_single_data *csd)
+static __always_inline void csd_lock_wait(struct call_single_data *csd)
 {
-	while (smp_load_acquire(&csd->flags) & CSD_FLAG_LOCK)
-		cpu_relax();
+	smp_cond_acquire(!(csd->flags & CSD_FLAG_LOCK));
 }
 
-static void csd_lock(struct call_single_data *csd)
+static __always_inline void csd_lock(struct call_single_data *csd)
 {
 	csd_lock_wait(csd);
 	csd->flags |= CSD_FLAG_LOCK;
@@ -124,7 +123,7 @@
 	smp_wmb();
 }
 
-static void csd_unlock(struct call_single_data *csd)
+static __always_inline void csd_unlock(struct call_single_data *csd)
 {
 	WARN_ON(!(csd->flags & CSD_FLAG_LOCK));
 
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 479e443..8aae49d 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -116,9 +116,9 @@
 
 	if (preempt_count() == cnt) {
 #ifdef CONFIG_DEBUG_PREEMPT
-		current->preempt_disable_ip = get_parent_ip(CALLER_ADDR1);
+		current->preempt_disable_ip = get_lock_parent_ip();
 #endif
-		trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
+		trace_preempt_off(CALLER_ADDR0, get_lock_parent_ip());
 	}
 }
 EXPORT_SYMBOL(__local_bh_disable_ip);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 97715fd..f5102fab 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -350,6 +350,17 @@
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
+#ifdef CONFIG_SCHEDSTATS
+	{
+		.procname	= "sched_schedstats",
+		.data		= NULL,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= sysctl_schedstats,
+		.extra1		= &zero,
+		.extra2		= &one,
+	},
+#endif /* CONFIG_SCHEDSTATS */
 #endif /* CONFIG_SMP */
 #ifdef CONFIG_NUMA_BALANCING
 	{
@@ -505,7 +516,7 @@
 		.data		= &latencytop_enabled,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
+		.proc_handler	= sysctl_latencytop,
 	},
 #endif
 #ifdef CONFIG_BLK_DEV_INITRD
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 435b885..fa909f9 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -897,10 +897,10 @@
  */
 static void __remove_hrtimer(struct hrtimer *timer,
 			     struct hrtimer_clock_base *base,
-			     unsigned long newstate, int reprogram)
+			     u8 newstate, int reprogram)
 {
 	struct hrtimer_cpu_base *cpu_base = base->cpu_base;
-	unsigned int state = timer->state;
+	u8 state = timer->state;
 
 	timer->state = newstate;
 	if (!(state & HRTIMER_STATE_ENQUEUED))
@@ -930,7 +930,7 @@
 remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base, bool restart)
 {
 	if (hrtimer_is_queued(timer)) {
-		unsigned long state = timer->state;
+		u8 state = timer->state;
 		int reprogram;
 
 		/*
@@ -954,6 +954,22 @@
 	return 0;
 }
 
+static inline ktime_t hrtimer_update_lowres(struct hrtimer *timer, ktime_t tim,
+					    const enum hrtimer_mode mode)
+{
+#ifdef CONFIG_TIME_LOW_RES
+	/*
+	 * CONFIG_TIME_LOW_RES indicates that the system has no way to return
+	 * granular time values. For relative timers we add hrtimer_resolution
+	 * (i.e. one jiffie) to prevent short timeouts.
+	 */
+	timer->is_rel = mode & HRTIMER_MODE_REL;
+	if (timer->is_rel)
+		tim = ktime_add_safe(tim, ktime_set(0, hrtimer_resolution));
+#endif
+	return tim;
+}
+
 /**
  * hrtimer_start_range_ns - (re)start an hrtimer on the current CPU
  * @timer:	the timer to be added
@@ -974,19 +990,10 @@
 	/* Remove an active timer from the queue: */
 	remove_hrtimer(timer, base, true);
 
-	if (mode & HRTIMER_MODE_REL) {
+	if (mode & HRTIMER_MODE_REL)
 		tim = ktime_add_safe(tim, base->get_time());
-		/*
-		 * CONFIG_TIME_LOW_RES is a temporary way for architectures
-		 * to signal that they simply return xtime in
-		 * do_gettimeoffset(). In this case we want to round up by
-		 * resolution when starting a relative timer, to avoid short
-		 * timeouts. This will go away with the GTOD framework.
-		 */
-#ifdef CONFIG_TIME_LOW_RES
-		tim = ktime_add_safe(tim, ktime_set(0, hrtimer_resolution));
-#endif
-	}
+
+	tim = hrtimer_update_lowres(timer, tim, mode);
 
 	hrtimer_set_expires_range_ns(timer, tim, delta_ns);
 
@@ -1074,19 +1081,23 @@
 /**
  * hrtimer_get_remaining - get remaining time for the timer
  * @timer:	the timer to read
+ * @adjust:	adjust relative timers when CONFIG_TIME_LOW_RES=y
  */
-ktime_t hrtimer_get_remaining(const struct hrtimer *timer)
+ktime_t __hrtimer_get_remaining(const struct hrtimer *timer, bool adjust)
 {
 	unsigned long flags;
 	ktime_t rem;
 
 	lock_hrtimer_base(timer, &flags);
-	rem = hrtimer_expires_remaining(timer);
+	if (IS_ENABLED(CONFIG_TIME_LOW_RES) && adjust)
+		rem = hrtimer_expires_remaining_adjusted(timer);
+	else
+		rem = hrtimer_expires_remaining(timer);
 	unlock_hrtimer_base(timer, &flags);
 
 	return rem;
 }
-EXPORT_SYMBOL_GPL(hrtimer_get_remaining);
+EXPORT_SYMBOL_GPL(__hrtimer_get_remaining);
 
 #ifdef CONFIG_NO_HZ_COMMON
 /**
@@ -1220,6 +1231,14 @@
 	fn = timer->function;
 
 	/*
+	 * Clear the 'is relative' flag for the TIME_LOW_RES case. If the
+	 * timer is restarted with a period then it becomes an absolute
+	 * timer. If its not restarted it does not matter.
+	 */
+	if (IS_ENABLED(CONFIG_TIME_LOW_RES))
+		timer->is_rel = false;
+
+	/*
 	 * Because we run timers from hardirq context, there is no chance
 	 * they get migrated to another cpu, therefore its safe to unlock
 	 * the timer base.
diff --git a/kernel/time/itimer.c b/kernel/time/itimer.c
index 8d262b4..1d5c720 100644
--- a/kernel/time/itimer.c
+++ b/kernel/time/itimer.c
@@ -26,7 +26,7 @@
  */
 static struct timeval itimer_get_remtime(struct hrtimer *timer)
 {
-	ktime_t rem = hrtimer_get_remaining(timer);
+	ktime_t rem = __hrtimer_get_remaining(timer, true);
 
 	/*
 	 * Racy but safe: if the itimer expires after the above
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 36f2ca0..6df8927 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -685,8 +685,18 @@
 		if (!capable(CAP_SYS_TIME))
 			return -EPERM;
 
-		if (!timeval_inject_offset_valid(&txc->time))
-			return -EINVAL;
+		if (txc->modes & ADJ_NANO) {
+			struct timespec ts;
+
+			ts.tv_sec = txc->time.tv_sec;
+			ts.tv_nsec = txc->time.tv_usec;
+			if (!timespec_inject_offset_valid(&ts))
+				return -EINVAL;
+
+		} else {
+			if (!timeval_inject_offset_valid(&txc->time))
+				return -EINVAL;
+		}
 	}
 
 	/*
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index f5e86d2..1cafba8 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -333,7 +333,6 @@
 	return err;
 }
 
-
 /*
  * Validate the clockid_t for a new CPU-clock timer, and initialize the timer.
  * This is called from sys_timer_create() and do_cpu_nanosleep() with the
@@ -517,6 +516,10 @@
 				cputime_expires->sched_exp = exp;
 			break;
 		}
+		if (CPUCLOCK_PERTHREAD(timer->it_clock))
+			tick_dep_set_task(p, TICK_DEP_BIT_POSIX_TIMER);
+		else
+			tick_dep_set_signal(p->signal, TICK_DEP_BIT_POSIX_TIMER);
 	}
 }
 
@@ -582,39 +585,6 @@
 	return 0;
 }
 
-#ifdef CONFIG_NO_HZ_FULL
-static void nohz_kick_work_fn(struct work_struct *work)
-{
-	tick_nohz_full_kick_all();
-}
-
-static DECLARE_WORK(nohz_kick_work, nohz_kick_work_fn);
-
-/*
- * We need the IPIs to be sent from sane process context.
- * The posix cpu timers are always set with irqs disabled.
- */
-static void posix_cpu_timer_kick_nohz(void)
-{
-	if (context_tracking_is_enabled())
-		schedule_work(&nohz_kick_work);
-}
-
-bool posix_cpu_timers_can_stop_tick(struct task_struct *tsk)
-{
-	if (!task_cputime_zero(&tsk->cputime_expires))
-		return false;
-
-	/* Check if cputimer is running. This is accessed without locking. */
-	if (READ_ONCE(tsk->signal->cputimer.running))
-		return false;
-
-	return true;
-}
-#else
-static inline void posix_cpu_timer_kick_nohz(void) { }
-#endif
-
 /*
  * Guts of sys_timer_settime for CPU timers.
  * This is called with the timer locked and interrupts disabled.
@@ -761,8 +731,7 @@
 		sample_to_timespec(timer->it_clock,
 				   old_incr, &old->it_interval);
 	}
-	if (!ret)
-		posix_cpu_timer_kick_nohz();
+
 	return ret;
 }
 
@@ -911,6 +880,8 @@
 			__group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk);
 		}
 	}
+	if (task_cputime_zero(tsk_expires))
+		tick_dep_clear_task(tsk, TICK_DEP_BIT_POSIX_TIMER);
 }
 
 static inline void stop_process_timers(struct signal_struct *sig)
@@ -919,6 +890,7 @@
 
 	/* Turn off cputimer->running. This is done without locking. */
 	WRITE_ONCE(cputimer->running, false);
+	tick_dep_clear_signal(sig, TICK_DEP_BIT_POSIX_TIMER);
 }
 
 static u32 onecputick;
@@ -1095,8 +1067,6 @@
 	arm_timer(timer);
 	unlock_task_sighand(p, &flags);
 
-	/* Kick full dynticks CPUs in case they need to tick on the new timer */
-	posix_cpu_timer_kick_nohz();
 out:
 	timer->it_overrun_last = timer->it_overrun;
 	timer->it_overrun = -1;
@@ -1270,7 +1240,7 @@
 		}
 
 		if (!*newval)
-			goto out;
+			return;
 		*newval += now;
 	}
 
@@ -1288,8 +1258,8 @@
 			tsk->signal->cputime_expires.virt_exp = *newval;
 		break;
 	}
-out:
-	posix_cpu_timer_kick_nohz();
+
+	tick_dep_set_signal(tsk->signal, TICK_DEP_BIT_POSIX_TIMER);
 }
 
 static int do_cpu_nanosleep(const clockid_t which_clock, int flags,
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index 31d11ac..f2826c3 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -760,7 +760,7 @@
 	    (timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE))
 		timr->it_overrun += (unsigned int) hrtimer_forward(timer, now, iv);
 
-	remaining = ktime_sub(hrtimer_get_expires(timer), now);
+	remaining = __hrtimer_expires_remaining_adjusted(timer, now);
 	/* Return 0 only, when the timer is expired and not pending */
 	if (remaining.tv64 <= 0) {
 		/*
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 9d7a053..969e670 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -22,7 +22,6 @@
 #include <linux/module.h>
 #include <linux/irq_work.h>
 #include <linux/posix-timers.h>
-#include <linux/perf_event.h>
 #include <linux/context_tracking.h>
 
 #include <asm/irq_regs.h>
@@ -36,16 +35,17 @@
  */
 static DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched);
 
-/*
- * The time, when the last jiffy update happened. Protected by jiffies_lock.
- */
-static ktime_t last_jiffies_update;
-
 struct tick_sched *tick_get_tick_sched(int cpu)
 {
 	return &per_cpu(tick_cpu_sched, cpu);
 }
 
+#if defined(CONFIG_NO_HZ_COMMON) || defined(CONFIG_HIGH_RES_TIMERS)
+/*
+ * The time, when the last jiffy update happened. Protected by jiffies_lock.
+ */
+static ktime_t last_jiffies_update;
+
 /*
  * Must be called with interrupts disabled !
  */
@@ -151,59 +151,69 @@
 	update_process_times(user_mode(regs));
 	profile_tick(CPU_PROFILING);
 }
+#endif
 
 #ifdef CONFIG_NO_HZ_FULL
 cpumask_var_t tick_nohz_full_mask;
 cpumask_var_t housekeeping_mask;
 bool tick_nohz_full_running;
+static unsigned long tick_dep_mask;
 
-static bool can_stop_full_tick(void)
+static void trace_tick_dependency(unsigned long dep)
+{
+	if (dep & TICK_DEP_MASK_POSIX_TIMER) {
+		trace_tick_stop(0, TICK_DEP_MASK_POSIX_TIMER);
+		return;
+	}
+
+	if (dep & TICK_DEP_MASK_PERF_EVENTS) {
+		trace_tick_stop(0, TICK_DEP_MASK_PERF_EVENTS);
+		return;
+	}
+
+	if (dep & TICK_DEP_MASK_SCHED) {
+		trace_tick_stop(0, TICK_DEP_MASK_SCHED);
+		return;
+	}
+
+	if (dep & TICK_DEP_MASK_CLOCK_UNSTABLE)
+		trace_tick_stop(0, TICK_DEP_MASK_CLOCK_UNSTABLE);
+}
+
+static bool can_stop_full_tick(struct tick_sched *ts)
 {
 	WARN_ON_ONCE(!irqs_disabled());
 
-	if (!sched_can_stop_tick()) {
-		trace_tick_stop(0, "more than 1 task in runqueue\n");
+	if (tick_dep_mask) {
+		trace_tick_dependency(tick_dep_mask);
 		return false;
 	}
 
-	if (!posix_cpu_timers_can_stop_tick(current)) {
-		trace_tick_stop(0, "posix timers running\n");
+	if (ts->tick_dep_mask) {
+		trace_tick_dependency(ts->tick_dep_mask);
 		return false;
 	}
 
-	if (!perf_event_can_stop_tick()) {
-		trace_tick_stop(0, "perf events running\n");
+	if (current->tick_dep_mask) {
+		trace_tick_dependency(current->tick_dep_mask);
 		return false;
 	}
 
-	/* sched_clock_tick() needs us? */
-#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
-	/*
-	 * TODO: kick full dynticks CPUs when
-	 * sched_clock_stable is set.
-	 */
-	if (!sched_clock_stable()) {
-		trace_tick_stop(0, "unstable sched clock\n");
-		/*
-		 * Don't allow the user to think they can get
-		 * full NO_HZ with this machine.
-		 */
-		WARN_ONCE(tick_nohz_full_running,
-			  "NO_HZ FULL will not work with unstable sched clock");
+	if (current->signal->tick_dep_mask) {
+		trace_tick_dependency(current->signal->tick_dep_mask);
 		return false;
 	}
-#endif
 
 	return true;
 }
 
-static void nohz_full_kick_work_func(struct irq_work *work)
+static void nohz_full_kick_func(struct irq_work *work)
 {
 	/* Empty, the tick restart happens on tick_nohz_irq_exit() */
 }
 
 static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = {
-	.func = nohz_full_kick_work_func,
+	.func = nohz_full_kick_func,
 };
 
 /*
@@ -212,7 +222,7 @@
  * This kick, unlike tick_nohz_full_kick_cpu() and tick_nohz_full_kick_all(),
  * is NMI safe.
  */
-void tick_nohz_full_kick(void)
+static void tick_nohz_full_kick(void)
 {
 	if (!tick_nohz_full_cpu(smp_processor_id()))
 		return;
@@ -232,27 +242,112 @@
 	irq_work_queue_on(&per_cpu(nohz_full_kick_work, cpu), cpu);
 }
 
-static void nohz_full_kick_ipi(void *info)
-{
-	/* Empty, the tick restart happens on tick_nohz_irq_exit() */
-}
-
 /*
  * Kick all full dynticks CPUs in order to force these to re-evaluate
  * their dependency on the tick and restart it if necessary.
  */
-void tick_nohz_full_kick_all(void)
+static void tick_nohz_full_kick_all(void)
 {
+	int cpu;
+
 	if (!tick_nohz_full_running)
 		return;
 
 	preempt_disable();
-	smp_call_function_many(tick_nohz_full_mask,
-			       nohz_full_kick_ipi, NULL, false);
-	tick_nohz_full_kick();
+	for_each_cpu_and(cpu, tick_nohz_full_mask, cpu_online_mask)
+		tick_nohz_full_kick_cpu(cpu);
 	preempt_enable();
 }
 
+static void tick_nohz_dep_set_all(unsigned long *dep,
+				  enum tick_dep_bits bit)
+{
+	unsigned long prev;
+
+	prev = fetch_or(dep, BIT_MASK(bit));
+	if (!prev)
+		tick_nohz_full_kick_all();
+}
+
+/*
+ * Set a global tick dependency. Used by perf events that rely on freq and
+ * by unstable clock.
+ */
+void tick_nohz_dep_set(enum tick_dep_bits bit)
+{
+	tick_nohz_dep_set_all(&tick_dep_mask, bit);
+}
+
+void tick_nohz_dep_clear(enum tick_dep_bits bit)
+{
+	clear_bit(bit, &tick_dep_mask);
+}
+
+/*
+ * Set per-CPU tick dependency. Used by scheduler and perf events in order to
+ * manage events throttling.
+ */
+void tick_nohz_dep_set_cpu(int cpu, enum tick_dep_bits bit)
+{
+	unsigned long prev;
+	struct tick_sched *ts;
+
+	ts = per_cpu_ptr(&tick_cpu_sched, cpu);
+
+	prev = fetch_or(&ts->tick_dep_mask, BIT_MASK(bit));
+	if (!prev) {
+		preempt_disable();
+		/* Perf needs local kick that is NMI safe */
+		if (cpu == smp_processor_id()) {
+			tick_nohz_full_kick();
+		} else {
+			/* Remote irq work not NMI-safe */
+			if (!WARN_ON_ONCE(in_nmi()))
+				tick_nohz_full_kick_cpu(cpu);
+		}
+		preempt_enable();
+	}
+}
+
+void tick_nohz_dep_clear_cpu(int cpu, enum tick_dep_bits bit)
+{
+	struct tick_sched *ts = per_cpu_ptr(&tick_cpu_sched, cpu);
+
+	clear_bit(bit, &ts->tick_dep_mask);
+}
+
+/*
+ * Set a per-task tick dependency. Posix CPU timers need this in order to elapse
+ * per task timers.
+ */
+void tick_nohz_dep_set_task(struct task_struct *tsk, enum tick_dep_bits bit)
+{
+	/*
+	 * We could optimize this with just kicking the target running the task
+	 * if that noise matters for nohz full users.
+	 */
+	tick_nohz_dep_set_all(&tsk->tick_dep_mask, bit);
+}
+
+void tick_nohz_dep_clear_task(struct task_struct *tsk, enum tick_dep_bits bit)
+{
+	clear_bit(bit, &tsk->tick_dep_mask);
+}
+
+/*
+ * Set a per-taskgroup tick dependency. Posix CPU timers need this in order to elapse
+ * per process timers.
+ */
+void tick_nohz_dep_set_signal(struct signal_struct *sig, enum tick_dep_bits bit)
+{
+	tick_nohz_dep_set_all(&sig->tick_dep_mask, bit);
+}
+
+void tick_nohz_dep_clear_signal(struct signal_struct *sig, enum tick_dep_bits bit)
+{
+	clear_bit(bit, &sig->tick_dep_mask);
+}
+
 /*
  * Re-evaluate the need for the tick as we switch the current task.
  * It might need the tick due to per task/process properties:
@@ -261,15 +356,19 @@
 void __tick_nohz_task_switch(void)
 {
 	unsigned long flags;
+	struct tick_sched *ts;
 
 	local_irq_save(flags);
 
 	if (!tick_nohz_full_cpu(smp_processor_id()))
 		goto out;
 
-	if (tick_nohz_tick_stopped() && !can_stop_full_tick())
-		tick_nohz_full_kick();
+	ts = this_cpu_ptr(&tick_cpu_sched);
 
+	if (ts->tick_stopped) {
+		if (current->tick_dep_mask || current->signal->tick_dep_mask)
+			tick_nohz_full_kick();
+	}
 out:
 	local_irq_restore(flags);
 }
@@ -687,7 +786,7 @@
 
 		ts->last_tick = hrtimer_get_expires(&ts->sched_timer);
 		ts->tick_stopped = 1;
-		trace_tick_stop(1, " ");
+		trace_tick_stop(1, TICK_DEP_MASK_NONE);
 	}
 
 	/*
@@ -738,7 +837,7 @@
 	if (!ts->tick_stopped && ts->nohz_mode == NOHZ_MODE_INACTIVE)
 		return;
 
-	if (can_stop_full_tick())
+	if (can_stop_full_tick(ts))
 		tick_nohz_stop_sched_tick(ts, ktime_get(), cpu);
 	else if (ts->tick_stopped)
 		tick_nohz_restart_sched_tick(ts, ktime_get(), 1);
@@ -993,9 +1092,9 @@
 	/* Get the next period */
 	next = tick_init_jiffy_update();
 
-	hrtimer_forward_now(&ts->sched_timer, tick_period);
 	hrtimer_set_expires(&ts->sched_timer, next);
-	tick_program_event(next, 1);
+	hrtimer_forward_now(&ts->sched_timer, tick_period);
+	tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);
 	tick_nohz_activate(ts, NOHZ_MODE_LOWRES);
 }
 
diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h
index a4a8d4e..eb4e325 100644
--- a/kernel/time/tick-sched.h
+++ b/kernel/time/tick-sched.h
@@ -60,6 +60,7 @@
 	u64				next_timer;
 	ktime_t				idle_expires;
 	int				do_timer_last;
+	unsigned long			tick_dep_mask;
 };
 
 extern struct tick_sched *tick_get_tick_sched(int cpu);
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index f75e35b..ba7d8b2 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -69,7 +69,7 @@
 	print_name_offset(m, taddr);
 	SEQ_printf(m, ", ");
 	print_name_offset(m, timer->function);
-	SEQ_printf(m, ", S:%02lx", timer->state);
+	SEQ_printf(m, ", S:%02x", timer->state);
 #ifdef CONFIG_TIMER_STATS
 	SEQ_printf(m, ", ");
 	print_name_offset(m, timer->start_site);
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 45dd798..326a75e 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -191,14 +191,17 @@
 	struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
 	struct bpf_array *array = container_of(map, struct bpf_array, map);
 	struct perf_event *event;
+	struct file *file;
 
 	if (unlikely(index >= array->map.max_entries))
 		return -E2BIG;
 
-	event = (struct perf_event *)array->ptrs[index];
-	if (!event)
+	file = (struct file *)array->ptrs[index];
+	if (unlikely(!file))
 		return -ENOENT;
 
+	event = file->private_data;
+
 	/* make sure event is local and doesn't have pmu::count */
 	if (event->oncpu != smp_processor_id() ||
 	    event->pmu->count)
@@ -228,6 +231,7 @@
 	void *data = (void *) (long) r4;
 	struct perf_sample_data sample_data;
 	struct perf_event *event;
+	struct file *file;
 	struct perf_raw_record raw = {
 		.size = size,
 		.data = data,
@@ -236,10 +240,12 @@
 	if (unlikely(index >= array->map.max_entries))
 		return -E2BIG;
 
-	event = (struct perf_event *)array->ptrs[index];
-	if (unlikely(!event))
+	file = (struct file *)array->ptrs[index];
+	if (unlikely(!file))
 		return -ENOENT;
 
+	event = file->private_data;
+
 	if (unlikely(event->attr.type != PERF_TYPE_SOFTWARE ||
 		     event->attr.config != PERF_COUNT_SW_BPF_OUTPUT))
 		return -EINVAL;
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index eca592f..57a6eea 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -4961,7 +4961,7 @@
 	mutex_unlock(&ftrace_lock);
 }
 
-static void ftrace_module_enable(struct module *mod)
+void ftrace_module_enable(struct module *mod)
 {
 	struct dyn_ftrace *rec;
 	struct ftrace_page *pg;
@@ -5038,38 +5038,8 @@
 	ftrace_process_locs(mod, mod->ftrace_callsites,
 			    mod->ftrace_callsites + mod->num_ftrace_callsites);
 }
-
-static int ftrace_module_notify(struct notifier_block *self,
-				unsigned long val, void *data)
-{
-	struct module *mod = data;
-
-	switch (val) {
-	case MODULE_STATE_COMING:
-		ftrace_module_enable(mod);
-		break;
-	case MODULE_STATE_GOING:
-		ftrace_release_mod(mod);
-		break;
-	default:
-		break;
-	}
-
-	return 0;
-}
-#else
-static int ftrace_module_notify(struct notifier_block *self,
-				unsigned long val, void *data)
-{
-	return 0;
-}
 #endif /* CONFIG_MODULES */
 
-struct notifier_block ftrace_module_nb = {
-	.notifier_call = ftrace_module_notify,
-	.priority = INT_MIN,	/* Run after anything that can remove kprobes */
-};
-
 void __init ftrace_init(void)
 {
 	extern unsigned long __start_mcount_loc[];
@@ -5098,10 +5068,6 @@
 				  __start_mcount_loc,
 				  __stop_mcount_loc);
 
-	ret = register_module_notifier(&ftrace_module_nb);
-	if (ret)
-		pr_warning("Failed to register trace ftrace module exit notifier\n");
-
 	set_ftrace_early_filters();
 
 	return;
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 87fb980..d929340 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1751,7 +1751,7 @@
 {
 	__buffer_unlock_commit(buffer, event);
 
-	ftrace_trace_stack(tr, buffer, flags, 6, pc, regs);
+	ftrace_trace_stack(tr, buffer, flags, 0, pc, regs);
 	ftrace_trace_userstack(buffer, flags, pc);
 }
 EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit_regs);
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index f333e57..05ddc08 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -97,16 +97,16 @@
 	struct ftrace_event_field *field;
 	struct list_head *head;
 
+	head = trace_get_fields(call);
+	field = __find_event_field(head, name);
+	if (field)
+		return field;
+
 	field = __find_event_field(&ftrace_generic_fields, name);
 	if (field)
 		return field;
 
-	field = __find_event_field(&ftrace_common_fields, name);
-	if (field)
-		return field;
-
-	head = trace_get_fields(call);
-	return __find_event_field(head, name);
+	return __find_event_field(&ftrace_common_fields, name);
 }
 
 static int __trace_define_field(struct list_head *head, const char *type,
@@ -171,8 +171,10 @@
 {
 	int ret;
 
-	__generic_field(int, cpu, FILTER_OTHER);
-	__generic_field(char *, comm, FILTER_PTR_STRING);
+	__generic_field(int, CPU, FILTER_CPU);
+	__generic_field(int, cpu, FILTER_CPU);
+	__generic_field(char *, COMM, FILTER_COMM);
+	__generic_field(char *, comm, FILTER_COMM);
 
 	return ret;
 }
@@ -869,7 +871,8 @@
 		 * The ftrace subsystem is for showing formats only.
 		 * They can not be enabled or disabled via the event files.
 		 */
-		if (call->class && call->class->reg)
+		if (call->class && call->class->reg &&
+		    !(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE))
 			return file;
 	}
 
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index f93a219..6816302 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -1043,13 +1043,14 @@
 		return -EINVAL;
 	}
 
-	if (is_string_field(field)) {
+	if (field->filter_type == FILTER_COMM) {
+		filter_build_regex(pred);
+		fn = filter_pred_comm;
+		pred->regex.field_len = TASK_COMM_LEN;
+	} else if (is_string_field(field)) {
 		filter_build_regex(pred);
 
-		if (!strcmp(field->name, "comm")) {
-			fn = filter_pred_comm;
-			pred->regex.field_len = TASK_COMM_LEN;
-		} else if (field->filter_type == FILTER_STATIC_STRING) {
+		if (field->filter_type == FILTER_STATIC_STRING) {
 			fn = filter_pred_string;
 			pred->regex.field_len = field->size;
 		} else if (field->filter_type == FILTER_DYN_STRING)
@@ -1072,7 +1073,7 @@
 		}
 		pred->val = val;
 
-		if (!strcmp(field->name, "cpu"))
+		if (field->filter_type == FILTER_CPU)
 			fn = filter_pred_cpu;
 		else
 			fn = select_comparison_fn(pred->op, field->size,
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index c995644..21b81a4 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -30,7 +30,7 @@
 struct trace_kprobe {
 	struct list_head	list;
 	struct kretprobe	rp;	/* Use rp.kp for kprobe use */
-	unsigned long 		nhit;
+	unsigned long __percpu *nhit;
 	const char		*symbol;	/* symbol name */
 	struct trace_probe	tp;
 };
@@ -274,6 +274,10 @@
 	if (!tk)
 		return ERR_PTR(ret);
 
+	tk->nhit = alloc_percpu(unsigned long);
+	if (!tk->nhit)
+		goto error;
+
 	if (symbol) {
 		tk->symbol = kstrdup(symbol, GFP_KERNEL);
 		if (!tk->symbol)
@@ -313,6 +317,7 @@
 error:
 	kfree(tk->tp.call.name);
 	kfree(tk->symbol);
+	free_percpu(tk->nhit);
 	kfree(tk);
 	return ERR_PTR(ret);
 }
@@ -327,6 +332,7 @@
 	kfree(tk->tp.call.class->system);
 	kfree(tk->tp.call.name);
 	kfree(tk->symbol);
+	free_percpu(tk->nhit);
 	kfree(tk);
 }
 
@@ -874,9 +880,14 @@
 static int probes_profile_seq_show(struct seq_file *m, void *v)
 {
 	struct trace_kprobe *tk = v;
+	unsigned long nhit = 0;
+	int cpu;
+
+	for_each_possible_cpu(cpu)
+		nhit += *per_cpu_ptr(tk->nhit, cpu);
 
 	seq_printf(m, "  %-44s %15lu %15lu\n",
-		   trace_event_name(&tk->tp.call), tk->nhit,
+		   trace_event_name(&tk->tp.call), nhit,
 		   tk->rp.kp.nmissed);
 
 	return 0;
@@ -1225,7 +1236,7 @@
 {
 	struct trace_kprobe *tk = container_of(kp, struct trace_kprobe, rp.kp);
 
-	tk->nhit++;
+	raw_cpu_inc(*tk->nhit);
 
 	if (tk->tp.flags & TP_FLAG_TRACE)
 		kprobe_trace_func(tk, regs);
@@ -1242,7 +1253,7 @@
 {
 	struct trace_kprobe *tk = container_of(ri->rp, struct trace_kprobe, rp);
 
-	tk->nhit++;
+	raw_cpu_inc(*tk->nhit);
 
 	if (tk->tp.flags & TP_FLAG_TRACE)
 		kretprobe_trace_func(tk, ri, regs);
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index dda9e67..2a1abba 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -126,6 +126,13 @@
 	}
 
 	/*
+	 * Some archs may not have the passed in ip in the dump.
+	 * If that happens, we need to show everything.
+	 */
+	if (i == stack_trace_max.nr_entries)
+		i = 0;
+
+	/*
 	 * Now find where in the stack these are.
 	 */
 	x = 0;
@@ -149,7 +156,11 @@
 		for (; p < top && i < stack_trace_max.nr_entries; p++) {
 			if (stack_dump_trace[i] == ULONG_MAX)
 				break;
-			if (*p == stack_dump_trace[i]) {
+			/*
+			 * The READ_ONCE_NOCHECK is used to let KASAN know that
+			 * this is not a stack-out-of-bounds error.
+			 */
+			if ((READ_ONCE_NOCHECK(*p)) == stack_dump_trace[i]) {
 				stack_dump_trace[x] = stack_dump_trace[i++];
 				this_size = stack_trace_index[x++] =
 					(top - p) * sizeof(unsigned long);
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 0655afb..d166308 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -186,11 +186,11 @@
 
 extern char *__bad_type_size(void);
 
-#define SYSCALL_FIELD(type, name)					\
-	sizeof(type) != sizeof(trace.name) ?				\
+#define SYSCALL_FIELD(type, field, name)				\
+	sizeof(type) != sizeof(trace.field) ?				\
 		__bad_type_size() :					\
-		#type, #name, offsetof(typeof(trace), name),		\
-		sizeof(trace.name), is_signed_type(type)
+		#type, #name, offsetof(typeof(trace), field),		\
+		sizeof(trace.field), is_signed_type(type)
 
 static int __init
 __set_enter_print_fmt(struct syscall_metadata *entry, char *buf, int len)
@@ -261,7 +261,8 @@
 	int i;
 	int offset = offsetof(typeof(trace), args);
 
-	ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
+	ret = trace_define_field(call, SYSCALL_FIELD(int, nr, __syscall_nr),
+				 FILTER_OTHER);
 	if (ret)
 		return ret;
 
@@ -281,11 +282,12 @@
 	struct syscall_trace_exit trace;
 	int ret;
 
-	ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
+	ret = trace_define_field(call, SYSCALL_FIELD(int, nr, __syscall_nr),
+				 FILTER_OTHER);
 	if (ret)
 		return ret;
 
-	ret = trace_define_field(call, SYSCALL_FIELD(long, ret),
+	ret = trace_define_field(call, SYSCALL_FIELD(long, ret, ret),
 				 FILTER_OTHER);
 
 	return ret;
diff --git a/kernel/tsacct.c b/kernel/tsacct.c
index 975cb49..f8e26ab 100644
--- a/kernel/tsacct.c
+++ b/kernel/tsacct.c
@@ -93,9 +93,11 @@
 {
 	struct mm_struct *mm;
 
-	/* convert pages-usec to Mbyte-usec */
-	stats->coremem = p->acct_rss_mem1 * PAGE_SIZE / MB;
-	stats->virtmem = p->acct_vm_mem1 * PAGE_SIZE / MB;
+	/* convert pages-nsec/1024 to Mbyte-usec, see __acct_update_integrals */
+	stats->coremem = p->acct_rss_mem1 * PAGE_SIZE;
+	do_div(stats->coremem, 1000 * KB);
+	stats->virtmem = p->acct_vm_mem1 * PAGE_SIZE;
+	do_div(stats->virtmem, 1000 * KB);
 	mm = get_task_mm(p);
 	if (mm) {
 		/* adjust to KB unit */
@@ -123,27 +125,28 @@
 static void __acct_update_integrals(struct task_struct *tsk,
 				    cputime_t utime, cputime_t stime)
 {
-	if (likely(tsk->mm)) {
-		cputime_t time, dtime;
-		struct timeval value;
-		unsigned long flags;
-		u64 delta;
+	cputime_t time, dtime;
+	u64 delta;
 
-		local_irq_save(flags);
-		time = stime + utime;
-		dtime = time - tsk->acct_timexpd;
-		jiffies_to_timeval(cputime_to_jiffies(dtime), &value);
-		delta = value.tv_sec;
-		delta = delta * USEC_PER_SEC + value.tv_usec;
+	if (!likely(tsk->mm))
+		return;
 
-		if (delta == 0)
-			goto out;
-		tsk->acct_timexpd = time;
-		tsk->acct_rss_mem1 += delta * get_mm_rss(tsk->mm);
-		tsk->acct_vm_mem1 += delta * tsk->mm->total_vm;
-	out:
-		local_irq_restore(flags);
-	}
+	time = stime + utime;
+	dtime = time - tsk->acct_timexpd;
+	/* Avoid division: cputime_t is often in nanoseconds already. */
+	delta = cputime_to_nsecs(dtime);
+
+	if (delta < TICK_NSEC)
+		return;
+
+	tsk->acct_timexpd = time;
+	/*
+	 * Divide by 1024 to avoid overflow, and to avoid division.
+	 * The final unit reported to userspace is Mbyte-usecs,
+	 * the rest of the math is done in xacct_add_tsk.
+	 */
+	tsk->acct_rss_mem1 += delta * get_mm_rss(tsk->mm) >> 10;
+	tsk->acct_vm_mem1 += delta * tsk->mm->total_vm >> 10;
 }
 
 /**
@@ -153,9 +156,12 @@
 void acct_update_integrals(struct task_struct *tsk)
 {
 	cputime_t utime, stime;
+	unsigned long flags;
 
+	local_irq_save(flags);
 	task_cputime(tsk, &utime, &stime);
 	__acct_update_integrals(tsk, utime, stime);
+	local_irq_restore(flags);
 }
 
 /**
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 61a0264..7ff5dc7 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -301,7 +301,23 @@
 static LIST_HEAD(workqueues);		/* PR: list of all workqueues */
 static bool workqueue_freezing;		/* PL: have wqs started freezing? */
 
-static cpumask_var_t wq_unbound_cpumask; /* PL: low level cpumask for all unbound wqs */
+/* PL: allowable cpus for unbound wqs and work items */
+static cpumask_var_t wq_unbound_cpumask;
+
+/* CPU where unbound work was last round robin scheduled from this CPU */
+static DEFINE_PER_CPU(int, wq_rr_cpu_last);
+
+/*
+ * Local execution of unbound work items is no longer guaranteed.  The
+ * following always forces round-robin CPU selection on unbound work items
+ * to uncover usages which depend on it.
+ */
+#ifdef CONFIG_DEBUG_WQ_FORCE_RR_CPU
+static bool wq_debug_force_rr_cpu = true;
+#else
+static bool wq_debug_force_rr_cpu = false;
+#endif
+module_param_named(debug_force_rr_cpu, wq_debug_force_rr_cpu, bool, 0644);
 
 /* the per-cpu worker pools */
 static DEFINE_PER_CPU_SHARED_ALIGNED(struct worker_pool [NR_STD_WORKER_POOLS],
@@ -570,6 +586,16 @@
 						  int node)
 {
 	assert_rcu_or_wq_mutex_or_pool_mutex(wq);
+
+	/*
+	 * XXX: @node can be NUMA_NO_NODE if CPU goes offline while a
+	 * delayed item is pending.  The plan is to keep CPU -> NODE
+	 * mapping valid and stable across CPU on/offlines.  Once that
+	 * happens, this workaround can be removed.
+	 */
+	if (unlikely(node == NUMA_NO_NODE))
+		return wq->dfl_pwq;
+
 	return rcu_dereference_raw(wq->numa_pwq_tbl[node]);
 }
 
@@ -1298,6 +1324,39 @@
 	return worker && worker->current_pwq->wq == wq;
 }
 
+/*
+ * When queueing an unbound work item to a wq, prefer local CPU if allowed
+ * by wq_unbound_cpumask.  Otherwise, round robin among the allowed ones to
+ * avoid perturbing sensitive tasks.
+ */
+static int wq_select_unbound_cpu(int cpu)
+{
+	static bool printed_dbg_warning;
+	int new_cpu;
+
+	if (likely(!wq_debug_force_rr_cpu)) {
+		if (cpumask_test_cpu(cpu, wq_unbound_cpumask))
+			return cpu;
+	} else if (!printed_dbg_warning) {
+		pr_warn("workqueue: round-robin CPU selection forced, expect performance impact\n");
+		printed_dbg_warning = true;
+	}
+
+	if (cpumask_empty(wq_unbound_cpumask))
+		return cpu;
+
+	new_cpu = __this_cpu_read(wq_rr_cpu_last);
+	new_cpu = cpumask_next_and(new_cpu, wq_unbound_cpumask, cpu_online_mask);
+	if (unlikely(new_cpu >= nr_cpu_ids)) {
+		new_cpu = cpumask_first_and(wq_unbound_cpumask, cpu_online_mask);
+		if (unlikely(new_cpu >= nr_cpu_ids))
+			return cpu;
+	}
+	__this_cpu_write(wq_rr_cpu_last, new_cpu);
+
+	return new_cpu;
+}
+
 static void __queue_work(int cpu, struct workqueue_struct *wq,
 			 struct work_struct *work)
 {
@@ -1323,7 +1382,7 @@
 		return;
 retry:
 	if (req_cpu == WORK_CPU_UNBOUND)
-		cpu = raw_smp_processor_id();
+		cpu = wq_select_unbound_cpu(raw_smp_processor_id());
 
 	/* pwq which will be used unless @work is executing elsewhere */
 	if (!(wq->flags & WQ_UNBOUND))
@@ -1464,13 +1523,13 @@
 	timer_stats_timer_set_start_info(&dwork->timer);
 
 	dwork->wq = wq;
-	/* timer isn't guaranteed to run in this cpu, record earlier */
-	if (cpu == WORK_CPU_UNBOUND)
-		cpu = raw_smp_processor_id();
 	dwork->cpu = cpu;
 	timer->expires = jiffies + delay;
 
-	add_timer_on(timer, cpu);
+	if (unlikely(cpu != WORK_CPU_UNBOUND))
+		add_timer_on(timer, cpu);
+	else
+		add_timer(timer);
 }
 
 /**
@@ -2355,7 +2414,8 @@
 	WARN_ONCE(current->flags & PF_MEMALLOC,
 		  "workqueue: PF_MEMALLOC task %d(%s) is flushing !WQ_MEM_RECLAIM %s:%pf",
 		  current->pid, current->comm, target_wq->name, target_func);
-	WARN_ONCE(worker && (worker->current_pwq->wq->flags & WQ_MEM_RECLAIM),
+	WARN_ONCE(worker && ((worker->current_pwq->wq->flags &
+			      (WQ_MEM_RECLAIM | __WQ_LEGACY)) == WQ_MEM_RECLAIM),
 		  "workqueue: WQ_MEM_RECLAIM %s:%pf is flushing !WQ_MEM_RECLAIM %s:%pf",
 		  worker->current_pwq->wq->name, worker->current_func,
 		  target_wq->name, target_func);
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index ecb9e75..8bfd1ac 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1400,6 +1400,21 @@
 
 endmenu # "RCU Debugging"
 
+config DEBUG_WQ_FORCE_RR_CPU
+	bool "Force round-robin CPU selection for unbound work items"
+	depends on DEBUG_KERNEL
+	default n
+	help
+	  Workqueue used to implicitly guarantee that work items queued
+	  without explicit CPU specified are put on the local CPU.  This
+	  guarantee is no longer true and while local CPU is still
+	  preferred work items may be put on foreign CPUs.  Kernel
+	  parameter "workqueue.debug_force_rr_cpu" is added to force
+	  round-robin CPU selection to flush out usages which depend on the
+	  now broken guarantee.  This config option enables the debug
+	  feature by default.  When enabled, memory and cache locality will
+	  be impacted.
+
 config DEBUG_BLOCK_EXT_DEVT
         bool "Force extended block device numbers and spread them"
 	depends on DEBUG_KERNEL
diff --git a/lib/Kconfig.ubsan b/lib/Kconfig.ubsan
index 49518fb..e07c1ba 100644
--- a/lib/Kconfig.ubsan
+++ b/lib/Kconfig.ubsan
@@ -18,6 +18,8 @@
 	  This option activates instrumentation for the entire kernel.
 	  If you don't enable this option, you have to explicitly specify
 	  UBSAN_SANITIZE := y for the files/directories you want to check for UB.
+	  Enabling this option will get kernel image size increased
+	  significantly.
 
 config UBSAN_ALIGNMENT
 	bool "Enable checking of pointers alignment"
@@ -25,5 +27,5 @@
 	default y if !HAVE_EFFICIENT_UNALIGNED_ACCESS
 	help
 	  This option enables detection of unaligned memory accesses.
-	  Enabling this option on architectures that support unalligned
+	  Enabling this option on architectures that support unaligned
 	  accesses may produce a lot of false positives.
diff --git a/lib/atomic64_test.c b/lib/atomic64_test.c
index d62de8b..1234818 100644
--- a/lib/atomic64_test.c
+++ b/lib/atomic64_test.c
@@ -17,7 +17,7 @@
 #include <linux/atomic.h>
 
 #ifdef CONFIG_X86
-#include <asm/processor.h>	/* for boot_cpu_has below */
+#include <asm/cpufeature.h>	/* for boot_cpu_has below */
 #endif
 
 #define TEST(bit, op, c_op, val)				\
diff --git a/lib/cpumask.c b/lib/cpumask.c
index 5a70f61..81dedaa 100644
--- a/lib/cpumask.c
+++ b/lib/cpumask.c
@@ -41,6 +41,7 @@
 			break;
 	return i;
 }
+EXPORT_SYMBOL(cpumask_any_but);
 
 /* These are not inline because of header tangles. */
 #ifdef CONFIG_CPUMASK_OFFSTACK
diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index 547f7f9..519b5a1 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -21,7 +21,7 @@
 #define ODEBUG_HASH_BITS	14
 #define ODEBUG_HASH_SIZE	(1 << ODEBUG_HASH_BITS)
 
-#define ODEBUG_POOL_SIZE	512
+#define ODEBUG_POOL_SIZE	1024
 #define ODEBUG_POOL_MIN_LEVEL	256
 
 #define ODEBUG_CHUNK_SHIFT	PAGE_SHIFT
diff --git a/lib/dump_stack.c b/lib/dump_stack.c
index 6745c62..c30d07e 100644
--- a/lib/dump_stack.c
+++ b/lib/dump_stack.c
@@ -25,6 +25,7 @@
 
 asmlinkage __visible void dump_stack(void)
 {
+	unsigned long flags;
 	int was_locked;
 	int old;
 	int cpu;
@@ -33,9 +34,8 @@
 	 * Permit this cpu to perform nested stack dumps while serialising
 	 * against other CPUs
 	 */
-	preempt_disable();
-
 retry:
+	local_irq_save(flags);
 	cpu = smp_processor_id();
 	old = atomic_cmpxchg(&dump_lock, -1, cpu);
 	if (old == -1) {
@@ -43,6 +43,7 @@
 	} else if (old == cpu) {
 		was_locked = 1;
 	} else {
+		local_irq_restore(flags);
 		cpu_relax();
 		goto retry;
 	}
@@ -52,7 +53,7 @@
 	if (!was_locked)
 		atomic_set(&dump_lock, -1);
 
-	preempt_enable();
+	local_irq_restore(flags);
 }
 #else
 asmlinkage __visible void dump_stack(void)
diff --git a/lib/klist.c b/lib/klist.c
index d74cf7a..0507fa5 100644
--- a/lib/klist.c
+++ b/lib/klist.c
@@ -282,9 +282,9 @@
 			  struct klist_node *n)
 {
 	i->i_klist = k;
-	i->i_cur = n;
-	if (n)
-		kref_get(&n->n_ref);
+	i->i_cur = NULL;
+	if (n && kref_get_unless_zero(&n->n_ref))
+		i->i_cur = n;
 }
 EXPORT_SYMBOL_GPL(klist_iter_init_node);
 
diff --git a/lib/list_debug.c b/lib/list_debug.c
index 3345a08..3859bf6 100644
--- a/lib/list_debug.c
+++ b/lib/list_debug.c
@@ -12,13 +12,6 @@
 #include <linux/kernel.h>
 #include <linux/rculist.h>
 
-static struct list_head force_poison;
-void list_force_poison(struct list_head *entry)
-{
-	entry->next = &force_poison;
-	entry->prev = &force_poison;
-}
-
 /*
  * Insert a new entry between two known consecutive entries.
  *
@@ -30,8 +23,6 @@
 			      struct list_head *prev,
 			      struct list_head *next)
 {
-	WARN(new->next == &force_poison || new->prev == &force_poison,
-		"list_add attempted on force-poisoned entry\n");
 	WARN(next->prev != prev,
 		"list_add corruption. next->prev should be "
 		"prev (%p), but was %p. (next=%p).\n",
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index fcf5d98..6b79e90 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -1019,9 +1019,13 @@
 		return 0;
 
 	radix_tree_for_each_slot(slot, root, &iter, first_index) {
-		results[ret] = indirect_to_ptr(rcu_dereference_raw(*slot));
+		results[ret] = rcu_dereference_raw(*slot);
 		if (!results[ret])
 			continue;
+		if (radix_tree_is_indirect_ptr(results[ret])) {
+			slot = radix_tree_iter_retry(&iter);
+			continue;
+		}
 		if (++ret == max_items)
 			break;
 	}
@@ -1098,9 +1102,13 @@
 		return 0;
 
 	radix_tree_for_each_tagged(slot, root, &iter, first_index, tag) {
-		results[ret] = indirect_to_ptr(rcu_dereference_raw(*slot));
+		results[ret] = rcu_dereference_raw(*slot);
 		if (!results[ret])
 			continue;
+		if (radix_tree_is_indirect_ptr(results[ret])) {
+			slot = radix_tree_iter_retry(&iter);
+			continue;
+		}
 		if (++ret == max_items)
 			break;
 	}
diff --git a/lib/scatterlist.c b/lib/scatterlist.c
index bafa993..004fc70 100644
--- a/lib/scatterlist.c
+++ b/lib/scatterlist.c
@@ -598,9 +598,9 @@
  *
  * Description:
  *   Stops mapping iterator @miter.  @miter should have been started
- *   started using sg_miter_start().  A stopped iteration can be
- *   resumed by calling sg_miter_next() on it.  This is useful when
- *   resources (kmap) need to be released during iteration.
+ *   using sg_miter_start().  A stopped iteration can be resumed by
+ *   calling sg_miter_next() on it.  This is useful when resources (kmap)
+ *   need to be released during iteration.
  *
  * Context:
  *   Preemption disabled if the SG_MITER_ATOMIC is set.  Don't care
diff --git a/lib/test-string_helpers.c b/lib/test-string_helpers.c
index 98866a7..25b5cbf 100644
--- a/lib/test-string_helpers.c
+++ b/lib/test-string_helpers.c
@@ -327,36 +327,67 @@
 }
 
 #define string_get_size_maxbuf 16
-#define test_string_get_size_one(size, blk_size, units, exp_result)            \
+#define test_string_get_size_one(size, blk_size, exp_result10, exp_result2)    \
 	do {                                                                   \
-		BUILD_BUG_ON(sizeof(exp_result) >= string_get_size_maxbuf);    \
-		__test_string_get_size((size), (blk_size), (units),            \
-				       (exp_result));                          \
+		BUILD_BUG_ON(sizeof(exp_result10) >= string_get_size_maxbuf);  \
+		BUILD_BUG_ON(sizeof(exp_result2) >= string_get_size_maxbuf);   \
+		__test_string_get_size((size), (blk_size), (exp_result10),     \
+				       (exp_result2));                         \
 	} while (0)
 
 
-static __init void __test_string_get_size(const u64 size, const u64 blk_size,
-					  const enum string_size_units units,
-					  const char *exp_result)
+static __init void test_string_get_size_check(const char *units,
+					      const char *exp,
+					      char *res,
+					      const u64 size,
+					      const u64 blk_size)
 {
-	char buf[string_get_size_maxbuf];
-
-	string_get_size(size, blk_size, units, buf, sizeof(buf));
-	if (!memcmp(buf, exp_result, strlen(exp_result) + 1))
+	if (!memcmp(res, exp, strlen(exp) + 1))
 		return;
 
-	buf[sizeof(buf) - 1] = '\0';
-	pr_warn("Test 'test_string_get_size_one' failed!\n");
-	pr_warn("string_get_size(size = %llu, blk_size = %llu, units = %d\n",
+	res[string_get_size_maxbuf - 1] = '\0';
+
+	pr_warn("Test 'test_string_get_size' failed!\n");
+	pr_warn("string_get_size(size = %llu, blk_size = %llu, units = %s)\n",
 		size, blk_size, units);
-	pr_warn("expected: '%s', got '%s'\n", exp_result, buf);
+	pr_warn("expected: '%s', got '%s'\n", exp, res);
+}
+
+static __init void __test_string_get_size(const u64 size, const u64 blk_size,
+					  const char *exp_result10,
+					  const char *exp_result2)
+{
+	char buf10[string_get_size_maxbuf];
+	char buf2[string_get_size_maxbuf];
+
+	string_get_size(size, blk_size, STRING_UNITS_10, buf10, sizeof(buf10));
+	string_get_size(size, blk_size, STRING_UNITS_2, buf2, sizeof(buf2));
+
+	test_string_get_size_check("STRING_UNITS_10", exp_result10, buf10,
+				   size, blk_size);
+
+	test_string_get_size_check("STRING_UNITS_2", exp_result2, buf2,
+				   size, blk_size);
 }
 
 static __init void test_string_get_size(void)
 {
-	test_string_get_size_one(16384, 512, STRING_UNITS_2, "8.00 MiB");
-	test_string_get_size_one(8192, 4096, STRING_UNITS_10, "32.7 MB");
-	test_string_get_size_one(1, 512, STRING_UNITS_10, "512 B");
+	/* small values */
+	test_string_get_size_one(0, 512, "0 B", "0 B");
+	test_string_get_size_one(1, 512, "512 B", "512 B");
+	test_string_get_size_one(1100, 1, "1.10 kB", "1.07 KiB");
+
+	/* normal values */
+	test_string_get_size_one(16384, 512, "8.39 MB", "8.00 MiB");
+	test_string_get_size_one(500118192, 512, "256 GB", "238 GiB");
+	test_string_get_size_one(8192, 4096, "33.6 MB", "32.0 MiB");
+
+	/* weird block sizes */
+	test_string_get_size_one(3000, 1900, "5.70 MB", "5.44 MiB");
+
+	/* huge values */
+	test_string_get_size_one(U64_MAX, 4096, "75.6 ZB", "64.0 ZiB");
+	test_string_get_size_one(4096, U64_MAX, "75.6 ZB", "64.0 ZiB");
 }
 
 static int __init test_string_helpers_init(void)
diff --git a/lib/test_static_keys.c b/lib/test_static_keys.c
index c61b299..915d75d 100644
--- a/lib/test_static_keys.c
+++ b/lib/test_static_keys.c
@@ -46,8 +46,11 @@
 	bool			(*test_key)(void);
 };
 
-#define test_key_func(key, branch) \
-	({bool func(void) { return branch(key); } func;	})
+#define test_key_func(key, branch)	\
+static bool key ## _ ## branch(void)	\
+{					\
+	return branch(&key);		\
+}
 
 static void invert_key(struct static_key *key)
 {
@@ -92,6 +95,25 @@
 	return 0;
 }
 
+test_key_func(old_true_key, static_key_true)
+test_key_func(old_false_key, static_key_false)
+test_key_func(true_key, static_branch_likely)
+test_key_func(true_key, static_branch_unlikely)
+test_key_func(false_key, static_branch_likely)
+test_key_func(false_key, static_branch_unlikely)
+test_key_func(base_old_true_key, static_key_true)
+test_key_func(base_inv_old_true_key, static_key_true)
+test_key_func(base_old_false_key, static_key_false)
+test_key_func(base_inv_old_false_key, static_key_false)
+test_key_func(base_true_key, static_branch_likely)
+test_key_func(base_true_key, static_branch_unlikely)
+test_key_func(base_inv_true_key, static_branch_likely)
+test_key_func(base_inv_true_key, static_branch_unlikely)
+test_key_func(base_false_key, static_branch_likely)
+test_key_func(base_false_key, static_branch_unlikely)
+test_key_func(base_inv_false_key, static_branch_likely)
+test_key_func(base_inv_false_key, static_branch_unlikely)
+
 static int __init test_static_key_init(void)
 {
 	int ret;
@@ -102,95 +124,95 @@
 		{
 			.init_state	= true,
 			.key		= &old_true_key,
-			.test_key	= test_key_func(&old_true_key, static_key_true),
+			.test_key	= &old_true_key_static_key_true,
 		},
 		{
 			.init_state	= false,
 			.key		= &old_false_key,
-			.test_key	= test_key_func(&old_false_key, static_key_false),
+			.test_key	= &old_false_key_static_key_false,
 		},
 		/* internal keys - new keys */
 		{
 			.init_state	= true,
 			.key		= &true_key.key,
-			.test_key	= test_key_func(&true_key, static_branch_likely),
+			.test_key	= &true_key_static_branch_likely,
 		},
 		{
 			.init_state	= true,
 			.key		= &true_key.key,
-			.test_key	= test_key_func(&true_key, static_branch_unlikely),
+			.test_key	= &true_key_static_branch_unlikely,
 		},
 		{
 			.init_state	= false,
 			.key		= &false_key.key,
-			.test_key	= test_key_func(&false_key, static_branch_likely),
+			.test_key	= &false_key_static_branch_likely,
 		},
 		{
 			.init_state	= false,
 			.key		= &false_key.key,
-			.test_key	= test_key_func(&false_key, static_branch_unlikely),
+			.test_key	= &false_key_static_branch_unlikely,
 		},
 		/* external keys - old keys */
 		{
 			.init_state	= true,
 			.key		= &base_old_true_key,
-			.test_key	= test_key_func(&base_old_true_key, static_key_true),
+			.test_key	= &base_old_true_key_static_key_true,
 		},
 		{
 			.init_state	= false,
 			.key		= &base_inv_old_true_key,
-			.test_key	= test_key_func(&base_inv_old_true_key, static_key_true),
+			.test_key	= &base_inv_old_true_key_static_key_true,
 		},
 		{
 			.init_state	= false,
 			.key		= &base_old_false_key,
-			.test_key	= test_key_func(&base_old_false_key, static_key_false),
+			.test_key	= &base_old_false_key_static_key_false,
 		},
 		{
 			.init_state	= true,
 			.key		= &base_inv_old_false_key,
-			.test_key	= test_key_func(&base_inv_old_false_key, static_key_false),
+			.test_key	= &base_inv_old_false_key_static_key_false,
 		},
 		/* external keys - new keys */
 		{
 			.init_state	= true,
 			.key		= &base_true_key.key,
-			.test_key	= test_key_func(&base_true_key, static_branch_likely),
+			.test_key	= &base_true_key_static_branch_likely,
 		},
 		{
 			.init_state	= true,
 			.key		= &base_true_key.key,
-			.test_key	= test_key_func(&base_true_key, static_branch_unlikely),
+			.test_key	= &base_true_key_static_branch_unlikely,
 		},
 		{
 			.init_state	= false,
 			.key		= &base_inv_true_key.key,
-			.test_key	= test_key_func(&base_inv_true_key, static_branch_likely),
+			.test_key	= &base_inv_true_key_static_branch_likely,
 		},
 		{
 			.init_state	= false,
 			.key		= &base_inv_true_key.key,
-			.test_key	= test_key_func(&base_inv_true_key, static_branch_unlikely),
+			.test_key	= &base_inv_true_key_static_branch_unlikely,
 		},
 		{
 			.init_state	= false,
 			.key		= &base_false_key.key,
-			.test_key	= test_key_func(&base_false_key, static_branch_likely),
+			.test_key	= &base_false_key_static_branch_likely,
 		},
 		{
 			.init_state	= false,
 			.key		= &base_false_key.key,
-			.test_key	= test_key_func(&base_false_key, static_branch_unlikely),
+			.test_key	= &base_false_key_static_branch_unlikely,
 		},
 		{
 			.init_state	= true,
 			.key		= &base_inv_false_key.key,
-			.test_key	= test_key_func(&base_inv_false_key, static_branch_likely),
+			.test_key	= &base_inv_false_key_static_branch_likely,
 		},
 		{
 			.init_state	= true,
 			.key		= &base_inv_false_key.key,
-			.test_key	= test_key_func(&base_inv_false_key, static_branch_unlikely),
+			.test_key	= &base_inv_false_key_static_branch_unlikely,
 		},
 	};
 
diff --git a/lib/ucs2_string.c b/lib/ucs2_string.c
index 6f500ef..f0b323a 100644
--- a/lib/ucs2_string.c
+++ b/lib/ucs2_string.c
@@ -49,3 +49,65 @@
         }
 }
 EXPORT_SYMBOL(ucs2_strncmp);
+
+unsigned long
+ucs2_utf8size(const ucs2_char_t *src)
+{
+	unsigned long i;
+	unsigned long j = 0;
+
+	for (i = 0; i < ucs2_strlen(src); i++) {
+		u16 c = src[i];
+
+		if (c >= 0x800)
+			j += 3;
+		else if (c >= 0x80)
+			j += 2;
+		else
+			j += 1;
+	}
+
+	return j;
+}
+EXPORT_SYMBOL(ucs2_utf8size);
+
+/*
+ * copy at most maxlength bytes of whole utf8 characters to dest from the
+ * ucs2 string src.
+ *
+ * The return value is the number of characters copied, not including the
+ * final NUL character.
+ */
+unsigned long
+ucs2_as_utf8(u8 *dest, const ucs2_char_t *src, unsigned long maxlength)
+{
+	unsigned int i;
+	unsigned long j = 0;
+	unsigned long limit = ucs2_strnlen(src, maxlength);
+
+	for (i = 0; maxlength && i < limit; i++) {
+		u16 c = src[i];
+
+		if (c >= 0x800) {
+			if (maxlength < 3)
+				break;
+			maxlength -= 3;
+			dest[j++] = 0xe0 | (c & 0xf000) >> 12;
+			dest[j++] = 0x80 | (c & 0x0fc0) >> 6;
+			dest[j++] = 0x80 | (c & 0x003f);
+		} else if (c >= 0x80) {
+			if (maxlength < 2)
+				break;
+			maxlength -= 2;
+			dest[j++] = 0xc0 | (c & 0x7c0) >> 6;
+			dest[j++] = 0x80 | (c & 0x03f);
+		} else {
+			maxlength -= 1;
+			dest[j++] = c & 0x7f;
+		}
+	}
+	if (maxlength)
+		dest[j] = '\0';
+	return j;
+}
+EXPORT_SYMBOL(ucs2_as_utf8);
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 48ff9c3..f44e178 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -1590,22 +1590,23 @@
 			return buf;
 		}
 	case 'K':
-		/*
-		 * %pK cannot be used in IRQ context because its test
-		 * for CAP_SYSLOG would be meaningless.
-		 */
-		if (kptr_restrict && (in_irq() || in_serving_softirq() ||
-				      in_nmi())) {
-			if (spec.field_width == -1)
-				spec.field_width = default_width;
-			return string(buf, end, "pK-error", spec);
-		}
-
 		switch (kptr_restrict) {
 		case 0:
 			/* Always print %pK values */
 			break;
 		case 1: {
+			const struct cred *cred;
+
+			/*
+			 * kptr_restrict==1 cannot be used in IRQ context
+			 * because its test for CAP_SYSLOG would be meaningless.
+			 */
+			if (in_irq() || in_serving_softirq() || in_nmi()) {
+				if (spec.field_width == -1)
+					spec.field_width = default_width;
+				return string(buf, end, "pK-error", spec);
+			}
+
 			/*
 			 * Only print the real pointer value if the current
 			 * process has CAP_SYSLOG and is running with the
@@ -1615,8 +1616,7 @@
 			 * leak pointer values if a binary opens a file using
 			 * %pK and then elevates privileges before reading it.
 			 */
-			const struct cred *cred = current_cred();
-
+			cred = current_cred();
 			if (!has_capability_noaudit(current, CAP_SYSLOG) ||
 			    !uid_eq(cred->euid, cred->uid) ||
 			    !gid_eq(cred->egid, cred->gid))
diff --git a/mm/Kconfig b/mm/Kconfig
index 97a4e06..03cbfa0 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -624,7 +624,7 @@
 	bool
 
 config DEFERRED_STRUCT_PAGE_INIT
-	bool "Defer initialisation of struct pages to kswapd"
+	bool "Defer initialisation of struct pages to kthreads"
 	default n
 	depends on ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT
 	depends on MEMORY_HOTPLUG
@@ -633,9 +633,10 @@
 	  single thread. On very large machines this can take a considerable
 	  amount of time. If this option is set, large machines will bring up
 	  a subset of memmap at boot and then initialise the rest in parallel
-	  when kswapd starts. This has a potential performance impact on
-	  processes running early in the lifetime of the systemm until kswapd
-	  finishes the initialisation.
+	  by starting one-off "pgdatinitX" kernel thread for each node X. This
+	  has a potential performance impact on processes running early in the
+	  lifetime of the system until these kthreads finish the
+	  initialisation.
 
 config IDLE_PAGE_TRACKING
 	bool "Enable idle page tracking"
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index cc5d29d..c554d17 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -328,7 +328,7 @@
 	return 0;
 
 out_destroy_stat:
-	while (--i)
+	while (i--)
 		percpu_counter_destroy(&wb->stat[i]);
 	fprop_local_destroy_percpu(&wb->completions);
 out_put_cong:
@@ -989,7 +989,7 @@
 		 * here rather than calling cond_resched().
 		 */
 		if (current->flags & PF_WQ_WORKER)
-			schedule_timeout(1);
+			schedule_timeout_uninterruptible(1);
 		else
 			cond_resched();
 
diff --git a/mm/cleancache.c b/mm/cleancache.c
index 8fc5081..ba5d8f3 100644
--- a/mm/cleancache.c
+++ b/mm/cleancache.c
@@ -22,7 +22,7 @@
  * cleancache_ops is set by cleancache_register_ops to contain the pointers
  * to the cleancache "backend" implementation functions.
  */
-static struct cleancache_ops *cleancache_ops __read_mostly;
+static const struct cleancache_ops *cleancache_ops __read_mostly;
 
 /*
  * Counters available via /sys/kernel/debug/cleancache (if debugfs is
@@ -49,7 +49,7 @@
 /*
  * Register operations for cleancache. Returns 0 on success.
  */
-int cleancache_register_ops(struct cleancache_ops *ops)
+int cleancache_register_ops(const struct cleancache_ops *ops)
 {
 	if (cmpxchg(&cleancache_ops, NULL, ops))
 		return -EBUSY;
diff --git a/mm/filemap.c b/mm/filemap.c
index bc94386..da7a35d 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -195,6 +195,30 @@
 	else
 		cleancache_invalidate_page(mapping, page);
 
+	VM_BUG_ON_PAGE(page_mapped(page), page);
+	if (!IS_ENABLED(CONFIG_DEBUG_VM) && unlikely(page_mapped(page))) {
+		int mapcount;
+
+		pr_alert("BUG: Bad page cache in process %s  pfn:%05lx\n",
+			 current->comm, page_to_pfn(page));
+		dump_page(page, "still mapped when deleted");
+		dump_stack();
+		add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
+
+		mapcount = page_mapcount(page);
+		if (mapping_exiting(mapping) &&
+		    page_count(page) >= mapcount + 2) {
+			/*
+			 * All vmas have already been torn down, so it's
+			 * a good bet that actually the page is unmapped,
+			 * and we'd prefer not to leak it: if we're wrong,
+			 * some other bad page check should catch it later.
+			 */
+			page_mapcount_reset(page);
+			atomic_sub(mapcount, &page->_count);
+		}
+	}
+
 	page_cache_tree_delete(mapping, page, shadow);
 
 	page->mapping = NULL;
@@ -205,7 +229,6 @@
 		__dec_zone_page_state(page, NR_FILE_PAGES);
 	if (PageSwapBacked(page))
 		__dec_zone_page_state(page, NR_SHMEM);
-	VM_BUG_ON_PAGE(page_mapped(page), page);
 
 	/*
 	 * At this point page must be either written or cleaned by truncate.
@@ -446,7 +469,8 @@
 {
 	int err = 0;
 
-	if (mapping->nrpages) {
+	if ((!dax_mapping(mapping) && mapping->nrpages) ||
+	    (dax_mapping(mapping) && mapping->nrexceptional)) {
 		err = filemap_fdatawrite(mapping);
 		/*
 		 * Even if the above returned error, the pages may be
@@ -482,13 +506,8 @@
 {
 	int err = 0;
 
-	if (dax_mapping(mapping) && mapping->nrexceptional) {
-		err = dax_writeback_mapping_range(mapping, lstart, lend);
-		if (err)
-			return err;
-	}
-
-	if (mapping->nrpages) {
+	if ((!dax_mapping(mapping) && mapping->nrpages) ||
+	    (dax_mapping(mapping) && mapping->nrexceptional)) {
 		err = __filemap_fdatawrite_range(mapping, lstart, lend,
 						 WB_SYNC_ALL);
 		/* See comment of filemap_write_and_wait() */
@@ -1890,6 +1909,7 @@
  * page_cache_read - adds requested page to the page cache if not already there
  * @file:	file to read
  * @offset:	page index
+ * @gfp_mask:	memory allocation flags
  *
  * This adds the requested page to the page cache if it isn't already there,
  * and schedules an I/O to read in its contents from disk.
diff --git a/mm/gup.c b/mm/gup.c
index b64a361..7bf19ff 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -430,10 +430,8 @@
 			 * Anon pages in shared mappings are surprising: now
 			 * just reject it.
 			 */
-			if (!is_cow_mapping(vm_flags)) {
-				WARN_ON_ONCE(vm_flags & VM_MAYWRITE);
+			if (!is_cow_mapping(vm_flags))
 				return -EFAULT;
-			}
 		}
 	} else if (!(vm_flags & VM_READ)) {
 		if (!(gup_flags & FOLL_FORCE))
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index fd3a07b..e10a4fe 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -138,9 +138,6 @@
 	.mm_head = LIST_HEAD_INIT(khugepaged_scan.mm_head),
 };
 
-static DEFINE_SPINLOCK(split_queue_lock);
-static LIST_HEAD(split_queue);
-static unsigned long split_queue_len;
 static struct shrinker deferred_split_shrinker;
 
 static void set_recommended_min_free_kbytes(void)
@@ -861,7 +858,8 @@
 		return false;
 	entry = mk_pmd(zero_page, vma->vm_page_prot);
 	entry = pmd_mkhuge(entry);
-	pgtable_trans_huge_deposit(mm, pmd, pgtable);
+	if (pgtable)
+		pgtable_trans_huge_deposit(mm, pmd, pgtable);
 	set_pmd_at(mm, haddr, pmd, entry);
 	atomic_long_inc(&mm->nr_ptes);
 	return true;
@@ -1039,13 +1037,15 @@
 	spinlock_t *dst_ptl, *src_ptl;
 	struct page *src_page;
 	pmd_t pmd;
-	pgtable_t pgtable;
+	pgtable_t pgtable = NULL;
 	int ret;
 
-	ret = -ENOMEM;
-	pgtable = pte_alloc_one(dst_mm, addr);
-	if (unlikely(!pgtable))
-		goto out;
+	if (!vma_is_dax(vma)) {
+		ret = -ENOMEM;
+		pgtable = pte_alloc_one(dst_mm, addr);
+		if (unlikely(!pgtable))
+			goto out;
+	}
 
 	dst_ptl = pmd_lock(dst_mm, dst_pmd);
 	src_ptl = pmd_lockptr(src_mm, src_pmd);
@@ -1076,7 +1076,7 @@
 		goto out_unlock;
 	}
 
-	if (pmd_trans_huge(pmd)) {
+	if (!vma_is_dax(vma)) {
 		/* thp accounting separate from pmd_devmap accounting */
 		src_page = pmd_page(pmd);
 		VM_BUG_ON_PAGE(!PageHead(src_page), src_page);
@@ -1700,7 +1700,8 @@
 		pmd = pmdp_huge_get_and_clear(mm, old_addr, old_pmd);
 		VM_BUG_ON(!pmd_none(*new_pmd));
 
-		if (pmd_move_must_withdraw(new_ptl, old_ptl)) {
+		if (pmd_move_must_withdraw(new_ptl, old_ptl) &&
+				vma_is_anonymous(vma)) {
 			pgtable_t pgtable;
 			pgtable = pgtable_trans_huge_withdraw(mm, old_pmd);
 			pgtable_trans_huge_deposit(mm, new_pmd, pgtable);
@@ -2835,6 +2836,7 @@
 	pgtable_t pgtable;
 	pmd_t _pmd;
 	bool young, write, dirty;
+	unsigned long addr;
 	int i;
 
 	VM_BUG_ON(haddr & ~HPAGE_PMD_MASK);
@@ -2860,10 +2862,11 @@
 	young = pmd_young(*pmd);
 	dirty = pmd_dirty(*pmd);
 
+	pmdp_huge_split_prepare(vma, haddr, pmd);
 	pgtable = pgtable_trans_huge_withdraw(mm, pmd);
 	pmd_populate(mm, &_pmd, pgtable);
 
-	for (i = 0; i < HPAGE_PMD_NR; i++, haddr += PAGE_SIZE) {
+	for (i = 0, addr = haddr; i < HPAGE_PMD_NR; i++, addr += PAGE_SIZE) {
 		pte_t entry, *pte;
 		/*
 		 * Note that NUMA hinting access restrictions are not
@@ -2884,9 +2887,9 @@
 		}
 		if (dirty)
 			SetPageDirty(page + i);
-		pte = pte_offset_map(&_pmd, haddr);
+		pte = pte_offset_map(&_pmd, addr);
 		BUG_ON(!pte_none(*pte));
-		set_pte_at(mm, haddr, pte, entry);
+		set_pte_at(mm, addr, pte, entry);
 		atomic_inc(&page[i]._mapcount);
 		pte_unmap(pte);
 	}
@@ -2936,7 +2939,7 @@
 	pmd_populate(mm, pmd, pgtable);
 
 	if (freeze) {
-		for (i = 0; i < HPAGE_PMD_NR; i++, haddr += PAGE_SIZE) {
+		for (i = 0; i < HPAGE_PMD_NR; i++) {
 			page_remove_rmap(page + i, false);
 			put_page(page + i);
 		}
@@ -3358,6 +3361,7 @@
 int split_huge_page_to_list(struct page *page, struct list_head *list)
 {
 	struct page *head = compound_head(page);
+	struct pglist_data *pgdata = NODE_DATA(page_to_nid(head));
 	struct anon_vma *anon_vma;
 	int count, mapcount, ret;
 	bool mlocked;
@@ -3401,19 +3405,19 @@
 		lru_add_drain();
 
 	/* Prevent deferred_split_scan() touching ->_count */
-	spin_lock_irqsave(&split_queue_lock, flags);
+	spin_lock_irqsave(&pgdata->split_queue_lock, flags);
 	count = page_count(head);
 	mapcount = total_mapcount(head);
 	if (!mapcount && count == 1) {
 		if (!list_empty(page_deferred_list(head))) {
-			split_queue_len--;
+			pgdata->split_queue_len--;
 			list_del(page_deferred_list(head));
 		}
-		spin_unlock_irqrestore(&split_queue_lock, flags);
+		spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
 		__split_huge_page(page, list);
 		ret = 0;
 	} else if (IS_ENABLED(CONFIG_DEBUG_VM) && mapcount) {
-		spin_unlock_irqrestore(&split_queue_lock, flags);
+		spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
 		pr_alert("total_mapcount: %u, page_count(): %u\n",
 				mapcount, count);
 		if (PageTail(page))
@@ -3421,7 +3425,7 @@
 		dump_page(page, "total_mapcount(head) > 0");
 		BUG();
 	} else {
-		spin_unlock_irqrestore(&split_queue_lock, flags);
+		spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
 		unfreeze_page(anon_vma, head);
 		ret = -EBUSY;
 	}
@@ -3436,64 +3440,65 @@
 
 void free_transhuge_page(struct page *page)
 {
+	struct pglist_data *pgdata = NODE_DATA(page_to_nid(page));
 	unsigned long flags;
 
-	spin_lock_irqsave(&split_queue_lock, flags);
+	spin_lock_irqsave(&pgdata->split_queue_lock, flags);
 	if (!list_empty(page_deferred_list(page))) {
-		split_queue_len--;
+		pgdata->split_queue_len--;
 		list_del(page_deferred_list(page));
 	}
-	spin_unlock_irqrestore(&split_queue_lock, flags);
+	spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
 	free_compound_page(page);
 }
 
 void deferred_split_huge_page(struct page *page)
 {
+	struct pglist_data *pgdata = NODE_DATA(page_to_nid(page));
 	unsigned long flags;
 
 	VM_BUG_ON_PAGE(!PageTransHuge(page), page);
 
-	spin_lock_irqsave(&split_queue_lock, flags);
+	spin_lock_irqsave(&pgdata->split_queue_lock, flags);
 	if (list_empty(page_deferred_list(page))) {
-		list_add_tail(page_deferred_list(page), &split_queue);
-		split_queue_len++;
+		list_add_tail(page_deferred_list(page), &pgdata->split_queue);
+		pgdata->split_queue_len++;
 	}
-	spin_unlock_irqrestore(&split_queue_lock, flags);
+	spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
 }
 
 static unsigned long deferred_split_count(struct shrinker *shrink,
 		struct shrink_control *sc)
 {
-	/*
-	 * Split a page from split_queue will free up at least one page,
-	 * at most HPAGE_PMD_NR - 1. We don't track exact number.
-	 * Let's use HPAGE_PMD_NR / 2 as ballpark.
-	 */
-	return ACCESS_ONCE(split_queue_len) * HPAGE_PMD_NR / 2;
+	struct pglist_data *pgdata = NODE_DATA(sc->nid);
+	return ACCESS_ONCE(pgdata->split_queue_len);
 }
 
 static unsigned long deferred_split_scan(struct shrinker *shrink,
 		struct shrink_control *sc)
 {
+	struct pglist_data *pgdata = NODE_DATA(sc->nid);
 	unsigned long flags;
 	LIST_HEAD(list), *pos, *next;
 	struct page *page;
 	int split = 0;
 
-	spin_lock_irqsave(&split_queue_lock, flags);
-	list_splice_init(&split_queue, &list);
-
+	spin_lock_irqsave(&pgdata->split_queue_lock, flags);
 	/* Take pin on all head pages to avoid freeing them under us */
-	list_for_each_safe(pos, next, &list) {
+	list_for_each_safe(pos, next, &pgdata->split_queue) {
 		page = list_entry((void *)pos, struct page, mapping);
 		page = compound_head(page);
-		/* race with put_compound_page() */
-		if (!get_page_unless_zero(page)) {
+		if (get_page_unless_zero(page)) {
+			list_move(page_deferred_list(page), &list);
+		} else {
+			/* We lost race with put_compound_page() */
 			list_del_init(page_deferred_list(page));
-			split_queue_len--;
+			pgdata->split_queue_len--;
 		}
+		if (!--sc->nr_to_scan)
+			break;
 	}
-	spin_unlock_irqrestore(&split_queue_lock, flags);
+	spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
 
 	list_for_each_safe(pos, next, &list) {
 		page = list_entry((void *)pos, struct page, mapping);
@@ -3505,17 +3510,24 @@
 		put_page(page);
 	}
 
-	spin_lock_irqsave(&split_queue_lock, flags);
-	list_splice_tail(&list, &split_queue);
-	spin_unlock_irqrestore(&split_queue_lock, flags);
+	spin_lock_irqsave(&pgdata->split_queue_lock, flags);
+	list_splice_tail(&list, &pgdata->split_queue);
+	spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
 
-	return split * HPAGE_PMD_NR / 2;
+	/*
+	 * Stop shrinker if we didn't split any page, but the queue is empty.
+	 * This can happen if pages were freed under us.
+	 */
+	if (!split && list_empty(&pgdata->split_queue))
+		return SHRINK_STOP;
+	return split;
 }
 
 static struct shrinker deferred_split_shrinker = {
 	.count_objects = deferred_split_count,
 	.scan_objects = deferred_split_scan,
 	.seeks = DEFAULT_SEEKS,
+	.flags = SHRINKER_NUMA_AWARE,
 };
 
 #ifdef CONFIG_DEBUG_FS
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 12908dc..aefba5a 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1001,7 +1001,7 @@
 		((node = hstate_next_node_to_free(hs, mask)) || 1);	\
 		nr_nodes--)
 
-#if defined(CONFIG_CMA) && defined(CONFIG_X86_64)
+#if defined(CONFIG_X86_64) && ((defined(CONFIG_MEMORY_ISOLATION) && defined(CONFIG_COMPACTION)) || defined(CONFIG_CMA))
 static void destroy_compound_gigantic_page(struct page *page,
 					unsigned int order)
 {
@@ -1214,8 +1214,8 @@
 
 	set_page_private(page, 0);
 	page->mapping = NULL;
-	BUG_ON(page_count(page));
-	BUG_ON(page_mapcount(page));
+	VM_BUG_ON_PAGE(page_count(page), page);
+	VM_BUG_ON_PAGE(page_mapcount(page), page);
 	restore_reserve = PagePrivate(page);
 	ClearPagePrivate(page);
 
@@ -1286,6 +1286,7 @@
 		set_page_count(p, 0);
 		set_compound_head(p, page);
 	}
+	atomic_set(compound_mapcount_ptr(page), -1);
 }
 
 /*
@@ -2629,8 +2630,10 @@
 			hugetlb_add_hstate(HUGETLB_PAGE_ORDER);
 	}
 	default_hstate_idx = hstate_index(size_to_hstate(default_hstate_size));
-	if (default_hstate_max_huge_pages)
-		default_hstate.max_huge_pages = default_hstate_max_huge_pages;
+	if (default_hstate_max_huge_pages) {
+		if (!default_hstate.max_huge_pages)
+			default_hstate.max_huge_pages = default_hstate_max_huge_pages;
+	}
 
 	hugetlb_init_hstates();
 	gather_bootmem_prealloc();
@@ -2748,7 +2751,7 @@
 	int ret;
 
 	if (!hugepages_supported())
-		return -ENOTSUPP;
+		return -EOPNOTSUPP;
 
 	table->data = &tmp;
 	table->maxlen = sizeof(unsigned long);
@@ -2789,7 +2792,7 @@
 	int ret;
 
 	if (!hugepages_supported())
-		return -ENOTSUPP;
+		return -EOPNOTSUPP;
 
 	tmp = h->nr_overcommit_huge_pages;
 
@@ -3499,7 +3502,7 @@
 	 * COW. Warn that such a situation has occurred as it may not be obvious
 	 */
 	if (is_vma_resv_set(vma, HPAGE_RESV_UNMAPPED)) {
-		pr_warning("PID %d killed due to inadequate hugepage pool\n",
+		pr_warn_ratelimited("PID %d killed due to inadequate hugepage pool\n",
 			   current->pid);
 		return ret;
 	}
diff --git a/mm/internal.h b/mm/internal.h
index ed8b5ff..a38a21e 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -216,6 +216,37 @@
 	return (flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
 }
 
+/*
+ * These three helpers classifies VMAs for virtual memory accounting.
+ */
+
+/*
+ * Executable code area - executable, not writable, not stack
+ */
+static inline bool is_exec_mapping(vm_flags_t flags)
+{
+	return (flags & (VM_EXEC | VM_WRITE | VM_STACK)) == VM_EXEC;
+}
+
+/*
+ * Stack area - atomatically grows in one direction
+ *
+ * VM_GROWSUP / VM_GROWSDOWN VMAs are always private anonymous:
+ * do_mmap() forbids all other combinations.
+ */
+static inline bool is_stack_mapping(vm_flags_t flags)
+{
+	return (flags & VM_STACK) == VM_STACK;
+}
+
+/*
+ * Data area - private, writable, not stack
+ */
+static inline bool is_data_mapping(vm_flags_t flags)
+{
+	return (flags & (VM_WRITE | VM_SHARED | VM_STACK)) == VM_WRITE;
+}
+
 /* mm/util.c */
 void __vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma,
 		struct vm_area_struct *prev, struct rb_node *rb_parent);
diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c
index bc0a8d8..1ad20ad 100644
--- a/mm/kasan/kasan.c
+++ b/mm/kasan/kasan.c
@@ -20,6 +20,7 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/kmemleak.h>
+#include <linux/linkage.h>
 #include <linux/memblock.h>
 #include <linux/memory.h>
 #include <linux/mm.h>
@@ -60,6 +61,25 @@
 	}
 }
 
+static void __kasan_unpoison_stack(struct task_struct *task, void *sp)
+{
+	void *base = task_stack_page(task);
+	size_t size = sp - base;
+
+	kasan_unpoison_shadow(base, size);
+}
+
+/* Unpoison the entire stack for a task. */
+void kasan_unpoison_task_stack(struct task_struct *task)
+{
+	__kasan_unpoison_stack(task, task_stack_page(task) + THREAD_SIZE);
+}
+
+/* Unpoison the stack for the current task beyond a watermark sp value. */
+asmlinkage void kasan_unpoison_remaining_stack(void *sp)
+{
+	__kasan_unpoison_stack(current, sp);
+}
 
 /*
  * All functions below always inlined so compiler could
diff --git a/mm/memblock.c b/mm/memblock.c
index d2ed81e..dd79899 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1448,7 +1448,7 @@
  * Remaining API functions
  */
 
-phys_addr_t __init memblock_phys_mem_size(void)
+phys_addr_t __init_memblock memblock_phys_mem_size(void)
 {
 	return memblock.memory.total_size;
 }
diff --git a/mm/memory.c b/mm/memory.c
index 30991f8..906d8e3 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1551,8 +1551,29 @@
 int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
 			unsigned long pfn)
 {
+	return vm_insert_pfn_prot(vma, addr, pfn, vma->vm_page_prot);
+}
+EXPORT_SYMBOL(vm_insert_pfn);
+
+/**
+ * vm_insert_pfn_prot - insert single pfn into user vma with specified pgprot
+ * @vma: user vma to map to
+ * @addr: target user address of this page
+ * @pfn: source kernel pfn
+ * @pgprot: pgprot flags for the inserted page
+ *
+ * This is exactly like vm_insert_pfn, except that it allows drivers to
+ * to override pgprot on a per-page basis.
+ *
+ * This only makes sense for IO mappings, and it makes no sense for
+ * cow mappings.  In general, using multiple vmas is preferable;
+ * vm_insert_pfn_prot should only be used if using multiple VMAs is
+ * impractical.
+ */
+int vm_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr,
+			unsigned long pfn, pgprot_t pgprot)
+{
 	int ret;
-	pgprot_t pgprot = vma->vm_page_prot;
 	/*
 	 * Technically, architectures with pte_special can avoid all these
 	 * restrictions (same for remap_pfn_range).  However we would like
@@ -1574,7 +1595,7 @@
 
 	return ret;
 }
-EXPORT_SYMBOL(vm_insert_pfn);
+EXPORT_SYMBOL(vm_insert_pfn_prot);
 
 int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
 			pfn_t pfn)
@@ -1591,10 +1612,15 @@
 	 * than insert_pfn).  If a zero_pfn were inserted into a VM_MIXEDMAP
 	 * without pte special, it would there be refcounted as a normal page.
 	 */
-	if (!HAVE_PTE_SPECIAL && pfn_t_valid(pfn)) {
+	if (!HAVE_PTE_SPECIAL && !pfn_t_devmap(pfn) && pfn_t_valid(pfn)) {
 		struct page *page;
 
-		page = pfn_t_to_page(pfn);
+		/*
+		 * At this point we are committed to insert_page()
+		 * regardless of whether the caller specified flags that
+		 * result in pfn_t_has_page() == false.
+		 */
+		page = pfn_to_page(pfn_t_to_pfn(pfn));
 		return insert_page(vma, addr, page, vma->vm_page_prot);
 	}
 	return insert_pfn(vma, addr, pfn, vma->vm_page_prot);
@@ -2232,11 +2258,6 @@
 
 	page_cache_get(old_page);
 
-	/*
-	 * Only catch write-faults on shared writable pages,
-	 * read-only shared pages can get COWed by
-	 * get_user_pages(.write=1, .force=1).
-	 */
 	if (vma->vm_ops && vma->vm_ops->page_mkwrite) {
 		int tmp;
 
@@ -3404,8 +3425,18 @@
 	if (unlikely(pmd_none(*pmd)) &&
 	    unlikely(__pte_alloc(mm, vma, pmd, address)))
 		return VM_FAULT_OOM;
-	/* if an huge pmd materialized from under us just retry later */
-	if (unlikely(pmd_trans_huge(*pmd) || pmd_devmap(*pmd)))
+	/*
+	 * If a huge pmd materialized under us just retry later.  Use
+	 * pmd_trans_unstable() instead of pmd_trans_huge() to ensure the pmd
+	 * didn't become pmd_trans_huge under us and then back to pmd_none, as
+	 * a result of MADV_DONTNEED running immediately after a huge pmd fault
+	 * in a different thread of this mm, in turn leading to a misleading
+	 * pmd_trans_huge() retval.  All we have to ensure is that it is a
+	 * regular pmd that we can walk with pte_offset_map() and we can do that
+	 * through an atomic read in C, which is what pmd_trans_unstable()
+	 * provides.
+	 */
+	if (unlikely(pmd_trans_unstable(pmd) || pmd_devmap(*pmd)))
 		return 0;
 	/*
 	 * A regular pmd is established and it can't morph into a huge pmd
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 4af58a3..979b18c 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -138,7 +138,7 @@
 	res->name = "System RAM";
 	res->start = start;
 	res->end = start + size - 1;
-	res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+	res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
 	if (request_resource(&iomem_resource, res) < 0) {
 		pr_debug("System RAM resource %pR cannot be added\n", res);
 		kfree(res);
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 27d1354..9a3f6b9 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -532,7 +532,7 @@
 		nid = page_to_nid(page);
 		if (node_isset(nid, *qp->nmask) == !!(flags & MPOL_MF_INVERT))
 			continue;
-		if (PageTail(page) && PageAnon(page)) {
+		if (PageTransCompound(page) && PageAnon(page)) {
 			get_page(page);
 			pte_unmap_unlock(pte, ptl);
 			lock_page(page);
@@ -548,8 +548,7 @@
 			goto retry;
 		}
 
-		if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
-			migrate_page_add(page, qp->pagelist, flags);
+		migrate_page_add(page, qp->pagelist, flags);
 	}
 	pte_unmap_unlock(pte - 1, ptl);
 	cond_resched();
@@ -625,7 +624,7 @@
 	unsigned long endvma = vma->vm_end;
 	unsigned long flags = qp->flags;
 
-	if (vma->vm_flags & VM_PFNMAP)
+	if (!vma_migratable(vma))
 		return 1;
 
 	if (endvma > end)
@@ -644,16 +643,13 @@
 
 	if (flags & MPOL_MF_LAZY) {
 		/* Similar to task_numa_work, skip inaccessible VMAs */
-		if (vma_migratable(vma) &&
-			vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))
+		if (vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))
 			change_prot_numa(vma, start, endvma);
 		return 1;
 	}
 
-	if ((flags & MPOL_MF_STRICT) ||
-	    ((flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) &&
-	     vma_migratable(vma)))
-		/* queue pages from current vma */
+	/* queue pages from current vma */
+	if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
 		return 0;
 	return 1;
 }
diff --git a/mm/mempool.c b/mm/mempool.c
index 004d42b..7924f4f 100644
--- a/mm/mempool.c
+++ b/mm/mempool.c
@@ -135,8 +135,8 @@
 	void *element = pool->elements[--pool->curr_nr];
 
 	BUG_ON(pool->curr_nr < 0);
-	check_element(pool, element);
 	kasan_unpoison_element(pool, element);
+	check_element(pool, element);
 	return element;
 }
 
diff --git a/mm/migrate.c b/mm/migrate.c
index b1034f9..3ad0fea 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1582,7 +1582,7 @@
 					 (GFP_HIGHUSER_MOVABLE |
 					  __GFP_THISNODE | __GFP_NOMEMALLOC |
 					  __GFP_NORETRY | __GFP_NOWARN) &
-					 ~(__GFP_IO | __GFP_FS), 0);
+					 ~__GFP_RECLAIM, 0);
 
 	return newpage;
 }
diff --git a/mm/mmap.c b/mm/mmap.c
index 84b1262..90e3b86 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -42,6 +42,7 @@
 #include <linux/memory.h>
 #include <linux/printk.h>
 #include <linux/userfaultfd_k.h>
+#include <linux/moduleparam.h>
 
 #include <asm/uaccess.h>
 #include <asm/cacheflush.h>
@@ -69,6 +70,8 @@
 int mmap_rnd_compat_bits __read_mostly = CONFIG_ARCH_MMAP_RND_COMPAT_BITS;
 #endif
 
+static bool ignore_rlimit_data = true;
+core_param(ignore_rlimit_data, ignore_rlimit_data, bool, 0644);
 
 static void unmap_region(struct mm_struct *mm,
 		struct vm_area_struct *vma, struct vm_area_struct *prev,
@@ -387,8 +390,9 @@
 }
 
 #ifdef CONFIG_DEBUG_VM_RB
-static int browse_rb(struct rb_root *root)
+static int browse_rb(struct mm_struct *mm)
 {
+	struct rb_root *root = &mm->mm_rb;
 	int i = 0, j, bug = 0;
 	struct rb_node *nd, *pn = NULL;
 	unsigned long prev = 0, pend = 0;
@@ -411,12 +415,14 @@
 				  vma->vm_start, vma->vm_end);
 			bug = 1;
 		}
+		spin_lock(&mm->page_table_lock);
 		if (vma->rb_subtree_gap != vma_compute_subtree_gap(vma)) {
 			pr_emerg("free gap %lx, correct %lx\n",
 			       vma->rb_subtree_gap,
 			       vma_compute_subtree_gap(vma));
 			bug = 1;
 		}
+		spin_unlock(&mm->page_table_lock);
 		i++;
 		pn = nd;
 		prev = vma->vm_start;
@@ -453,12 +459,16 @@
 	struct vm_area_struct *vma = mm->mmap;
 
 	while (vma) {
+		struct anon_vma *anon_vma = vma->anon_vma;
 		struct anon_vma_chain *avc;
 
-		vma_lock_anon_vma(vma);
-		list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
-			anon_vma_interval_tree_verify(avc);
-		vma_unlock_anon_vma(vma);
+		if (anon_vma) {
+			anon_vma_lock_read(anon_vma);
+			list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
+				anon_vma_interval_tree_verify(avc);
+			anon_vma_unlock_read(anon_vma);
+		}
+
 		highest_address = vma->vm_end;
 		vma = vma->vm_next;
 		i++;
@@ -472,7 +482,7 @@
 			  mm->highest_vm_end, highest_address);
 		bug = 1;
 	}
-	i = browse_rb(&mm->mm_rb);
+	i = browse_rb(mm);
 	if (i != mm->map_count) {
 		if (i != -1)
 			pr_emerg("map_count %d rb %d\n", mm->map_count, i);
@@ -2139,32 +2149,27 @@
 int expand_upwards(struct vm_area_struct *vma, unsigned long address)
 {
 	struct mm_struct *mm = vma->vm_mm;
-	int error;
+	int error = 0;
 
 	if (!(vma->vm_flags & VM_GROWSUP))
 		return -EFAULT;
 
-	/*
-	 * We must make sure the anon_vma is allocated
-	 * so that the anon_vma locking is not a noop.
-	 */
+	/* Guard against wrapping around to address 0. */
+	if (address < PAGE_ALIGN(address+4))
+		address = PAGE_ALIGN(address+4);
+	else
+		return -ENOMEM;
+
+	/* We must make sure the anon_vma is allocated. */
 	if (unlikely(anon_vma_prepare(vma)))
 		return -ENOMEM;
-	vma_lock_anon_vma(vma);
 
 	/*
 	 * vma->vm_start/vm_end cannot change under us because the caller
 	 * is required to hold the mmap_sem in read mode.  We need the
 	 * anon_vma lock to serialize against concurrent expand_stacks.
-	 * Also guard against wrapping around to address 0.
 	 */
-	if (address < PAGE_ALIGN(address+4))
-		address = PAGE_ALIGN(address+4);
-	else {
-		vma_unlock_anon_vma(vma);
-		return -ENOMEM;
-	}
-	error = 0;
+	anon_vma_lock_write(vma->anon_vma);
 
 	/* Somebody else might have raced and expanded it already */
 	if (address > vma->vm_end) {
@@ -2182,7 +2187,7 @@
 				 * updates, but we only hold a shared mmap_sem
 				 * lock here, so we need to protect against
 				 * concurrent vma expansions.
-				 * vma_lock_anon_vma() doesn't help here, as
+				 * anon_vma_lock_write() doesn't help here, as
 				 * we don't guarantee that all growable vmas
 				 * in a mm share the same root anon vma.
 				 * So, we reuse mm->page_table_lock to guard
@@ -2205,7 +2210,7 @@
 			}
 		}
 	}
-	vma_unlock_anon_vma(vma);
+	anon_vma_unlock_write(vma->anon_vma);
 	khugepaged_enter_vma_merge(vma, vma->vm_flags);
 	validate_mm(mm);
 	return error;
@@ -2221,25 +2226,21 @@
 	struct mm_struct *mm = vma->vm_mm;
 	int error;
 
-	/*
-	 * We must make sure the anon_vma is allocated
-	 * so that the anon_vma locking is not a noop.
-	 */
-	if (unlikely(anon_vma_prepare(vma)))
-		return -ENOMEM;
-
 	address &= PAGE_MASK;
 	error = security_mmap_addr(address);
 	if (error)
 		return error;
 
-	vma_lock_anon_vma(vma);
+	/* We must make sure the anon_vma is allocated. */
+	if (unlikely(anon_vma_prepare(vma)))
+		return -ENOMEM;
 
 	/*
 	 * vma->vm_start/vm_end cannot change under us because the caller
 	 * is required to hold the mmap_sem in read mode.  We need the
 	 * anon_vma lock to serialize against concurrent expand_stacks.
 	 */
+	anon_vma_lock_write(vma->anon_vma);
 
 	/* Somebody else might have raced and expanded it already */
 	if (address < vma->vm_start) {
@@ -2257,7 +2258,7 @@
 				 * updates, but we only hold a shared mmap_sem
 				 * lock here, so we need to protect against
 				 * concurrent vma expansions.
-				 * vma_lock_anon_vma() doesn't help here, as
+				 * anon_vma_lock_write() doesn't help here, as
 				 * we don't guarantee that all growable vmas
 				 * in a mm share the same root anon vma.
 				 * So, we reuse mm->page_table_lock to guard
@@ -2278,7 +2279,7 @@
 			}
 		}
 	}
-	vma_unlock_anon_vma(vma);
+	anon_vma_unlock_write(vma->anon_vma);
 	khugepaged_enter_vma_merge(vma, vma->vm_flags);
 	validate_mm(mm);
 	return error;
@@ -2663,12 +2664,29 @@
 	if (!vma || !(vma->vm_flags & VM_SHARED))
 		goto out;
 
-	if (start < vma->vm_start || start + size > vma->vm_end)
+	if (start < vma->vm_start)
 		goto out;
 
-	if (pgoff == linear_page_index(vma, start)) {
-		ret = 0;
-		goto out;
+	if (start + size > vma->vm_end) {
+		struct vm_area_struct *next;
+
+		for (next = vma->vm_next; next; next = next->vm_next) {
+			/* hole between vmas ? */
+			if (next->vm_start != next->vm_prev->vm_end)
+				goto out;
+
+			if (next->vm_file != vma->vm_file)
+				goto out;
+
+			if (next->vm_flags != vma->vm_flags)
+				goto out;
+
+			if (start + size <= next->vm_end)
+				break;
+		}
+
+		if (!next)
+			goto out;
 	}
 
 	prot |= vma->vm_flags & VM_READ ? PROT_READ : 0;
@@ -2678,9 +2696,16 @@
 	flags &= MAP_NONBLOCK;
 	flags |= MAP_SHARED | MAP_FIXED | MAP_POPULATE;
 	if (vma->vm_flags & VM_LOCKED) {
+		struct vm_area_struct *tmp;
 		flags |= MAP_LOCKED;
+
 		/* drop PG_Mlocked flag for over-mapped range */
-		munlock_vma_pages_range(vma, start, start + size);
+		for (tmp = vma; tmp->vm_start >= start + size;
+				tmp = tmp->vm_next) {
+			munlock_vma_pages_range(tmp,
+					max(tmp->vm_start, start),
+					min(tmp->vm_end, start + size));
+		}
 	}
 
 	file = get_file(vma->vm_file);
@@ -2982,9 +3007,17 @@
 	if (mm->total_vm + npages > rlimit(RLIMIT_AS) >> PAGE_SHIFT)
 		return false;
 
-	if ((flags & (VM_WRITE | VM_SHARED | (VM_STACK_FLAGS &
-				(VM_GROWSUP | VM_GROWSDOWN)))) == VM_WRITE)
-		return mm->data_vm + npages <= rlimit(RLIMIT_DATA);
+	if (is_data_mapping(flags) &&
+	    mm->data_vm + npages > rlimit(RLIMIT_DATA) >> PAGE_SHIFT) {
+		if (ignore_rlimit_data)
+			pr_warn_once("%s (%d): VmData %lu exceed data ulimit "
+				     "%lu. Will be forbidden soon.\n",
+				     current->comm, current->pid,
+				     (mm->data_vm + npages) << PAGE_SHIFT,
+				     rlimit(RLIMIT_DATA));
+		else
+			return false;
+	}
 
 	return true;
 }
@@ -2993,11 +3026,11 @@
 {
 	mm->total_vm += npages;
 
-	if ((flags & (VM_EXEC | VM_WRITE)) == VM_EXEC)
+	if (is_exec_mapping(flags))
 		mm->exec_vm += npages;
-	else if (flags & (VM_STACK_FLAGS & (VM_GROWSUP | VM_GROWSDOWN)))
+	else if (is_stack_mapping(flags))
 		mm->stack_vm += npages;
-	else if ((flags & (VM_WRITE | VM_SHARED)) == VM_WRITE)
+	else if (is_data_mapping(flags))
 		mm->data_vm += npages;
 }
 
@@ -3033,11 +3066,16 @@
 	pgoff_t pgoff;
 	struct page **pages;
 
-	if (vma->vm_ops == &legacy_special_mapping_vmops)
+	if (vma->vm_ops == &legacy_special_mapping_vmops) {
 		pages = vma->vm_private_data;
-	else
-		pages = ((struct vm_special_mapping *)vma->vm_private_data)->
-			pages;
+	} else {
+		struct vm_special_mapping *sm = vma->vm_private_data;
+
+		if (sm->fault)
+			return sm->fault(sm, vma, vmf);
+
+		pages = sm->pages;
+	}
 
 	for (pgoff = vmf->pgoff; pgoff && *pages; ++pages)
 		pgoff--;
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 8eb7bb4..f7cb3d4 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -160,9 +160,11 @@
 		}
 
 		if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd)) {
-			if (next - addr != HPAGE_PMD_SIZE)
+			if (next - addr != HPAGE_PMD_SIZE) {
 				split_huge_pmd(vma, pmd, addr);
-			else {
+				if (pmd_none(*pmd))
+					continue;
+			} else {
 				int nr_ptes = change_huge_pmd(vma, pmd, addr,
 						newprot, prot_numa);
 
diff --git a/mm/mremap.c b/mm/mremap.c
index d77946a..8eeba02 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -210,6 +210,8 @@
 				}
 			}
 			split_huge_pmd(vma, old_pmd, old_addr);
+			if (pmd_none(*old_pmd))
+				continue;
 			VM_BUG_ON(pmd_trans_huge(*old_pmd));
 		}
 		if (pmd_none(*new_pmd) && __pte_alloc(new_vma->vm_mm, new_vma,
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 63358d9..838ca8bb 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5210,6 +5210,11 @@
 	pgdat->numabalancing_migrate_nr_pages = 0;
 	pgdat->numabalancing_migrate_next_window = jiffies;
 #endif
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	spin_lock_init(&pgdat->split_queue_lock);
+	INIT_LIST_HEAD(&pgdat->split_queue);
+	pgdat->split_queue_len = 0;
+#endif
 	init_waitqueue_head(&pgdat->kswapd_wait);
 	init_waitqueue_head(&pgdat->pfmemalloc_wait);
 	pgdat_page_ext_init(pgdat);
@@ -6615,7 +6620,7 @@
 	return !has_unmovable_pages(zone, page, 0, true);
 }
 
-#ifdef CONFIG_CMA
+#if (defined(CONFIG_MEMORY_ISOLATION) && defined(CONFIG_COMPACTION)) || defined(CONFIG_CMA)
 
 static unsigned long pfn_max_align_down(unsigned long pfn)
 {
diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c
index 9d47676..06a005b 100644
--- a/mm/pgtable-generic.c
+++ b/mm/pgtable-generic.c
@@ -90,9 +90,9 @@
  * ARCHes with special requirements for evicting THP backing TLB entries can
  * implement this. Otherwise also, it can help optimize normal TLB flush in
  * THP regime. stock flush_tlb_range() typically has optimization to nuke the
- * entire TLB TLB if flush span is greater than a threshhold, which will
+ * entire TLB if flush span is greater than a threshold, which will
  * likely be true for a single huge page. Thus a single thp flush will
- * invalidate the entire TLB which is not desitable.
+ * invalidate the entire TLB which is not desirable.
  * e.g. see arch/arc: flush_pmd_tlb_range
  */
 #define flush_pmd_tlb_range(vma, addr, end)	flush_tlb_range(vma, addr, end)
@@ -195,7 +195,9 @@
 	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
 	VM_BUG_ON(pmd_trans_huge(*pmdp));
 	pmd = pmdp_huge_get_and_clear(vma->vm_mm, address, pmdp);
-	flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
+
+	/* collapse entails shooting down ptes not pmd */
+	flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
 	return pmd;
 }
 #endif
diff --git a/mm/slab.c b/mm/slab.c
index 6ecc697..621fbcb 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -2275,7 +2275,7 @@
 
 	err = setup_cpu_cache(cachep, gfp);
 	if (err) {
-		__kmem_cache_shutdown(cachep);
+		__kmem_cache_release(cachep);
 		return err;
 	}
 
@@ -2414,12 +2414,13 @@
 
 int __kmem_cache_shutdown(struct kmem_cache *cachep)
 {
+	return __kmem_cache_shrink(cachep, false);
+}
+
+void __kmem_cache_release(struct kmem_cache *cachep)
+{
 	int i;
 	struct kmem_cache_node *n;
-	int rc = __kmem_cache_shrink(cachep, false);
-
-	if (rc)
-		return rc;
 
 	free_percpu(cachep->cpu_cache);
 
@@ -2430,7 +2431,6 @@
 		kfree(n);
 		cachep->node[i] = NULL;
 	}
-	return 0;
 }
 
 /*
diff --git a/mm/slab.h b/mm/slab.h
index 834ad24..2eedace 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -140,6 +140,7 @@
 #define CACHE_CREATE_MASK (SLAB_CORE_FLAGS | SLAB_DEBUG_FLAGS | SLAB_CACHE_FLAGS)
 
 int __kmem_cache_shutdown(struct kmem_cache *);
+void __kmem_cache_release(struct kmem_cache *);
 int __kmem_cache_shrink(struct kmem_cache *, bool);
 void slab_kmem_cache_release(struct kmem_cache *);
 
diff --git a/mm/slab_common.c b/mm/slab_common.c
index b50aef0..065b7bd 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -693,6 +693,7 @@
 
 void slab_kmem_cache_release(struct kmem_cache *s)
 {
+	__kmem_cache_release(s);
 	destroy_memcg_params(s);
 	kfree_const(s->name);
 	kmem_cache_free(kmem_cache, s);
diff --git a/mm/slob.c b/mm/slob.c
index 17e8f8c..5ec1580 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -630,6 +630,10 @@
 	return 0;
 }
 
+void __kmem_cache_release(struct kmem_cache *c)
+{
+}
+
 int __kmem_cache_shrink(struct kmem_cache *d, bool deactivate)
 {
 	return 0;
diff --git a/mm/slub.c b/mm/slub.c
index 2e1355a..d8fbd4a 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1592,18 +1592,12 @@
 	__add_partial(n, page, tail);
 }
 
-static inline void
-__remove_partial(struct kmem_cache_node *n, struct page *page)
-{
-	list_del(&page->lru);
-	n->nr_partial--;
-}
-
 static inline void remove_partial(struct kmem_cache_node *n,
 					struct page *page)
 {
 	lockdep_assert_held(&n->list_lock);
-	__remove_partial(n, page);
+	list_del(&page->lru);
+	n->nr_partial--;
 }
 
 /*
@@ -3184,6 +3178,12 @@
 	}
 }
 
+void __kmem_cache_release(struct kmem_cache *s)
+{
+	free_percpu(s->cpu_slab);
+	free_kmem_cache_nodes(s);
+}
+
 static int init_kmem_cache_nodes(struct kmem_cache *s)
 {
 	int node;
@@ -3443,28 +3443,31 @@
 
 /*
  * Attempt to free all partial slabs on a node.
- * This is called from kmem_cache_close(). We must be the last thread
- * using the cache and therefore we do not need to lock anymore.
+ * This is called from __kmem_cache_shutdown(). We must take list_lock
+ * because sysfs file might still access partial list after the shutdowning.
  */
 static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
 {
 	struct page *page, *h;
 
+	BUG_ON(irqs_disabled());
+	spin_lock_irq(&n->list_lock);
 	list_for_each_entry_safe(page, h, &n->partial, lru) {
 		if (!page->inuse) {
-			__remove_partial(n, page);
+			remove_partial(n, page);
 			discard_slab(s, page);
 		} else {
 			list_slab_objects(s, page,
-			"Objects remaining in %s on kmem_cache_close()");
+			"Objects remaining in %s on __kmem_cache_shutdown()");
 		}
 	}
+	spin_unlock_irq(&n->list_lock);
 }
 
 /*
  * Release all resources used by a slab cache.
  */
-static inline int kmem_cache_close(struct kmem_cache *s)
+int __kmem_cache_shutdown(struct kmem_cache *s)
 {
 	int node;
 	struct kmem_cache_node *n;
@@ -3476,16 +3479,9 @@
 		if (n->nr_partial || slabs_node(s, node))
 			return 1;
 	}
-	free_percpu(s->cpu_slab);
-	free_kmem_cache_nodes(s);
 	return 0;
 }
 
-int __kmem_cache_shutdown(struct kmem_cache *s)
-{
-	return kmem_cache_close(s);
-}
-
 /********************************************************************
  *		Kmalloc subsystem
  *******************************************************************/
@@ -3980,7 +3976,7 @@
 	memcg_propagate_slab_attrs(s);
 	err = sysfs_slab_add(s);
 	if (err)
-		kmem_cache_close(s);
+		__kmem_cache_release(s);
 
 	return err;
 }
diff --git a/mm/util.c b/mm/util.c
index c108a65..4fb14ca 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -230,36 +230,11 @@
 }
 
 /* Check if the vma is being used as a stack by this task */
-static int vm_is_stack_for_task(struct task_struct *t,
-				struct vm_area_struct *vma)
+int vma_is_stack_for_task(struct vm_area_struct *vma, struct task_struct *t)
 {
 	return (vma->vm_start <= KSTK_ESP(t) && vma->vm_end >= KSTK_ESP(t));
 }
 
-/*
- * Check if the vma is being used as a stack.
- * If is_group is non-zero, check in the entire thread group or else
- * just check in the current task. Returns the task_struct of the task
- * that the vma is stack for. Must be called under rcu_read_lock().
- */
-struct task_struct *task_of_stack(struct task_struct *task,
-				struct vm_area_struct *vma, bool in_group)
-{
-	if (vm_is_stack_for_task(task, vma))
-		return task;
-
-	if (in_group) {
-		struct task_struct *t;
-
-		for_each_thread(task, t) {
-			if (vm_is_stack_for_task(t, vma))
-				return t;
-		}
-	}
-
-	return NULL;
-}
-
 #if defined(CONFIG_MMU) && !defined(HAVE_ARCH_PICK_MMAP_LAYOUT)
 void arch_pick_mmap_layout(struct mm_struct *mm)
 {
diff --git a/mm/vmpressure.c b/mm/vmpressure.c
index 9a6c070..149fdf6 100644
--- a/mm/vmpressure.c
+++ b/mm/vmpressure.c
@@ -248,9 +248,8 @@
 
 	if (tree) {
 		spin_lock(&vmpr->sr_lock);
-		vmpr->tree_scanned += scanned;
+		scanned = vmpr->tree_scanned += scanned;
 		vmpr->tree_reclaimed += reclaimed;
-		scanned = vmpr->scanned;
 		spin_unlock(&vmpr->sr_lock);
 
 		if (scanned < vmpressure_win)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index eb3dd37..71b1c29 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1443,7 +1443,7 @@
 	int ret = -EBUSY;
 
 	VM_BUG_ON_PAGE(!page_count(page), page);
-	VM_BUG_ON_PAGE(PageTail(page), page);
+	WARN_RATELIMIT(PageTail(page), "trying to isolate tail page");
 
 	if (PageLRU(page)) {
 		struct zone *zone = page_zone(page);
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 40b2c74..084c672 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1396,10 +1396,15 @@
 		 * Counters were updated so we expect more updates
 		 * to occur in the future. Keep on running the
 		 * update worker thread.
+		 * If we were marked on cpu_stat_off clear the flag
+		 * so that vmstat_shepherd doesn't schedule us again.
 		 */
-		queue_delayed_work_on(smp_processor_id(), vmstat_wq,
-			this_cpu_ptr(&vmstat_work),
-			round_jiffies_relative(sysctl_stat_interval));
+		if (!cpumask_test_and_clear_cpu(smp_processor_id(),
+						cpu_stat_off)) {
+			queue_delayed_work_on(smp_processor_id(), vmstat_wq,
+				this_cpu_ptr(&vmstat_work),
+				round_jiffies_relative(sysctl_stat_interval));
+		}
 	} else {
 		/*
 		 * We did not update any counters so the app may be in
@@ -1417,18 +1422,6 @@
  * until the diffs stay at zero. The function is used by NOHZ and can only be
  * invoked when tick processing is not active.
  */
-void quiet_vmstat(void)
-{
-	if (system_state != SYSTEM_RUNNING)
-		return;
-
-	do {
-		if (!cpumask_test_and_set_cpu(smp_processor_id(), cpu_stat_off))
-			cancel_delayed_work(this_cpu_ptr(&vmstat_work));
-
-	} while (refresh_cpu_vm_stats(false));
-}
-
 /*
  * Check if the diffs for a certain cpu indicate that
  * an update is needed.
@@ -1452,6 +1445,30 @@
 	return false;
 }
 
+void quiet_vmstat(void)
+{
+	if (system_state != SYSTEM_RUNNING)
+		return;
+
+	/*
+	 * If we are already in hands of the shepherd then there
+	 * is nothing for us to do here.
+	 */
+	if (cpumask_test_and_set_cpu(smp_processor_id(), cpu_stat_off))
+		return;
+
+	if (!need_update(smp_processor_id()))
+		return;
+
+	/*
+	 * Just refresh counters and do not care about the pending delayed
+	 * vmstat_update. It doesn't fire that often to matter and canceling
+	 * it would be too expensive from this path.
+	 * vmstat_shepherd will take care about that for us.
+	 */
+	refresh_cpu_vm_stats(false);
+}
+
 
 /*
  * Shepherd worker thread that checks the
@@ -1469,18 +1486,25 @@
 
 	get_online_cpus();
 	/* Check processors whose vmstat worker threads have been disabled */
-	for_each_cpu(cpu, cpu_stat_off)
-		if (need_update(cpu) &&
-			cpumask_test_and_clear_cpu(cpu, cpu_stat_off))
+	for_each_cpu(cpu, cpu_stat_off) {
+		struct delayed_work *dw = &per_cpu(vmstat_work, cpu);
 
-			queue_delayed_work_on(cpu, vmstat_wq,
-				&per_cpu(vmstat_work, cpu), 0);
-
+		if (need_update(cpu)) {
+			if (cpumask_test_and_clear_cpu(cpu, cpu_stat_off))
+				queue_delayed_work_on(cpu, vmstat_wq, dw, 0);
+		} else {
+			/*
+			 * Cancel the work if quiet_vmstat has put this
+			 * cpu on cpu_stat_off because the work item might
+			 * be still scheduled
+			 */
+			cancel_delayed_work(dw);
+		}
+	}
 	put_online_cpus();
 
 	schedule_delayed_work(&shepherd,
 		round_jiffies_relative(sysctl_stat_interval));
-
 }
 
 static void __init start_shepherd_timer(void)
@@ -1488,7 +1512,7 @@
 	int cpu;
 
 	for_each_possible_cpu(cpu)
-		INIT_DELAYED_WORK(per_cpu_ptr(&vmstat_work, cpu),
+		INIT_DEFERRABLE_WORK(per_cpu_ptr(&vmstat_work, cpu),
 			vmstat_update);
 
 	if (!alloc_cpumask_var(&cpu_stat_off, GFP_KERNEL))
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index d5871ac..f066781 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -1625,7 +1625,7 @@
 
 		rt = atrtr_find(&at_hint);
 	}
-	err = ENETUNREACH;
+	err = -ENETUNREACH;
 	if (!rt)
 		goto out;
 
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index e6c8382..ccf70be 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -527,11 +527,12 @@
 		 * gets dereferenced.
 		 */
 		spin_lock_bh(&bat_priv->gw.list_lock);
-		hlist_del_init_rcu(&gw_node->list);
+		if (!hlist_unhashed(&gw_node->list)) {
+			hlist_del_init_rcu(&gw_node->list);
+			batadv_gw_node_free_ref(gw_node);
+		}
 		spin_unlock_bh(&bat_priv->gw.list_lock);
 
-		batadv_gw_node_free_ref(gw_node);
-
 		curr_gw = batadv_gw_get_selected_gw_node(bat_priv);
 		if (gw_node == curr_gw)
 			batadv_gw_reselect(bat_priv);
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index 01acccc..57f71071 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -76,6 +76,28 @@
 }
 
 /**
+ * batadv_mutual_parents - check if two devices are each others parent
+ * @dev1: 1st net_device
+ * @dev2: 2nd net_device
+ *
+ * veth devices come in pairs and each is the parent of the other!
+ *
+ * Return: true if the devices are each others parent, otherwise false
+ */
+static bool batadv_mutual_parents(const struct net_device *dev1,
+				  const struct net_device *dev2)
+{
+	int dev1_parent_iflink = dev_get_iflink(dev1);
+	int dev2_parent_iflink = dev_get_iflink(dev2);
+
+	if (!dev1_parent_iflink || !dev2_parent_iflink)
+		return false;
+
+	return (dev1_parent_iflink == dev2->ifindex) &&
+	       (dev2_parent_iflink == dev1->ifindex);
+}
+
+/**
  * batadv_is_on_batman_iface - check if a device is a batman iface descendant
  * @net_dev: the device to check
  *
@@ -108,6 +130,9 @@
 	if (WARN(!parent_dev, "Cannot find parent device"))
 		return false;
 
+	if (batadv_mutual_parents(net_dev, parent_dev))
+		return false;
+
 	ret = batadv_is_on_batman_iface(parent_dev);
 
 	return ret;
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index cdfc85f..0e80fd1 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -303,9 +303,11 @@
 
 	if (atomic_add_return(v, &vlan->tt.num_entries) == 0) {
 		spin_lock_bh(&orig_node->vlan_list_lock);
-		hlist_del_init_rcu(&vlan->list);
+		if (!hlist_unhashed(&vlan->list)) {
+			hlist_del_init_rcu(&vlan->list);
+			batadv_orig_node_vlan_free_ref(vlan);
+		}
 		spin_unlock_bh(&orig_node->vlan_list_lock);
-		batadv_orig_node_vlan_free_ref(vlan);
 	}
 
 	batadv_orig_node_vlan_free_ref(vlan);
diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c
index d040365..8a4cc2f 100644
--- a/net/bluetooth/6lowpan.c
+++ b/net/bluetooth/6lowpan.c
@@ -307,6 +307,9 @@
 
 	/* check that it's our buffer */
 	if (lowpan_is_ipv6(*skb_network_header(skb))) {
+		/* Pull off the 1-byte of 6lowpan header. */
+		skb_pull(skb, 1);
+
 		/* Copy the packet so that the IPv6 header is
 		 * properly aligned.
 		 */
@@ -317,6 +320,7 @@
 
 		local_skb->protocol = htons(ETH_P_IPV6);
 		local_skb->pkt_type = PACKET_HOST;
+		local_skb->dev = dev;
 
 		skb_set_transport_header(local_skb, sizeof(struct ipv6hdr));
 
@@ -335,6 +339,8 @@
 		if (!local_skb)
 			goto drop;
 
+		local_skb->dev = dev;
+
 		ret = iphc_decompress(local_skb, dev, chan);
 		if (ret < 0) {
 			kfree_skb(local_skb);
@@ -343,7 +349,6 @@
 
 		local_skb->protocol = htons(ETH_P_IPV6);
 		local_skb->pkt_type = PACKET_HOST;
-		local_skb->dev = dev;
 
 		if (give_skb_to_upper(local_skb, dev)
 				!= NET_RX_SUCCESS) {
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 47bcef754..883c821 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -4112,8 +4112,10 @@
 			break;
 		}
 
-		*req_complete = bt_cb(skb)->hci.req_complete;
-		*req_complete_skb = bt_cb(skb)->hci.req_complete_skb;
+		if (bt_cb(skb)->hci.req_flags & HCI_REQ_SKB)
+			*req_complete_skb = bt_cb(skb)->hci.req_complete_skb;
+		else
+			*req_complete = bt_cb(skb)->hci.req_complete;
 		kfree_skb(skb);
 	}
 	spin_unlock_irqrestore(&hdev->cmd_q.lock, flags);
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index 41b5f38..c78ee2d 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -688,21 +688,29 @@
 	 * command to remove it from the controller.
 	 */
 	list_for_each_entry(b, &hdev->le_white_list, list) {
-		struct hci_cp_le_del_from_white_list cp;
+		/* If the device is neither in pend_le_conns nor
+		 * pend_le_reports then remove it from the whitelist.
+		 */
+		if (!hci_pend_le_action_lookup(&hdev->pend_le_conns,
+					       &b->bdaddr, b->bdaddr_type) &&
+		    !hci_pend_le_action_lookup(&hdev->pend_le_reports,
+					       &b->bdaddr, b->bdaddr_type)) {
+			struct hci_cp_le_del_from_white_list cp;
 
-		if (hci_pend_le_action_lookup(&hdev->pend_le_conns,
-					      &b->bdaddr, b->bdaddr_type) ||
-		    hci_pend_le_action_lookup(&hdev->pend_le_reports,
-					      &b->bdaddr, b->bdaddr_type)) {
-			white_list_entries++;
+			cp.bdaddr_type = b->bdaddr_type;
+			bacpy(&cp.bdaddr, &b->bdaddr);
+
+			hci_req_add(req, HCI_OP_LE_DEL_FROM_WHITE_LIST,
+				    sizeof(cp), &cp);
 			continue;
 		}
 
-		cp.bdaddr_type = b->bdaddr_type;
-		bacpy(&cp.bdaddr, &b->bdaddr);
+		if (hci_find_irk_by_addr(hdev, &b->bdaddr, b->bdaddr_type)) {
+			/* White list can not be used with RPAs */
+			return 0x00;
+		}
 
-		hci_req_add(req, HCI_OP_LE_DEL_FROM_WHITE_LIST,
-			    sizeof(cp), &cp);
+		white_list_entries++;
 	}
 
 	/* Since all no longer valid white list entries have been
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 39a5149..eb4f5f2 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -197,10 +197,20 @@
 		chan->sport = psm;
 		err = 0;
 	} else {
-		u16 p;
+		u16 p, start, end, incr;
+
+		if (chan->src_type == BDADDR_BREDR) {
+			start = L2CAP_PSM_DYN_START;
+			end = L2CAP_PSM_AUTO_END;
+			incr = 2;
+		} else {
+			start = L2CAP_PSM_LE_DYN_START;
+			end = L2CAP_PSM_LE_DYN_END;
+			incr = 1;
+		}
 
 		err = -EINVAL;
-		for (p = 0x1001; p < 0x1100; p += 2)
+		for (p = start; p <= end; p += incr)
 			if (!__l2cap_global_chan_by_addr(cpu_to_le16(p), src)) {
 				chan->psm   = cpu_to_le16(p);
 				chan->sport = cpu_to_le16(p);
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index 1bb5515..e4cae72 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -58,7 +58,7 @@
 		return -EINVAL;
 
 	/* Restrict usage of well-known PSMs */
-	if (psm < 0x1001 && !capable(CAP_NET_BIND_SERVICE))
+	if (psm < L2CAP_PSM_DYN_START && !capable(CAP_NET_BIND_SERVICE))
 		return -EACCES;
 
 	return 0;
@@ -67,11 +67,11 @@
 static int l2cap_validate_le_psm(u16 psm)
 {
 	/* Valid LE_PSM ranges are defined only until 0x00ff */
-	if (psm > 0x00ff)
+	if (psm > L2CAP_PSM_LE_DYN_END)
 		return -EINVAL;
 
 	/* Restrict fixed, SIG assigned PSM values to CAP_NET_BIND_SERVICE */
-	if (psm <= 0x007f && !capable(CAP_NET_BIND_SERVICE))
+	if (psm < L2CAP_PSM_LE_DYN_START && !capable(CAP_NET_BIND_SERVICE))
 		return -EACCES;
 
 	return 0;
@@ -125,6 +125,9 @@
 			goto done;
 	}
 
+	bacpy(&chan->src, &la.l2_bdaddr);
+	chan->src_type = la.l2_bdaddr_type;
+
 	if (la.l2_cid)
 		err = l2cap_add_scid(chan, __le16_to_cpu(la.l2_cid));
 	else
@@ -156,9 +159,6 @@
 		break;
 	}
 
-	bacpy(&chan->src, &la.l2_bdaddr);
-	chan->src_type = la.l2_bdaddr_type;
-
 	if (chan->psm && bdaddr_type_is_le(chan->src_type))
 		chan->mode = L2CAP_MODE_LE_FLOWCTL;
 
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index ffed8a1..4b175df 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -1072,22 +1072,6 @@
 			hcon->dst_type = smp->remote_irk->addr_type;
 			queue_work(hdev->workqueue, &conn->id_addr_update_work);
 		}
-
-		/* When receiving an indentity resolving key for
-		 * a remote device that does not use a resolvable
-		 * private address, just remove the key so that
-		 * it is possible to use the controller white
-		 * list for scanning.
-		 *
-		 * Userspace will have been told to not store
-		 * this key at this point. So it is safe to
-		 * just remove it.
-		 */
-		if (!bacmp(&smp->remote_irk->rpa, BDADDR_ANY)) {
-			list_del_rcu(&smp->remote_irk->list);
-			kfree_rcu(smp->remote_irk, rcu);
-			smp->remote_irk = NULL;
-		}
 	}
 
 	if (smp->csrk) {
diff --git a/net/bridge/br.c b/net/bridge/br.c
index a1abe49..3addc05 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -121,6 +121,7 @@
 	.notifier_call = br_device_event
 };
 
+/* called with RTNL */
 static int br_switchdev_event(struct notifier_block *unused,
 			      unsigned long event, void *ptr)
 {
@@ -130,7 +131,6 @@
 	struct switchdev_notifier_fdb_info *fdb_info;
 	int err = NOTIFY_DONE;
 
-	rtnl_lock();
 	p = br_port_get_rtnl(dev);
 	if (!p)
 		goto out;
@@ -155,7 +155,6 @@
 	}
 
 out:
-	rtnl_unlock();
 	return err;
 }
 
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 82e3e97..dcea4f4 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -723,6 +723,8 @@
 		struct net_bridge_fdb_entry *f;
 
 		hlist_for_each_entry_rcu(f, &br->hash[i], hlist) {
+			int err;
+
 			if (idx < cb->args[0])
 				goto skip;
 
@@ -741,12 +743,15 @@
 			if (!filter_dev && f->dst)
 				goto skip;
 
-			if (fdb_fill_info(skb, br, f,
-					  NETLINK_CB(cb->skb).portid,
-					  cb->nlh->nlmsg_seq,
-					  RTM_NEWNEIGH,
-					  NLM_F_MULTI) < 0)
+			err = fdb_fill_info(skb, br, f,
+					    NETLINK_CB(cb->skb).portid,
+					    cb->nlh->nlmsg_seq,
+					    RTM_NEWNEIGH,
+					    NLM_F_MULTI);
+			if (err < 0) {
+				cb->args[1] = err;
 				break;
+			}
 skip:
 			++idx;
 		}
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index 30e105f..74c278e 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -425,8 +425,8 @@
 	mp = br_mdb_ip_get(mdb, group);
 	if (!mp) {
 		mp = br_multicast_new_group(br, port, group);
-		err = PTR_ERR(mp);
-		if (IS_ERR(mp))
+		err = PTR_ERR_OR_ZERO(mp);
+		if (err)
 			return err;
 	}
 
diff --git a/net/caif/cfrfml.c b/net/caif/cfrfml.c
index 61d7617..b82440e 100644
--- a/net/caif/cfrfml.c
+++ b/net/caif/cfrfml.c
@@ -159,7 +159,7 @@
 		tmppkt = NULL;
 
 		/* Verify that length is correct */
-		err = EPROTO;
+		err = -EPROTO;
 		if (rfml->pdu_size != cfpkt_getlen(pkt) - RFM_HEAD_SIZE + 1)
 			goto out;
 	}
diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c
index 393bfb2..5fcfb98 100644
--- a/net/ceph/crush/mapper.c
+++ b/net/ceph/crush/mapper.c
@@ -403,6 +403,7 @@
  * @local_retries: localized retries
  * @local_fallback_retries: localized fallback retries
  * @recurse_to_leaf: true if we want one device under each item of given type (chooseleaf instead of choose)
+ * @stable: stable mode starts rep=0 in the recursive call for all replicas
  * @vary_r: pass r to recursive calls
  * @out2: second output vector for leaf items (if @recurse_to_leaf)
  * @parent_r: r value passed from the parent
@@ -419,6 +420,7 @@
 			       unsigned int local_fallback_retries,
 			       int recurse_to_leaf,
 			       unsigned int vary_r,
+			       unsigned int stable,
 			       int *out2,
 			       int parent_r)
 {
@@ -433,13 +435,13 @@
 	int collide, reject;
 	int count = out_size;
 
-	dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d tries %d recurse_tries %d local_retries %d local_fallback_retries %d parent_r %d\n",
+	dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d tries %d recurse_tries %d local_retries %d local_fallback_retries %d parent_r %d stable %d\n",
 		recurse_to_leaf ? "_LEAF" : "",
 		bucket->id, x, outpos, numrep,
 		tries, recurse_tries, local_retries, local_fallback_retries,
-		parent_r);
+		parent_r, stable);
 
-	for (rep = outpos; rep < numrep && count > 0 ; rep++) {
+	for (rep = stable ? 0 : outpos; rep < numrep && count > 0 ; rep++) {
 		/* keep trying until we get a non-out, non-colliding item */
 		ftotal = 0;
 		skip_rep = 0;
@@ -512,13 +514,14 @@
 						if (crush_choose_firstn(map,
 							 map->buckets[-1-item],
 							 weight, weight_max,
-							 x, outpos+1, 0,
+							 x, stable ? 1 : outpos+1, 0,
 							 out2, outpos, count,
 							 recurse_tries, 0,
 							 local_retries,
 							 local_fallback_retries,
 							 0,
 							 vary_r,
+							 stable,
 							 NULL,
 							 sub_r) <= outpos)
 							/* didn't get leaf */
@@ -816,6 +819,7 @@
 	int choose_local_fallback_retries = map->choose_local_fallback_tries;
 
 	int vary_r = map->chooseleaf_vary_r;
+	int stable = map->chooseleaf_stable;
 
 	if ((__u32)ruleno >= map->max_rules) {
 		dprintk(" bad ruleno %d\n", ruleno);
@@ -835,7 +839,8 @@
 		case CRUSH_RULE_TAKE:
 			if ((curstep->arg1 >= 0 &&
 			     curstep->arg1 < map->max_devices) ||
-			    (-1-curstep->arg1 < map->max_buckets &&
+			    (-1-curstep->arg1 >= 0 &&
+			     -1-curstep->arg1 < map->max_buckets &&
 			     map->buckets[-1-curstep->arg1])) {
 				w[0] = curstep->arg1;
 				wsize = 1;
@@ -869,6 +874,11 @@
 				vary_r = curstep->arg1;
 			break;
 
+		case CRUSH_RULE_SET_CHOOSELEAF_STABLE:
+			if (curstep->arg1 >= 0)
+				stable = curstep->arg1;
+			break;
+
 		case CRUSH_RULE_CHOOSELEAF_FIRSTN:
 		case CRUSH_RULE_CHOOSE_FIRSTN:
 			firstn = 1;
@@ -888,6 +898,7 @@
 			osize = 0;
 
 			for (i = 0; i < wsize; i++) {
+				int bno;
 				/*
 				 * see CRUSH_N, CRUSH_N_MINUS macros.
 				 * basically, numrep <= 0 means relative to
@@ -900,6 +911,13 @@
 						continue;
 				}
 				j = 0;
+				/* make sure bucket id is valid */
+				bno = -1 - w[i];
+				if (bno < 0 || bno >= map->max_buckets) {
+					/* w[i] is probably CRUSH_ITEM_NONE */
+					dprintk("  bad w[i] %d\n", w[i]);
+					continue;
+				}
 				if (firstn) {
 					int recurse_tries;
 					if (choose_leaf_tries)
@@ -911,7 +929,7 @@
 						recurse_tries = choose_tries;
 					osize += crush_choose_firstn(
 						map,
-						map->buckets[-1-w[i]],
+						map->buckets[bno],
 						weight, weight_max,
 						x, numrep,
 						curstep->arg2,
@@ -923,6 +941,7 @@
 						choose_local_fallback_retries,
 						recurse_to_leaf,
 						vary_r,
+						stable,
 						c+osize,
 						0);
 				} else {
@@ -930,7 +949,7 @@
 						    numrep : (result_max-osize));
 					crush_choose_indep(
 						map,
-						map->buckets[-1-w[i]],
+						map->buckets[bno],
 						weight, weight_max,
 						x, out_size, numrep,
 						curstep->arg2,
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 9cfedf5..9382619 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -1197,6 +1197,13 @@
 	return new_piece;
 }
 
+static size_t sizeof_footer(struct ceph_connection *con)
+{
+	return (con->peer_features & CEPH_FEATURE_MSG_AUTH) ?
+	    sizeof(struct ceph_msg_footer) :
+	    sizeof(struct ceph_msg_footer_old);
+}
+
 static void prepare_message_data(struct ceph_msg *msg, u32 data_len)
 {
 	BUG_ON(!msg);
@@ -2335,9 +2342,9 @@
 			ceph_pr_addr(&con->peer_addr.in_addr),
 			seq, con->in_seq + 1);
 		con->in_base_pos = -front_len - middle_len - data_len -
-			sizeof(m->footer);
+			sizeof_footer(con);
 		con->in_tag = CEPH_MSGR_TAG_READY;
-		return 0;
+		return 1;
 	} else if ((s64)seq - (s64)con->in_seq > 1) {
 		pr_err("read_partial_message bad seq %lld expected %lld\n",
 		       seq, con->in_seq + 1);
@@ -2360,10 +2367,10 @@
 			/* skip this message */
 			dout("alloc_msg said skip message\n");
 			con->in_base_pos = -front_len - middle_len - data_len -
-				sizeof(m->footer);
+				sizeof_footer(con);
 			con->in_tag = CEPH_MSGR_TAG_READY;
 			con->in_seq++;
-			return 0;
+			return 1;
 		}
 
 		BUG_ON(!con->in_msg);
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index f8f2359..5bc0537 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -1770,6 +1770,7 @@
 	u32 osdmap_epoch;
 	int already_completed;
 	u32 bytes;
+	u8 decode_redir;
 	unsigned int i;
 
 	tid = le64_to_cpu(msg->hdr.tid);
@@ -1841,6 +1842,15 @@
 		p += 8 + 4; /* skip replay_version */
 		p += 8; /* skip user_version */
 
+		if (le16_to_cpu(msg->hdr.version) >= 7)
+			ceph_decode_8_safe(&p, end, decode_redir, bad_put);
+		else
+			decode_redir = 1;
+	} else {
+		decode_redir = 0;
+	}
+
+	if (decode_redir) {
 		err = ceph_redirect_decode(&p, end, &redir);
 		if (err)
 			goto bad_put;
@@ -2843,8 +2853,8 @@
 	mutex_lock(&osdc->request_mutex);
 	req = __lookup_request(osdc, tid);
 	if (!req) {
-		pr_warn("%s osd%d tid %llu unknown, skipping\n",
-			__func__, osd->o_osd, tid);
+		dout("%s osd%d tid %llu unknown, skipping\n", __func__,
+		     osd->o_osd, tid);
 		m = NULL;
 		*skip = 1;
 		goto out;
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 7d8f581..243574c 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -342,23 +342,32 @@
         c->choose_local_tries = ceph_decode_32(p);
         c->choose_local_fallback_tries =  ceph_decode_32(p);
         c->choose_total_tries = ceph_decode_32(p);
-        dout("crush decode tunable choose_local_tries = %d",
+        dout("crush decode tunable choose_local_tries = %d\n",
              c->choose_local_tries);
-        dout("crush decode tunable choose_local_fallback_tries = %d",
+        dout("crush decode tunable choose_local_fallback_tries = %d\n",
              c->choose_local_fallback_tries);
-        dout("crush decode tunable choose_total_tries = %d",
+        dout("crush decode tunable choose_total_tries = %d\n",
              c->choose_total_tries);
 
 	ceph_decode_need(p, end, sizeof(u32), done);
 	c->chooseleaf_descend_once = ceph_decode_32(p);
-	dout("crush decode tunable chooseleaf_descend_once = %d",
+	dout("crush decode tunable chooseleaf_descend_once = %d\n",
 	     c->chooseleaf_descend_once);
 
 	ceph_decode_need(p, end, sizeof(u8), done);
 	c->chooseleaf_vary_r = ceph_decode_8(p);
-	dout("crush decode tunable chooseleaf_vary_r = %d",
+	dout("crush decode tunable chooseleaf_vary_r = %d\n",
 	     c->chooseleaf_vary_r);
 
+	/* skip straw_calc_version, allowed_bucket_algs */
+	ceph_decode_need(p, end, sizeof(u8) + sizeof(u32), done);
+	*p += sizeof(u8) + sizeof(u32);
+
+	ceph_decode_need(p, end, sizeof(u8), done);
+	c->chooseleaf_stable = ceph_decode_8(p);
+	dout("crush decode tunable chooseleaf_stable = %d\n",
+	     c->chooseleaf_stable);
+
 done:
 	dout("crush_decode success\n");
 	return c;
diff --git a/net/core/dev.c b/net/core/dev.c
index cc9e365..0ef061b 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4351,6 +4351,7 @@
 
 		diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
 		diffs |= p->vlan_tci ^ skb->vlan_tci;
+		diffs |= skb_metadata_dst_cmp(p, skb);
 		if (maclen == ETH_HLEN)
 			diffs |= compare_ether_header(skb_mac_header(p),
 						      skb_mac_header(skb));
@@ -4548,10 +4549,12 @@
 		break;
 
 	case GRO_MERGED_FREE:
-		if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD)
+		if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD) {
+			skb_dst_drop(skb);
 			kmem_cache_free(skbuff_head_cache, skb);
-		else
+		} else {
 			__kfree_skb(skb);
+		}
 		break;
 
 	case GRO_HELD:
@@ -5376,12 +5379,12 @@
 {
 	struct netdev_adjacent *lower;
 
-	lower = list_entry((*iter)->next, struct netdev_adjacent, list);
+	lower = list_entry(*iter, struct netdev_adjacent, list);
 
 	if (&lower->list == &dev->adj_list.lower)
 		return NULL;
 
-	*iter = &lower->list;
+	*iter = lower->list.next;
 
 	return lower->dev;
 }
@@ -7419,8 +7422,10 @@
 	dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM;
 	setup(dev);
 
-	if (!dev->tx_queue_len)
+	if (!dev->tx_queue_len) {
 		dev->priv_flags |= IFF_NO_QUEUE;
+		dev->tx_queue_len = 1;
+	}
 
 	dev->num_tx_queues = txqs;
 	dev->real_num_tx_queues = txqs;
diff --git a/net/core/filter.c b/net/core/filter.c
index 94d2620..bba502f 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1752,7 +1752,7 @@
 	u8 compat[sizeof(struct bpf_tunnel_key)];
 	struct ip_tunnel_info *info;
 
-	if (unlikely(flags & ~(BPF_F_TUNINFO_IPV6)))
+	if (unlikely(flags & ~(BPF_F_TUNINFO_IPV6 | BPF_F_ZERO_CSUM_TX)))
 		return -EINVAL;
 	if (unlikely(size != sizeof(struct bpf_tunnel_key))) {
 		switch (size) {
@@ -1776,7 +1776,7 @@
 	info = &md->u.tun_info;
 	info->mode = IP_TUNNEL_INFO_TX;
 
-	info->key.tun_flags = TUNNEL_KEY;
+	info->key.tun_flags = TUNNEL_KEY | TUNNEL_CSUM;
 	info->key.tun_id = cpu_to_be64(from->tunnel_id);
 	info->key.tos = from->tunnel_tos;
 	info->key.ttl = from->tunnel_ttl;
@@ -1787,6 +1787,8 @@
 		       sizeof(from->remote_ipv6));
 	} else {
 		info->key.u.ipv4.dst = cpu_to_be32(from->remote_ipv4);
+		if (flags & BPF_F_ZERO_CSUM_TX)
+			info->key.tun_flags &= ~TUNNEL_CSUM;
 	}
 
 	return 0;
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index d79699c..12e7003 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -208,7 +208,6 @@
 	case htons(ETH_P_IPV6): {
 		const struct ipv6hdr *iph;
 		struct ipv6hdr _iph;
-		__be32 flow_label;
 
 ipv6:
 		iph = __skb_header_pointer(skb, nhoff, sizeof(_iph), data, hlen, &_iph);
@@ -230,8 +229,12 @@
 			key_control->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
 		}
 
-		flow_label = ip6_flowlabel(iph);
-		if (flow_label) {
+		if ((dissector_uses_key(flow_dissector,
+					FLOW_DISSECTOR_KEY_FLOW_LABEL) ||
+		     (flags & FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL)) &&
+		    ip6_flowlabel(iph)) {
+			__be32 flow_label = ip6_flowlabel(iph);
+
 			if (dissector_uses_key(flow_dissector,
 					       FLOW_DISSECTOR_KEY_FLOW_LABEL)) {
 				key_tags = skb_flow_dissector_target(flow_dissector,
@@ -396,6 +399,13 @@
 				goto out_bad;
 			proto = eth->h_proto;
 			nhoff += sizeof(*eth);
+
+			/* Cap headers that we access via pointers at the
+			 * end of the Ethernet header as our maximum alignment
+			 * at that point is only 2 bytes.
+			 */
+			if (NET_IP_ALIGN)
+				hlen = nhoff;
 		}
 
 		key_control->flags |= FLOW_DIS_ENCAPSULATION;
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index d735e85..8261d95 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -2911,6 +2911,7 @@
 	nlmsg_populate_fdb(skb, cb, dev, &idx, &dev->mc);
 out:
 	netif_addr_unlock_bh(dev);
+	cb->args[1] = err;
 	return idx;
 }
 EXPORT_SYMBOL(ndo_dflt_fdb_dump);
@@ -2944,6 +2945,7 @@
 		ops = br_dev->netdev_ops;
 	}
 
+	cb->args[1] = 0;
 	for_each_netdev(net, dev) {
 		if (brport_idx && (dev->ifindex != brport_idx))
 			continue;
@@ -2971,12 +2973,16 @@
 				idx = cops->ndo_fdb_dump(skb, cb, br_dev, dev,
 							 idx);
 		}
+		if (cb->args[1] == -EMSGSIZE)
+			break;
 
 		if (dev->netdev_ops->ndo_fdb_dump)
 			idx = dev->netdev_ops->ndo_fdb_dump(skb, cb, dev, NULL,
 							    idx);
 		else
 			idx = ndo_dflt_fdb_dump(skb, cb, dev, NULL, idx);
+		if (cb->args[1] == -EMSGSIZE)
+			break;
 
 		cops = NULL;
 	}
diff --git a/net/core/scm.c b/net/core/scm.c
index 14596fb..2696aef 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -87,6 +87,7 @@
 		*fplp = fpl;
 		fpl->count = 0;
 		fpl->max = SCM_MAX_FD;
+		fpl->user = NULL;
 	}
 	fpp = &fpl->fp[fpl->count];
 
@@ -107,6 +108,10 @@
 		*fpp++ = file;
 		fpl->count++;
 	}
+
+	if (!fpl->user)
+		fpl->user = get_uid(current_user());
+
 	return num;
 }
 
@@ -119,6 +124,7 @@
 		scm->fp = NULL;
 		for (i=fpl->count-1; i>=0; i--)
 			fput(fpl->fp[i]);
+		free_uid(fpl->user);
 		kfree(fpl);
 	}
 }
@@ -336,6 +342,7 @@
 		for (i = 0; i < fpl->count; i++)
 			get_file(fpl->fp[i]);
 		new_fpl->max = new_fpl->count;
+		new_fpl->user = get_uid(fpl->user);
 	}
 	return new_fpl;
 }
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index b2df375..8616d11 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -79,6 +79,8 @@
 
 struct kmem_cache *skbuff_head_cache __read_mostly;
 static struct kmem_cache *skbuff_fclone_cache __read_mostly;
+int sysctl_max_skb_frags __read_mostly = MAX_SKB_FRAGS;
+EXPORT_SYMBOL(sysctl_max_skb_frags);
 
 /**
  *	skb_panic - private function for out-of-line support
@@ -2946,6 +2948,24 @@
 EXPORT_SYMBOL_GPL(skb_append_pagefrags);
 
 /**
+ *	skb_push_rcsum - push skb and update receive checksum
+ *	@skb: buffer to update
+ *	@len: length of data pulled
+ *
+ *	This function performs an skb_push on the packet and updates
+ *	the CHECKSUM_COMPLETE checksum.  It should be used on
+ *	receive path processing instead of skb_push unless you know
+ *	that the checksum difference is zero (e.g., a valid IP header)
+ *	or you are setting ip_summed to CHECKSUM_NONE.
+ */
+static unsigned char *skb_push_rcsum(struct sk_buff *skb, unsigned len)
+{
+	skb_push(skb, len);
+	skb_postpush_rcsum(skb, skb->data, len);
+	return skb->data;
+}
+
+/**
  *	skb_pull_rcsum - pull skb and update receive checksum
  *	@skb: buffer to update
  *	@len: length of data pulled
@@ -4082,9 +4102,9 @@
 	if (!pskb_may_pull(skb_chk, offset))
 		goto err;
 
-	__skb_pull(skb_chk, offset);
+	skb_pull_rcsum(skb_chk, offset);
 	ret = skb_chkf(skb_chk);
-	__skb_push(skb_chk, offset);
+	skb_push_rcsum(skb_chk, offset);
 
 	if (ret)
 		goto err;
diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
index 1df98c5..e92b759 100644
--- a/net/core/sock_reuseport.c
+++ b/net/core/sock_reuseport.c
@@ -93,10 +93,17 @@
  *  @sk2: Socket belonging to the existing reuseport group.
  *  May return ENOMEM and not add socket to group under memory pressure.
  */
-int reuseport_add_sock(struct sock *sk, const struct sock *sk2)
+int reuseport_add_sock(struct sock *sk, struct sock *sk2)
 {
 	struct sock_reuseport *reuse;
 
+	if (!rcu_access_pointer(sk2->sk_reuseport_cb)) {
+		int err = reuseport_alloc(sk2);
+
+		if (err)
+			return err;
+	}
+
 	spin_lock_bh(&reuseport_lock);
 	reuse = rcu_dereference_protected(sk2->sk_reuseport_cb,
 					  lockdep_is_held(&reuseport_lock)),
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 95b6139..a6beb7b 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -26,6 +26,7 @@
 static int one = 1;
 static int min_sndbuf = SOCK_MIN_SNDBUF;
 static int min_rcvbuf = SOCK_MIN_RCVBUF;
+static int max_skb_frags = MAX_SKB_FRAGS;
 
 static int net_msg_warn;	/* Unused, but still a sysctl */
 
@@ -392,6 +393,15 @@
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec
 	},
+	{
+		.procname	= "max_skb_frags",
+		.data		= &sysctl_max_skb_frags,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &one,
+		.extra2		= &max_skb_frags,
+	},
 	{ }
 };
 
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 5684e14..902d606 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -824,26 +824,26 @@
 
 	if (sk->sk_state == DCCP_NEW_SYN_RECV) {
 		struct request_sock *req = inet_reqsk(sk);
-		struct sock *nsk = NULL;
+		struct sock *nsk;
 
 		sk = req->rsk_listener;
-		if (likely(sk->sk_state == DCCP_LISTEN)) {
-			nsk = dccp_check_req(sk, skb, req);
-		} else {
+		if (unlikely(sk->sk_state != DCCP_LISTEN)) {
 			inet_csk_reqsk_queue_drop_and_put(sk, req);
 			goto lookup;
 		}
+		sock_hold(sk);
+		nsk = dccp_check_req(sk, skb, req);
 		if (!nsk) {
 			reqsk_put(req);
-			goto discard_it;
+			goto discard_and_relse;
 		}
 		if (nsk == sk) {
-			sock_hold(sk);
 			reqsk_put(req);
 		} else if (dccp_child_process(sk, nsk, skb)) {
 			dccp_v4_ctl_send_reset(sk, skb);
-			goto discard_it;
+			goto discard_and_relse;
 		} else {
+			sock_put(sk);
 			return 0;
 		}
 	}
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 9c6d050..b8608b7 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -691,26 +691,26 @@
 
 	if (sk->sk_state == DCCP_NEW_SYN_RECV) {
 		struct request_sock *req = inet_reqsk(sk);
-		struct sock *nsk = NULL;
+		struct sock *nsk;
 
 		sk = req->rsk_listener;
-		if (likely(sk->sk_state == DCCP_LISTEN)) {
-			nsk = dccp_check_req(sk, skb, req);
-		} else {
+		if (unlikely(sk->sk_state != DCCP_LISTEN)) {
 			inet_csk_reqsk_queue_drop_and_put(sk, req);
 			goto lookup;
 		}
+		sock_hold(sk);
+		nsk = dccp_check_req(sk, skb, req);
 		if (!nsk) {
 			reqsk_put(req);
-			goto discard_it;
+			goto discard_and_relse;
 		}
 		if (nsk == sk) {
-			sock_hold(sk);
 			reqsk_put(req);
 		} else if (dccp_child_process(sk, nsk, skb)) {
 			dccp_v6_ctl_send_reset(sk, skb);
-			goto discard_it;
+			goto discard_and_relse;
 		} else {
+			sock_put(sk);
 			return 0;
 		}
 	}
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 40b9ca7..ab24521 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -1194,7 +1194,6 @@
 	if (ret) {
 		netdev_err(master, "error %d registering interface %s\n",
 			   ret, slave_dev->name);
-		phy_disconnect(p->phy);
 		ds->ports[port] = NULL;
 		free_netdev(slave_dev);
 		return ret;
@@ -1205,6 +1204,7 @@
 	ret = dsa_slave_phy_setup(p, slave_dev);
 	if (ret) {
 		netdev_err(master, "error %d setting up slave phy\n", ret);
+		unregister_netdev(slave_dev);
 		free_netdev(slave_dev);
 		return ret;
 	}
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index c229205..7758247 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -353,6 +353,7 @@
 	select CRYPTO_CBC
 	select CRYPTO_SHA1
 	select CRYPTO_DES
+	select CRYPTO_ECHAINIV
 	---help---
 	  Support for IPsec ESP.
 
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index cebd9d3..f6303b1 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1847,7 +1847,7 @@
 	if (err < 0)
 		goto errout;
 
-	err = EINVAL;
+	err = -EINVAL;
 	if (!tb[NETCONFA_IFINDEX])
 		goto errout;
 
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 7aea0cc..d07fc07 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1394,9 +1394,10 @@
 		struct fib_info *fi = fa->fa_info;
 		int nhsel, err;
 
-		if ((index >= (1ul << fa->fa_slen)) &&
-		    ((BITS_PER_LONG > KEYLENGTH) || (fa->fa_slen != KEYLENGTH)))
-			continue;
+		if ((BITS_PER_LONG > KEYLENGTH) || (fa->fa_slen < KEYLENGTH)) {
+			if (index >= (1ul << fa->fa_slen))
+				continue;
+		}
 		if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos)
 			continue;
 		if (fi->fib_dead)
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 05e4cba..b3086cf 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -356,9 +356,8 @@
 	skb_dst_set(skb, &rt->dst);
 	skb->dev = dev;
 
-	skb->reserved_tailroom = skb_end_offset(skb) -
-				 min(mtu, skb_end_offset(skb));
 	skb_reserve(skb, hlen);
+	skb_tailroom_reserve(skb, mtu, tlen);
 
 	skb_reset_network_header(skb);
 	pip = ip_hdr(skb);
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 46b9c88..6414891 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -789,14 +789,16 @@
 	reqsk_put(req);
 }
 
-void inet_csk_reqsk_queue_add(struct sock *sk, struct request_sock *req,
-			      struct sock *child)
+struct sock *inet_csk_reqsk_queue_add(struct sock *sk,
+				      struct request_sock *req,
+				      struct sock *child)
 {
 	struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
 
 	spin_lock(&queue->rskq_lock);
 	if (unlikely(sk->sk_state != TCP_LISTEN)) {
 		inet_child_forget(sk, req, child);
+		child = NULL;
 	} else {
 		req->sk = child;
 		req->dl_next = NULL;
@@ -808,6 +810,7 @@
 		sk_acceptq_added(sk);
 	}
 	spin_unlock(&queue->rskq_lock);
+	return child;
 }
 EXPORT_SYMBOL(inet_csk_reqsk_queue_add);
 
@@ -817,11 +820,8 @@
 	if (own_req) {
 		inet_csk_reqsk_queue_drop(sk, req);
 		reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req);
-		inet_csk_reqsk_queue_add(sk, req, child);
-		/* Warning: caller must not call reqsk_put(req);
-		 * child stole last reference on it.
-		 */
-		return child;
+		if (inet_csk_reqsk_queue_add(sk, req, child))
+			return child;
 	}
 	/* Too bad, another child took ownership of the request, undo. */
 	bh_unlock_sock(child);
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 8bb8e7a..6029157 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -361,13 +361,20 @@
 				 req->id.idiag_dport, req->id.idiag_src[0],
 				 req->id.idiag_sport, req->id.idiag_if);
 #if IS_ENABLED(CONFIG_IPV6)
-	else if (req->sdiag_family == AF_INET6)
-		sk = inet6_lookup(net, hashinfo,
-				  (struct in6_addr *)req->id.idiag_dst,
-				  req->id.idiag_dport,
-				  (struct in6_addr *)req->id.idiag_src,
-				  req->id.idiag_sport,
-				  req->id.idiag_if);
+	else if (req->sdiag_family == AF_INET6) {
+		if (ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_dst) &&
+		    ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_src))
+			sk = inet_lookup(net, hashinfo, req->id.idiag_dst[3],
+					 req->id.idiag_dport, req->id.idiag_src[3],
+					 req->id.idiag_sport, req->id.idiag_if);
+		else
+			sk = inet6_lookup(net, hashinfo,
+					  (struct in6_addr *)req->id.idiag_dst,
+					  req->id.idiag_dport,
+					  (struct in6_addr *)req->id.idiag_src,
+					  req->id.idiag_sport,
+					  req->id.idiag_if);
+	}
 #endif
 	else
 		return ERR_PTR(-EINVAL);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 3f00810..187c6fc 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -661,6 +661,7 @@
 	struct ipq *qp;
 
 	IP_INC_STATS_BH(net, IPSTATS_MIB_REASMREQDS);
+	skb_orphan(skb);
 
 	/* Lookup (or create) queue header */
 	qp = ip_find(net, ip_hdr(skb), user, vif);
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 7c51c4e..41ba68d 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -1054,8 +1054,9 @@
 static void ipgre_tap_setup(struct net_device *dev)
 {
 	ether_setup(dev);
-	dev->netdev_ops		= &gre_tap_netdev_ops;
-	dev->priv_flags 	|= IFF_LIVE_ADDR_CHANGE;
+	dev->netdev_ops	= &gre_tap_netdev_ops;
+	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
+	dev->priv_flags	|= IFF_LIVE_ADDR_CHANGE;
 	ip_tunnel_setup(dev, gre_tap_net_id);
 }
 
@@ -1240,6 +1241,14 @@
 	err = ipgre_newlink(net, dev, tb, NULL);
 	if (err < 0)
 		goto out;
+
+	/* openvswitch users expect packet sizes to be unrestricted,
+	 * so set the largest MTU we can.
+	 */
+	err = __ip_tunnel_change_mtu(dev, IP_MAX_MTU, false);
+	if (err)
+		goto out;
+
 	return dev;
 out:
 	free_netdev(dev);
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index b1209b6..d77eb0c 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -316,7 +316,10 @@
 	const struct iphdr *iph = ip_hdr(skb);
 	struct rtable *rt;
 
-	if (sysctl_ip_early_demux && !skb_dst(skb) && !skb->sk) {
+	if (sysctl_ip_early_demux &&
+	    !skb_dst(skb) &&
+	    !skb->sk &&
+	    !ip_is_fragment(iph)) {
 		const struct net_protocol *ipprot;
 		int protocol = iph->protocol;
 
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 64878ef..565bf64 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1236,13 +1236,16 @@
 	if (!skb)
 		return -EINVAL;
 
-	cork->length += size;
 	if ((size + skb->len > mtu) &&
 	    (sk->sk_protocol == IPPROTO_UDP) &&
 	    (rt->dst.dev->features & NETIF_F_UFO)) {
+		if (skb->ip_summed != CHECKSUM_PARTIAL)
+			return -EOPNOTSUPP;
+
 		skb_shinfo(skb)->gso_size = mtu - fragheaderlen;
 		skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
 	}
+	cork->length += size;
 
 	while (size > 0) {
 		if (skb_is_gso(skb)) {
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 5f73a7c..a501242 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -249,6 +249,8 @@
 		switch (cmsg->cmsg_type) {
 		case IP_RETOPTS:
 			err = cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr));
+
+			/* Our caller is responsible for freeing ipc->opt */
 			err = ip_options_get(net, &ipc->opt, CMSG_DATA(cmsg),
 					     err < 40 ? err : 40);
 			if (err)
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index c7bd72e..336e689 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -661,6 +661,8 @@
 	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
 	connected = (tunnel->parms.iph.daddr != 0);
 
+	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+
 	dst = tnl_params->daddr;
 	if (dst == 0) {
 		/* NBMA tunnel */
@@ -758,7 +760,6 @@
 				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
 			tunnel->err_count--;
 
-			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
 			dst_link_failure(skb);
 		} else
 			tunnel->err_count = 0;
@@ -943,17 +944,31 @@
 }
 EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
 
-int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
+int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict)
 {
 	struct ip_tunnel *tunnel = netdev_priv(dev);
 	int t_hlen = tunnel->hlen + sizeof(struct iphdr);
+	int max_mtu = 0xFFF8 - dev->hard_header_len - t_hlen;
 
-	if (new_mtu < 68 ||
-	    new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen)
+	if (new_mtu < 68)
 		return -EINVAL;
+
+	if (new_mtu > max_mtu) {
+		if (strict)
+			return -EINVAL;
+
+		new_mtu = max_mtu;
+	}
+
 	dev->mtu = new_mtu;
 	return 0;
 }
+EXPORT_SYMBOL_GPL(__ip_tunnel_change_mtu);
+
+int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
+{
+	return __ip_tunnel_change_mtu(dev, new_mtu, true);
+}
 EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
 
 static void ip_tunnel_dev_free(struct net_device *dev)
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 67f7c9d..2ed9dd2 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -143,7 +143,11 @@
 
 /* Persistent data: */
 
+#ifdef IPCONFIG_DYNAMIC
 static int ic_proto_used;			/* Protocol used, if any */
+#else
+#define ic_proto_used 0
+#endif
 static __be32 ic_nameservers[CONF_NAMESERVERS_MAX]; /* DNS Server IP addresses */
 static u8 ic_domain[64];		/* DNS (not NIS) domain name */
 
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
index 6fb869f6..a04dee5 100644
--- a/net/ipv4/netfilter/nf_defrag_ipv4.c
+++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
@@ -27,8 +27,6 @@
 {
 	int err;
 
-	skb_orphan(skb);
-
 	local_bh_disable();
 	err = ip_defrag(net, skb, user);
 	local_bh_enable();
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index c117b21..d3a2716 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -746,8 +746,10 @@
 
 	if (msg->msg_controllen) {
 		err = ip_cmsg_send(sock_net(sk), msg, &ipc, false);
-		if (err)
+		if (unlikely(err)) {
+			kfree(ipc.opt);
 			return err;
+		}
 		if (ipc.opt)
 			free = 1;
 	}
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index bc35f18..7113bae 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -547,8 +547,10 @@
 
 	if (msg->msg_controllen) {
 		err = ip_cmsg_send(net, msg, &ipc, false);
-		if (err)
+		if (unlikely(err)) {
+			kfree(ipc.opt);
 			goto out;
+		}
 		if (ipc.opt)
 			free = 1;
 	}
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 85f184e..02c6229 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -129,6 +129,7 @@
 static int ip_rt_min_pmtu __read_mostly		= 512 + 20 + 20;
 static int ip_rt_min_advmss __read_mostly	= 256;
 
+static int ip_rt_gc_timeout __read_mostly	= RT_GC_TIMEOUT;
 /*
  *	Interface to generic destination cache.
  */
@@ -755,7 +756,7 @@
 				struct fib_nh *nh = &FIB_RES_NH(res);
 
 				update_or_create_fnhe(nh, fl4->daddr, new_gw,
-						      0, 0);
+						0, jiffies + ip_rt_gc_timeout);
 			}
 			if (kill_route)
 				rt->dst.obsolete = DST_OBSOLETE_KILL;
@@ -1556,6 +1557,36 @@
 #endif
 }
 
+static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr)
+{
+	struct fnhe_hash_bucket *hash;
+	struct fib_nh_exception *fnhe, __rcu **fnhe_p;
+	u32 hval = fnhe_hashfun(daddr);
+
+	spin_lock_bh(&fnhe_lock);
+
+	hash = rcu_dereference_protected(nh->nh_exceptions,
+					 lockdep_is_held(&fnhe_lock));
+	hash += hval;
+
+	fnhe_p = &hash->chain;
+	fnhe = rcu_dereference_protected(*fnhe_p, lockdep_is_held(&fnhe_lock));
+	while (fnhe) {
+		if (fnhe->fnhe_daddr == daddr) {
+			rcu_assign_pointer(*fnhe_p, rcu_dereference_protected(
+				fnhe->fnhe_next, lockdep_is_held(&fnhe_lock)));
+			fnhe_flush_routes(fnhe);
+			kfree_rcu(fnhe, rcu);
+			break;
+		}
+		fnhe_p = &fnhe->fnhe_next;
+		fnhe = rcu_dereference_protected(fnhe->fnhe_next,
+						 lockdep_is_held(&fnhe_lock));
+	}
+
+	spin_unlock_bh(&fnhe_lock);
+}
+
 /* called in rcu_read_lock() section */
 static int __mkroute_input(struct sk_buff *skb,
 			   const struct fib_result *res,
@@ -1609,11 +1640,20 @@
 
 	fnhe = find_exception(&FIB_RES_NH(*res), daddr);
 	if (do_cache) {
-		if (fnhe)
+		if (fnhe) {
 			rth = rcu_dereference(fnhe->fnhe_rth_input);
-		else
-			rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
+			if (rth && rth->dst.expires &&
+			    time_after(jiffies, rth->dst.expires)) {
+				ip_del_fnhe(&FIB_RES_NH(*res), daddr);
+				fnhe = NULL;
+			} else {
+				goto rt_cache;
+			}
+		}
 
+		rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
+
+rt_cache:
 		if (rt_cache_valid(rth)) {
 			skb_dst_set_noref(skb, &rth->dst);
 			goto out;
@@ -2014,19 +2054,29 @@
 		struct fib_nh *nh = &FIB_RES_NH(*res);
 
 		fnhe = find_exception(nh, fl4->daddr);
-		if (fnhe)
+		if (fnhe) {
 			prth = &fnhe->fnhe_rth_output;
-		else {
-			if (unlikely(fl4->flowi4_flags &
-				     FLOWI_FLAG_KNOWN_NH &&
-				     !(nh->nh_gw &&
-				       nh->nh_scope == RT_SCOPE_LINK))) {
-				do_cache = false;
-				goto add;
+			rth = rcu_dereference(*prth);
+			if (rth && rth->dst.expires &&
+			    time_after(jiffies, rth->dst.expires)) {
+				ip_del_fnhe(nh, fl4->daddr);
+				fnhe = NULL;
+			} else {
+				goto rt_cache;
 			}
-			prth = raw_cpu_ptr(nh->nh_pcpu_rth_output);
 		}
+
+		if (unlikely(fl4->flowi4_flags &
+			     FLOWI_FLAG_KNOWN_NH &&
+			     !(nh->nh_gw &&
+			       nh->nh_scope == RT_SCOPE_LINK))) {
+			do_cache = false;
+			goto add;
+		}
+		prth = raw_cpu_ptr(nh->nh_pcpu_rth_output);
 		rth = rcu_dereference(*prth);
+
+rt_cache:
 		if (rt_cache_valid(rth)) {
 			dst_hold(&rth->dst);
 			return rth;
@@ -2569,7 +2619,6 @@
 }
 
 #ifdef CONFIG_SYSCTL
-static int ip_rt_gc_timeout __read_mostly	= RT_GC_TIMEOUT;
 static int ip_rt_gc_interval __read_mostly  = 60 * HZ;
 static int ip_rt_gc_min_interval __read_mostly	= HZ / 2;
 static int ip_rt_gc_elasticity __read_mostly	= 8;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index fd17eec..483ffdf 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -279,6 +279,7 @@
 
 #include <asm/uaccess.h>
 #include <asm/ioctls.h>
+#include <asm/unaligned.h>
 #include <net/busy_poll.h>
 
 int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT;
@@ -939,7 +940,7 @@
 
 		i = skb_shinfo(skb)->nr_frags;
 		can_coalesce = skb_can_coalesce(skb, i, page, offset);
-		if (!can_coalesce && i >= MAX_SKB_FRAGS) {
+		if (!can_coalesce && i >= sysctl_max_skb_frags) {
 			tcp_mark_push(tp, skb);
 			goto new_segment;
 		}
@@ -1212,7 +1213,7 @@
 
 			if (!skb_can_coalesce(skb, i, pfrag->page,
 					      pfrag->offset)) {
-				if (i == MAX_SKB_FRAGS || !sg) {
+				if (i == sysctl_max_skb_frags || !sg) {
 					tcp_mark_push(tp, skb);
 					goto new_segment;
 				}
@@ -2638,6 +2639,7 @@
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 	u32 now = tcp_time_stamp;
 	unsigned int start;
+	u64 rate64;
 	u32 rate;
 
 	memset(info, 0, sizeof(*info));
@@ -2703,15 +2705,17 @@
 	info->tcpi_total_retrans = tp->total_retrans;
 
 	rate = READ_ONCE(sk->sk_pacing_rate);
-	info->tcpi_pacing_rate = rate != ~0U ? rate : ~0ULL;
+	rate64 = rate != ~0U ? rate : ~0ULL;
+	put_unaligned(rate64, &info->tcpi_pacing_rate);
 
 	rate = READ_ONCE(sk->sk_max_pacing_rate);
-	info->tcpi_max_pacing_rate = rate != ~0U ? rate : ~0ULL;
+	rate64 = rate != ~0U ? rate : ~0ULL;
+	put_unaligned(rate64, &info->tcpi_max_pacing_rate);
 
 	do {
 		start = u64_stats_fetch_begin_irq(&tp->syncp);
-		info->tcpi_bytes_acked = tp->bytes_acked;
-		info->tcpi_bytes_received = tp->bytes_received;
+		put_unaligned(tp->bytes_acked, &info->tcpi_bytes_acked);
+		put_unaligned(tp->bytes_received, &info->tcpi_bytes_received);
 	} while (u64_stats_fetch_retry_irq(&tp->syncp, start));
 	info->tcpi_segs_out = tp->segs_out;
 	info->tcpi_segs_in = tp->segs_in;
@@ -2946,7 +2950,7 @@
 			struct crypto_hash *hash;
 
 			hash = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC);
-			if (IS_ERR_OR_NULL(hash))
+			if (IS_ERR(hash))
 				return;
 			per_cpu(tcp_md5sig_pool, cpu).md5_desc.tfm = hash;
 		}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 0003d40..3b2c8e9 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2164,8 +2164,7 @@
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
-	int cnt, oldcnt;
-	int err;
+	int cnt, oldcnt, lost;
 	unsigned int mss;
 	/* Use SACK to deduce losses of new sequences sent during recovery */
 	const u32 loss_high = tcp_is_sack(tp) ?  tp->snd_nxt : tp->high_seq;
@@ -2205,9 +2204,10 @@
 				break;
 
 			mss = tcp_skb_mss(skb);
-			err = tcp_fragment(sk, skb, (packets - oldcnt) * mss,
-					   mss, GFP_ATOMIC);
-			if (err < 0)
+			/* If needed, chop off the prefix to mark as lost. */
+			lost = (packets - oldcnt) * mss;
+			if (lost < skb->len &&
+			    tcp_fragment(sk, skb, lost, mss, GFP_ATOMIC) < 0)
 				break;
 			cnt = packets;
 		}
@@ -2366,8 +2366,6 @@
 			tp->snd_ssthresh = tp->prior_ssthresh;
 			tcp_ecn_withdraw_cwr(tp);
 		}
-	} else {
-		tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh);
 	}
 	tp->snd_cwnd_stamp = tcp_time_stamp;
 	tp->undo_marker = 0;
@@ -2898,7 +2896,10 @@
 {
 	const u32 now = tcp_time_stamp, wlen = sysctl_tcp_min_rtt_wlen * HZ;
 	struct rtt_meas *m = tcp_sk(sk)->rtt_min;
-	struct rtt_meas rttm = { .rtt = (rtt_us ? : 1), .ts = now };
+	struct rtt_meas rttm = {
+		.rtt = likely(rtt_us) ? rtt_us : jiffies_to_usecs(1),
+		.ts = now,
+	};
 	u32 elapsed;
 
 	/* Check if the new measurement updates the 1st, 2nd, or 3rd choices */
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 5ced3e4..487ac67 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -311,7 +311,7 @@
 
 
 /* handle ICMP messages on TCP_NEW_SYN_RECV request sockets */
-void tcp_req_err(struct sock *sk, u32 seq)
+void tcp_req_err(struct sock *sk, u32 seq, bool abort)
 {
 	struct request_sock *req = inet_reqsk(sk);
 	struct net *net = sock_net(sk);
@@ -323,7 +323,7 @@
 
 	if (seq != tcp_rsk(req)->snt_isn) {
 		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
-	} else {
+	} else if (abort) {
 		/*
 		 * Still in SYN_RECV, just remove it silently.
 		 * There is no good way to pass the error to the newly
@@ -383,7 +383,12 @@
 	}
 	seq = ntohl(th->seq);
 	if (sk->sk_state == TCP_NEW_SYN_RECV)
-		return tcp_req_err(sk, seq);
+		return tcp_req_err(sk, seq,
+				  type == ICMP_PARAMETERPROB ||
+				  type == ICMP_TIME_EXCEEDED ||
+				  (type == ICMP_DEST_UNREACH &&
+				   (code == ICMP_NET_UNREACH ||
+				    code == ICMP_HOST_UNREACH)));
 
 	bh_lock_sock(sk);
 	/* If too many ICMPs get dropped on busy
@@ -707,7 +712,8 @@
    outside socket context is ugly, certainly. What can I do?
  */
 
-static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
+static void tcp_v4_send_ack(struct net *net,
+			    struct sk_buff *skb, u32 seq, u32 ack,
 			    u32 win, u32 tsval, u32 tsecr, int oif,
 			    struct tcp_md5sig_key *key,
 			    int reply_flags, u8 tos)
@@ -722,7 +728,6 @@
 			];
 	} rep;
 	struct ip_reply_arg arg;
-	struct net *net = dev_net(skb_dst(skb)->dev);
 
 	memset(&rep.th, 0, sizeof(struct tcphdr));
 	memset(&arg, 0, sizeof(arg));
@@ -784,7 +789,8 @@
 	struct inet_timewait_sock *tw = inet_twsk(sk);
 	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
 
-	tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
+	tcp_v4_send_ack(sock_net(sk), skb,
+			tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
 			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
 			tcp_time_stamp + tcptw->tw_ts_offset,
 			tcptw->tw_ts_recent,
@@ -803,8 +809,10 @@
 	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
 	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
 	 */
-	tcp_v4_send_ack(skb, (sk->sk_state == TCP_LISTEN) ?
-			tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
+	u32 seq = (sk->sk_state == TCP_LISTEN) ? tcp_rsk(req)->snt_isn + 1 :
+					     tcp_sk(sk)->snd_nxt;
+
+	tcp_v4_send_ack(sock_net(sk), skb, seq,
 			tcp_rsk(req)->rcv_nxt, req->rsk_rcv_wnd,
 			tcp_time_stamp,
 			req->ts_recent,
@@ -1589,28 +1597,30 @@
 
 	if (sk->sk_state == TCP_NEW_SYN_RECV) {
 		struct request_sock *req = inet_reqsk(sk);
-		struct sock *nsk = NULL;
+		struct sock *nsk;
 
 		sk = req->rsk_listener;
-		if (tcp_v4_inbound_md5_hash(sk, skb))
-			goto discard_and_relse;
-		if (likely(sk->sk_state == TCP_LISTEN)) {
-			nsk = tcp_check_req(sk, skb, req, false);
-		} else {
+		if (unlikely(tcp_v4_inbound_md5_hash(sk, skb))) {
+			reqsk_put(req);
+			goto discard_it;
+		}
+		if (unlikely(sk->sk_state != TCP_LISTEN)) {
 			inet_csk_reqsk_queue_drop_and_put(sk, req);
 			goto lookup;
 		}
+		sock_hold(sk);
+		nsk = tcp_check_req(sk, skb, req, false);
 		if (!nsk) {
 			reqsk_put(req);
-			goto discard_it;
+			goto discard_and_relse;
 		}
 		if (nsk == sk) {
-			sock_hold(sk);
 			reqsk_put(req);
 		} else if (tcp_child_process(sk, nsk, skb)) {
 			tcp_v4_send_reset(nsk, skb);
-			goto discard_it;
+			goto discard_and_relse;
 		} else {
+			sock_put(sk);
 			return 0;
 		}
 	}
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index c8cbc2b..a726d78 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -550,7 +550,7 @@
 	 */
 	if (crtt > tp->srtt_us) {
 		/* Set RTO like tcp_rtt_estimator(), but from cached RTT. */
-		crtt /= 8 * USEC_PER_MSEC;
+		crtt /= 8 * USEC_PER_SEC / HZ;
 		inet_csk(sk)->icsk_rto = crtt + max(2 * crtt, tcp_rto_min(sk));
 	} else if (tp->srtt_us == 0) {
 		/* RFC6298: 5.7 We've failed to get a valid RTT sample from
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 75632a9..9b02af2 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -455,7 +455,7 @@
 
 		newtp->rcv_wup = newtp->copied_seq =
 		newtp->rcv_nxt = treq->rcv_isn + 1;
-		newtp->segs_in = 0;
+		newtp->segs_in = 1;
 
 		newtp->snd_sml = newtp->snd_una =
 		newtp->snd_nxt = newtp->snd_up = treq->snt_isn + 1;
@@ -815,6 +815,7 @@
 	int ret = 0;
 	int state = child->sk_state;
 
+	tcp_sk(child)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs);
 	if (!sock_owned_by_user(child)) {
 		ret = tcp_rcv_state_process(child, skb);
 		/* Wakeup parent, send SIGIO */
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index dc45b53..95d2f19 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -499,6 +499,7 @@
 	struct sock *sk, *result;
 	struct hlist_nulls_node *node;
 	int score, badness, matches = 0, reuseport = 0;
+	bool select_ok = true;
 	u32 hash = 0;
 
 begin:
@@ -512,14 +513,18 @@
 			badness = score;
 			reuseport = sk->sk_reuseport;
 			if (reuseport) {
-				struct sock *sk2;
 				hash = udp_ehashfn(net, daddr, hnum,
 						   saddr, sport);
-				sk2 = reuseport_select_sock(sk, hash, skb,
-							    sizeof(struct udphdr));
-				if (sk2) {
-					result = sk2;
-					goto found;
+				if (select_ok) {
+					struct sock *sk2;
+
+					sk2 = reuseport_select_sock(sk, hash, skb,
+							sizeof(struct udphdr));
+					if (sk2) {
+						result = sk2;
+						select_ok = false;
+						goto found;
+					}
 				}
 				matches = 1;
 			}
@@ -563,6 +568,7 @@
 	unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask);
 	struct udp_hslot *hslot2, *hslot = &udptable->hash[slot];
 	int score, badness, matches = 0, reuseport = 0;
+	bool select_ok = true;
 	u32 hash = 0;
 
 	rcu_read_lock();
@@ -601,14 +607,18 @@
 			badness = score;
 			reuseport = sk->sk_reuseport;
 			if (reuseport) {
-				struct sock *sk2;
 				hash = udp_ehashfn(net, daddr, hnum,
 						   saddr, sport);
-				sk2 = reuseport_select_sock(sk, hash, skb,
+				if (select_ok) {
+					struct sock *sk2;
+
+					sk2 = reuseport_select_sock(sk, hash, skb,
 							sizeof(struct udphdr));
-				if (sk2) {
-					result = sk2;
-					goto found;
+					if (sk2) {
+						result = sk2;
+						select_ok = false;
+						goto found;
+					}
 				}
 				matches = 1;
 			}
@@ -1038,8 +1048,10 @@
 	if (msg->msg_controllen) {
 		err = ip_cmsg_send(sock_net(sk), msg, &ipc,
 				   sk->sk_family == AF_INET6);
-		if (err)
+		if (unlikely(err)) {
+			kfree(ipc.opt);
 			return err;
+		}
 		if (ipc.opt)
 			free = 1;
 		connected = 0;
diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c
index 0ec0881..96599d1 100644
--- a/net/ipv4/udp_tunnel.c
+++ b/net/ipv4/udp_tunnel.c
@@ -89,6 +89,8 @@
 	uh->source = src_port;
 	uh->len = htons(skb->len);
 
+	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+
 	udp_set_csum(nocheck, skb, src, dst, skb->len);
 
 	iptunnel_xmit(sk, rt, skb, src, dst, IPPROTO_UDP, tos, ttl, df, xnet);
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index bb7dabe..40c8975 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -69,6 +69,7 @@
 	select CRYPTO_CBC
 	select CRYPTO_SHA1
 	select CRYPTO_DES
+	select CRYPTO_ECHAINIV
 	---help---
 	  Support for IPsec ESP.
 
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 38eedde..bdd7eac 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -583,7 +583,7 @@
 	if (err < 0)
 		goto errout;
 
-	err = EINVAL;
+	err = -EINVAL;
 	if (!tb[NETCONFA_IFINDEX])
 		goto errout;
 
@@ -3538,6 +3538,7 @@
 {
 	struct inet6_dev *idev = ifp->idev;
 	struct net_device *dev = idev->dev;
+	bool notify = false;
 
 	addrconf_join_solict(dev, &ifp->addr);
 
@@ -3583,7 +3584,7 @@
 			/* Because optimistic nodes can use this address,
 			 * notify listeners. If DAD fails, RTM_DELADDR is sent.
 			 */
-			ipv6_ifa_notify(RTM_NEWADDR, ifp);
+			notify = true;
 		}
 	}
 
@@ -3591,6 +3592,8 @@
 out:
 	spin_unlock(&ifp->lock);
 	read_unlock_bh(&idev->lock);
+	if (notify)
+		ipv6_ifa_notify(RTM_NEWADDR, ifp);
 }
 
 static void addrconf_dad_start(struct inet6_ifaddr *ifp)
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 517c55b..4281621 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -162,6 +162,9 @@
 	fl6.fl6_dport = inet->inet_dport;
 	fl6.fl6_sport = inet->inet_sport;
 
+	if (!fl6.flowi6_oif)
+		fl6.flowi6_oif = np->sticky_pktinfo.ipi6_ifindex;
+
 	if (!fl6.flowi6_oif && (addr_type&IPV6_ADDR_MULTICAST))
 		fl6.flowi6_oif = np->mcast_oif;
 
diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c
index 5c5d23e..9508a20 100644
--- a/net/ipv6/exthdrs_core.c
+++ b/net/ipv6/exthdrs_core.c
@@ -257,7 +257,11 @@
 						*fragoff = _frag_off;
 					return hp->nexthdr;
 				}
-				return -ENOENT;
+				if (!found)
+					return -ENOENT;
+				if (fragoff)
+					*fragoff = _frag_off;
+				break;
 			}
 			hdrlen = 8;
 		} else if (nexthdr == NEXTHDR_AUTH) {
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index 1f9ebe3..dc2db4f 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -540,12 +540,13 @@
 		}
 		spin_lock_bh(&ip6_sk_fl_lock);
 		for (sflp = &np->ipv6_fl_list;
-		     (sfl = rcu_dereference(*sflp)) != NULL;
+		     (sfl = rcu_dereference_protected(*sflp,
+						      lockdep_is_held(&ip6_sk_fl_lock))) != NULL;
 		     sflp = &sfl->next) {
 			if (sfl->fl->label == freq.flr_label) {
 				if (freq.flr_label == (np->flow_label&IPV6_FLOWLABEL_MASK))
 					np->flow_label &= ~IPV6_FLOWLABEL_MASK;
-				*sflp = rcu_dereference(sfl->next);
+				*sflp = sfl->next;
 				spin_unlock_bh(&ip6_sk_fl_lock);
 				fl_release(sfl->fl);
 				kfree_rcu(sfl, rcu);
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index f37f18b..c0d4dc1 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -777,6 +777,8 @@
 	__u32 mtu;
 	int err;
 
+	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+
 	if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
 		encap_limit = t->parms.encap_limit;
 
@@ -1512,6 +1514,7 @@
 	dev->destructor = ip6gre_dev_free;
 
 	dev->features |= NETIF_F_NETNS_LOCAL;
+	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
 }
 
 static int ip6gre_newlink(struct net *src_net, struct net_device *dev,
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 23de98f..a163102 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -909,6 +909,7 @@
 	struct rt6_info *rt;
 #endif
 	int err;
+	int flags = 0;
 
 	/* The correct way to handle this would be to do
 	 * ip6_route_get_saddr, and then ip6_route_output; however,
@@ -940,10 +941,13 @@
 			dst_release(*dst);
 			*dst = NULL;
 		}
+
+		if (fl6->flowi6_oif)
+			flags |= RT6_LOOKUP_F_IFACE;
 	}
 
 	if (!*dst)
-		*dst = ip6_route_output(net, sk, fl6);
+		*dst = ip6_route_output_flags(net, sk, fl6, flags);
 
 	err = (*dst)->error;
 	if (err)
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 137fca4..6c5dfec 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1180,6 +1180,8 @@
 	u8 tproto;
 	int err;
 
+	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+
 	tproto = ACCESS_ONCE(t->parms.proto);
 	if (tproto != IPPROTO_IPIP && tproto != 0)
 		return -1;
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 5ee56d0..d64ee7e 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1574,9 +1574,8 @@
 		return NULL;
 
 	skb->priority = TC_PRIO_CONTROL;
-	skb->reserved_tailroom = skb_end_offset(skb) -
-				 min(mtu, skb_end_offset(skb));
 	skb_reserve(skb, hlen);
+	skb_tailroom_reserve(skb, mtu, tlen);
 
 	if (__ipv6_get_lladdr(idev, &addr_buf, IFA_F_TENTATIVE)) {
 		/* <draft-ietf-magma-mld-source-05.txt>:
diff --git a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
index 31ba7ca..051b6a6 100644
--- a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
+++ b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
@@ -21,6 +21,10 @@
 #include <net/ipv6.h>
 #include <net/netfilter/ipv6/nf_nat_masquerade.h>
 
+#define MAX_WORK_COUNT	16
+
+static atomic_t v6_worker_count;
+
 unsigned int
 nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range *range,
 		       const struct net_device *out)
@@ -78,14 +82,78 @@
 	.notifier_call	= masq_device_event,
 };
 
+struct masq_dev_work {
+	struct work_struct work;
+	struct net *net;
+	int ifindex;
+};
+
+static void iterate_cleanup_work(struct work_struct *work)
+{
+	struct masq_dev_work *w;
+	long index;
+
+	w = container_of(work, struct masq_dev_work, work);
+
+	index = w->ifindex;
+	nf_ct_iterate_cleanup(w->net, device_cmp, (void *)index, 0, 0);
+
+	put_net(w->net);
+	kfree(w);
+	atomic_dec(&v6_worker_count);
+	module_put(THIS_MODULE);
+}
+
+/* ipv6 inet notifier is an atomic notifier, i.e. we cannot
+ * schedule.
+ *
+ * Unfortunately, nf_ct_iterate_cleanup can run for a long
+ * time if there are lots of conntracks and the system
+ * handles high softirq load, so it frequently calls cond_resched
+ * while iterating the conntrack table.
+ *
+ * So we defer nf_ct_iterate_cleanup walk to the system workqueue.
+ *
+ * As we can have 'a lot' of inet_events (depending on amount
+ * of ipv6 addresses being deleted), we also need to add an upper
+ * limit to the number of queued work items.
+ */
 static int masq_inet_event(struct notifier_block *this,
 			   unsigned long event, void *ptr)
 {
 	struct inet6_ifaddr *ifa = ptr;
-	struct netdev_notifier_info info;
+	const struct net_device *dev;
+	struct masq_dev_work *w;
+	struct net *net;
 
-	netdev_notifier_info_init(&info, ifa->idev->dev);
-	return masq_device_event(this, event, &info);
+	if (event != NETDEV_DOWN ||
+	    atomic_read(&v6_worker_count) >= MAX_WORK_COUNT)
+		return NOTIFY_DONE;
+
+	dev = ifa->idev->dev;
+	net = maybe_get_net(dev_net(dev));
+	if (!net)
+		return NOTIFY_DONE;
+
+	if (!try_module_get(THIS_MODULE))
+		goto err_module;
+
+	w = kmalloc(sizeof(*w), GFP_ATOMIC);
+	if (w) {
+		atomic_inc(&v6_worker_count);
+
+		INIT_WORK(&w->work, iterate_cleanup_work);
+		w->ifindex = dev->ifindex;
+		w->net = net;
+		schedule_work(&w->work);
+
+		return NOTIFY_DONE;
+	}
+
+	module_put(THIS_MODULE);
+ err_module:
+	put_net(net);
+	return NOTIFY_DONE;
 }
 
 static struct notifier_block masq_inet_notifier = {
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 3c8834b..ed44663 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1183,11 +1183,10 @@
 	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
 }
 
-struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
-				    struct flowi6 *fl6)
+struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
+					 struct flowi6 *fl6, int flags)
 {
 	struct dst_entry *dst;
-	int flags = 0;
 	bool any_src;
 
 	dst = l3mdev_rt6_dst_by_oif(net, fl6);
@@ -1208,7 +1207,7 @@
 
 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
 }
-EXPORT_SYMBOL(ip6_route_output);
+EXPORT_SYMBOL_GPL(ip6_route_output_flags);
 
 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
 {
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index e794ef6..2066d1c 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -201,14 +201,14 @@
 	if ((__force u16)t->parms.i_flags & SIT_ISATAP)
 		dev->priv_flags |= IFF_ISATAP;
 
+	dev->rtnl_link_ops = &sit_link_ops;
+
 	err = register_netdevice(dev);
 	if (err < 0)
 		goto out;
 
 	ipip6_tunnel_clone_6rd(dev, sitn);
 
-	dev->rtnl_link_ops = &sit_link_ops;
-
 	dev_hold(dev);
 
 	ipip6_tunnel_link(sitn, t);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 006396e..5c8c842 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -327,6 +327,7 @@
 	struct tcp_sock *tp;
 	__u32 seq, snd_una;
 	struct sock *sk;
+	bool fatal;
 	int err;
 
 	sk = __inet6_lookup_established(net, &tcp_hashinfo,
@@ -345,8 +346,9 @@
 		return;
 	}
 	seq = ntohl(th->seq);
+	fatal = icmpv6_err_convert(type, code, &err);
 	if (sk->sk_state == TCP_NEW_SYN_RECV)
-		return tcp_req_err(sk, seq);
+		return tcp_req_err(sk, seq, fatal);
 
 	bh_lock_sock(sk);
 	if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
@@ -400,7 +402,6 @@
 		goto out;
 	}
 
-	icmpv6_err_convert(type, code, &err);
 
 	/* Might be for an request_sock */
 	switch (sk->sk_state) {
@@ -1386,7 +1387,7 @@
 
 	if (sk->sk_state == TCP_NEW_SYN_RECV) {
 		struct request_sock *req = inet_reqsk(sk);
-		struct sock *nsk = NULL;
+		struct sock *nsk;
 
 		sk = req->rsk_listener;
 		tcp_v6_fill_cb(skb, hdr, th);
@@ -1394,24 +1395,24 @@
 			reqsk_put(req);
 			goto discard_it;
 		}
-		if (likely(sk->sk_state == TCP_LISTEN)) {
-			nsk = tcp_check_req(sk, skb, req, false);
-		} else {
+		if (unlikely(sk->sk_state != TCP_LISTEN)) {
 			inet_csk_reqsk_queue_drop_and_put(sk, req);
 			goto lookup;
 		}
+		sock_hold(sk);
+		nsk = tcp_check_req(sk, skb, req, false);
 		if (!nsk) {
 			reqsk_put(req);
-			goto discard_it;
+			goto discard_and_relse;
 		}
 		if (nsk == sk) {
-			sock_hold(sk);
 			reqsk_put(req);
 			tcp_v6_restore_cb(skb);
 		} else if (tcp_child_process(sk, nsk, skb)) {
 			tcp_v6_send_reset(nsk, skb);
-			goto discard_it;
+			goto discard_and_relse;
 		} else {
+			sock_put(sk);
 			return 0;
 		}
 	}
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 5d2c2af..422dd01 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -257,6 +257,7 @@
 	struct sock *sk, *result;
 	struct hlist_nulls_node *node;
 	int score, badness, matches = 0, reuseport = 0;
+	bool select_ok = true;
 	u32 hash = 0;
 
 begin:
@@ -270,14 +271,18 @@
 			badness = score;
 			reuseport = sk->sk_reuseport;
 			if (reuseport) {
-				struct sock *sk2;
 				hash = udp6_ehashfn(net, daddr, hnum,
 						    saddr, sport);
-				sk2 = reuseport_select_sock(sk, hash, skb,
-							    sizeof(struct udphdr));
-				if (sk2) {
-					result = sk2;
-					goto found;
+				if (select_ok) {
+					struct sock *sk2;
+
+					sk2 = reuseport_select_sock(sk, hash, skb,
+							sizeof(struct udphdr));
+					if (sk2) {
+						result = sk2;
+						select_ok = false;
+						goto found;
+					}
 				}
 				matches = 1;
 			}
@@ -321,6 +326,7 @@
 	unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask);
 	struct udp_hslot *hslot2, *hslot = &udptable->hash[slot];
 	int score, badness, matches = 0, reuseport = 0;
+	bool select_ok = true;
 	u32 hash = 0;
 
 	rcu_read_lock();
@@ -358,14 +364,18 @@
 			badness = score;
 			reuseport = sk->sk_reuseport;
 			if (reuseport) {
-				struct sock *sk2;
 				hash = udp6_ehashfn(net, daddr, hnum,
 						    saddr, sport);
-				sk2 = reuseport_select_sock(sk, hash, skb,
+				if (select_ok) {
+					struct sock *sk2;
+
+					sk2 = reuseport_select_sock(sk, hash, skb,
 							sizeof(struct udphdr));
-				if (sk2) {
-					result = sk2;
-					goto found;
+					if (sk2) {
+						result = sk2;
+						select_ok = false;
+						goto found;
+					}
 				}
 				matches = 1;
 			}
@@ -952,11 +962,9 @@
 		ret = udpv6_queue_rcv_skb(sk, skb);
 		sock_put(sk);
 
-		/* a return value > 0 means to resubmit the input, but
-		 * it wants the return to be -protocol, or 0
-		 */
+		/* a return value > 0 means to resubmit the input */
 		if (ret > 0)
-			return -ret;
+			return ret;
 
 		return 0;
 	}
diff --git a/net/irda/ircomm/ircomm_param.c b/net/irda/ircomm/ircomm_param.c
index 3c4caa6..5728e76 100644
--- a/net/irda/ircomm/ircomm_param.c
+++ b/net/irda/ircomm/ircomm_param.c
@@ -134,11 +134,10 @@
 		return -1;
 	}
 	skb_put(skb, count);
+	pr_debug("%s(), skb->len=%d\n", __func__, skb->len);
 
 	spin_unlock_irqrestore(&self->spinlock, flags);
 
-	pr_debug("%s(), skb->len=%d\n", __func__ , skb->len);
-
 	if (flush) {
 		/* ircomm_tty_do_softint will take care of the rest */
 		schedule_work(&self->tqueue);
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index ef50a94..fc3598a 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -708,6 +708,9 @@
 	if (!addr || addr->sa_family != AF_IUCV)
 		return -EINVAL;
 
+	if (addr_len < sizeof(struct sockaddr_iucv))
+		return -EINVAL;
+
 	lock_sock(sk);
 	if (sk->sk_state != IUCV_OPEN) {
 		err = -EBADFD;
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index f93c5be..2caaa84 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -124,8 +124,13 @@
 	ret = l2tp_nl_tunnel_send(msg, info->snd_portid, info->snd_seq,
 				  NLM_F_ACK, tunnel, cmd);
 
-	if (ret >= 0)
-		return genlmsg_multicast_allns(family, msg, 0,	0, GFP_ATOMIC);
+	if (ret >= 0) {
+		ret = genlmsg_multicast_allns(family, msg, 0, 0, GFP_ATOMIC);
+		/* We don't care if no one is listening */
+		if (ret == -ESRCH)
+			ret = 0;
+		return ret;
+	}
 
 	nlmsg_free(msg);
 
@@ -147,8 +152,13 @@
 	ret = l2tp_nl_session_send(msg, info->snd_portid, info->snd_seq,
 				   NLM_F_ACK, session, cmd);
 
-	if (ret >= 0)
-		return genlmsg_multicast_allns(family, msg, 0,	0, GFP_ATOMIC);
+	if (ret >= 0) {
+		ret = genlmsg_multicast_allns(family, msg, 0, 0, GFP_ATOMIC);
+		/* We don't care if no one is listening */
+		if (ret == -ESRCH)
+			ret = 0;
+		return ret;
+	}
 
 	nlmsg_free(msg);
 
diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index 10ad4ac..367784b 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c
@@ -291,7 +291,7 @@
 	}
 
 	/* prepare A-MPDU MLME for Rx aggregation */
-	tid_agg_rx = kmalloc(sizeof(struct tid_ampdu_rx), GFP_KERNEL);
+	tid_agg_rx = kzalloc(sizeof(*tid_agg_rx), GFP_KERNEL);
 	if (!tid_agg_rx)
 		goto end;
 
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index f7fc0e0..978d3bc 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -1733,7 +1733,6 @@
 		if (sdata->vif.type != NL80211_IFTYPE_ADHOC)
 			continue;
 		sdata->u.ibss.last_scan_completed = jiffies;
-		ieee80211_queue_work(&local->hw, &sdata->work);
 	}
 	mutex_unlock(&local->iflist_mtx);
 }
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index b84f6aa..f006f4a 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -92,7 +92,7 @@
 	u16 extra_len;
 	u16 last_frag;
 	u8 rx_queue;
-	bool ccmp; /* Whether fragments were encrypted with CCMP */
+	bool check_sequential_pn; /* needed for CCMP/GCMP */
 	u8 last_pn[6]; /* PN of the last fragment if CCMP was used */
 };
 
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 6bcf0fa..8190bf2 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -248,6 +248,7 @@
 
 	/* wait for scan work complete */
 	flush_workqueue(local->workqueue);
+	flush_work(&local->sched_scan_stopped_work);
 
 	WARN(test_bit(SCAN_HW_SCANNING, &local->scanning),
 	     "%s called with hardware scan in progress\n", __func__);
@@ -256,6 +257,11 @@
 	list_for_each_entry(sdata, &local->interfaces, list)
 		flush_delayed_work(&sdata->dec_tailroom_needed_wk);
 	ieee80211_scan_cancel(local);
+
+	/* make sure any new ROC will consider local->in_reconfig */
+	flush_delayed_work(&local->roc_work);
+	flush_work(&local->hw_roc_done);
+
 	ieee80211_reconfig(local);
 	rtnl_unlock();
 }
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index fa28500..6f85b6a 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -1370,17 +1370,6 @@
 	sdata_unlock(sdata);
 }
 
-void ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local)
-{
-	struct ieee80211_sub_if_data *sdata;
-
-	rcu_read_lock();
-	list_for_each_entry_rcu(sdata, &local->interfaces, list)
-		if (ieee80211_vif_is_mesh(&sdata->vif) &&
-		    ieee80211_sdata_running(sdata))
-			ieee80211_queue_work(&local->hw, &sdata->work);
-	rcu_read_unlock();
-}
 
 void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata)
 {
diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h
index a159634..4a8019f 100644
--- a/net/mac80211/mesh.h
+++ b/net/mac80211/mesh.h
@@ -362,14 +362,10 @@
 	return sdata->u.mesh.mesh_pp_id == IEEE80211_PATH_PROTOCOL_HWMP;
 }
 
-void ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local);
-
 void mesh_path_flush_by_iface(struct ieee80211_sub_if_data *sdata);
 void mesh_sync_adjust_tbtt(struct ieee80211_sub_if_data *sdata);
 void ieee80211s_stop(void);
 #else
-static inline void
-ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local) {}
 static inline bool mesh_path_sel_is_hwmp(struct ieee80211_sub_if_data *sdata)
 { return false; }
 static inline void mesh_path_flush_by_iface(struct ieee80211_sub_if_data *sdata)
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 1c342e2..bfbb1ac 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -4005,8 +4005,6 @@
 		if (!ieee80211_hw_check(&sdata->local->hw, CONNECTION_MONITOR))
 			ieee80211_queue_work(&sdata->local->hw,
 					     &sdata->u.mgd.monitor_work);
-		/* and do all the other regular work too */
-		ieee80211_queue_work(&sdata->local->hw, &sdata->work);
 	}
 }
 
diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c
index 8b2f4ea..55a9c5b 100644
--- a/net/mac80211/offchannel.c
+++ b/net/mac80211/offchannel.c
@@ -252,14 +252,11 @@
 static void ieee80211_handle_roc_started(struct ieee80211_roc_work *roc,
 					 unsigned long start_time)
 {
-	struct ieee80211_local *local = roc->sdata->local;
-
 	if (WARN_ON(roc->notified))
 		return;
 
 	roc->start_time = start_time;
 	roc->started = true;
-	roc->hw_begun = true;
 
 	if (roc->mgmt_tx_cookie) {
 		if (!WARN_ON(!roc->frame)) {
@@ -274,9 +271,6 @@
 	}
 
 	roc->notified = true;
-
-	if (!local->ops->remain_on_channel)
-		ieee80211_recalc_sw_work(local, start_time);
 }
 
 static void ieee80211_hw_roc_start(struct work_struct *work)
@@ -291,6 +285,7 @@
 		if (!roc->started)
 			break;
 
+		roc->hw_begun = true;
 		ieee80211_handle_roc_started(roc, local->hw_roc_start_time);
 	}
 
@@ -413,6 +408,10 @@
 		return;
 	}
 
+	/* defer roc if driver is not started (i.e. during reconfig) */
+	if (local->in_reconfig)
+		return;
+
 	roc = list_first_entry(&local->roc_list, struct ieee80211_roc_work,
 			       list);
 
@@ -534,8 +533,10 @@
 	 * begin, otherwise they'll both be marked properly by the work
 	 * struct that runs once the driver notifies us of the beginning
 	 */
-	if (cur_roc->hw_begun)
+	if (cur_roc->hw_begun) {
+		new_roc->hw_begun = true;
 		ieee80211_handle_roc_started(new_roc, now);
+	}
 
 	return true;
 }
@@ -658,6 +659,7 @@
 			queued = true;
 			roc->on_channel = tmp->on_channel;
 			ieee80211_handle_roc_started(roc, now);
+			ieee80211_recalc_sw_work(local, now);
 			break;
 		}
 
diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c
index 3ece7d1..b54f398 100644
--- a/net/mac80211/rc80211_minstrel.c
+++ b/net/mac80211/rc80211_minstrel.c
@@ -711,7 +711,7 @@
 	 * computing cur_tp
 	 */
 	tmp_mrs = &mi->r[idx].stats;
-	tmp_cur_tp = minstrel_get_tp_avg(&mi->r[idx], tmp_mrs->prob_ewma);
+	tmp_cur_tp = minstrel_get_tp_avg(&mi->r[idx], tmp_mrs->prob_ewma) * 10;
 	tmp_cur_tp = tmp_cur_tp * 1200 * 8 / 1024;
 
 	return tmp_cur_tp;
diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c
index 3928dbd..370d677 100644
--- a/net/mac80211/rc80211_minstrel_ht.c
+++ b/net/mac80211/rc80211_minstrel_ht.c
@@ -414,15 +414,16 @@
 	    (max_tp_group != MINSTREL_CCK_GROUP))
 		return;
 
+	max_gpr_group = mg->max_group_prob_rate / MCS_GROUP_RATES;
+	max_gpr_idx = mg->max_group_prob_rate % MCS_GROUP_RATES;
+	max_gpr_prob = mi->groups[max_gpr_group].rates[max_gpr_idx].prob_ewma;
+
 	if (mrs->prob_ewma > MINSTREL_FRAC(75, 100)) {
 		cur_tp_avg = minstrel_ht_get_tp_avg(mi, cur_group, cur_idx,
 						    mrs->prob_ewma);
 		if (cur_tp_avg > tmp_tp_avg)
 			mi->max_prob_rate = index;
 
-		max_gpr_group = mg->max_group_prob_rate / MCS_GROUP_RATES;
-		max_gpr_idx = mg->max_group_prob_rate %	MCS_GROUP_RATES;
-		max_gpr_prob = mi->groups[max_gpr_group].rates[max_gpr_idx].prob_ewma;
 		max_gpr_tp_avg = minstrel_ht_get_tp_avg(mi, max_gpr_group,
 							max_gpr_idx,
 							max_gpr_prob);
@@ -431,7 +432,7 @@
 	} else {
 		if (mrs->prob_ewma > tmp_prob)
 			mi->max_prob_rate = index;
-		if (mrs->prob_ewma > mg->rates[mg->max_group_prob_rate].prob_ewma)
+		if (mrs->prob_ewma > max_gpr_prob)
 			mg->max_group_prob_rate = index;
 	}
 }
@@ -691,7 +692,7 @@
 	if (likely(sta->ampdu_mlme.tid_tx[tid]))
 		return;
 
-	ieee80211_start_tx_ba_session(pubsta, tid, 5000);
+	ieee80211_start_tx_ba_session(pubsta, tid, 0);
 }
 
 static void
@@ -871,7 +872,7 @@
 	 *  - if station is in dynamic SMPS (and streams > 1)
 	 *  - for fallback rates, to increase chances of getting through
 	 */
-	if (offset > 0 &&
+	if (offset > 0 ||
 	    (mi->sta->smps_mode == IEEE80211_SMPS_DYNAMIC &&
 	     group->streams > 1)) {
 		ratetbl->rate[offset].count = ratetbl->rate[offset].count_rts;
@@ -1334,7 +1335,8 @@
 	prob = mi->groups[i].rates[j].prob_ewma;
 
 	/* convert tp_avg from pkt per second in kbps */
-	tp_avg = minstrel_ht_get_tp_avg(mi, i, j, prob) * AVG_PKT_SIZE * 8 / 1024;
+	tp_avg = minstrel_ht_get_tp_avg(mi, i, j, prob) * 10;
+	tp_avg = tp_avg * AVG_PKT_SIZE * 8 / 1024;
 
 	return tp_avg;
 }
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index bc08185..60d093f 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1753,7 +1753,7 @@
 	entry->seq = seq;
 	entry->rx_queue = rx_queue;
 	entry->last_frag = frag;
-	entry->ccmp = 0;
+	entry->check_sequential_pn = false;
 	entry->extra_len = 0;
 
 	return entry;
@@ -1849,15 +1849,27 @@
 						 rx->seqno_idx, &(rx->skb));
 		if (rx->key &&
 		    (rx->key->conf.cipher == WLAN_CIPHER_SUITE_CCMP ||
-		     rx->key->conf.cipher == WLAN_CIPHER_SUITE_CCMP_256) &&
+		     rx->key->conf.cipher == WLAN_CIPHER_SUITE_CCMP_256 ||
+		     rx->key->conf.cipher == WLAN_CIPHER_SUITE_GCMP ||
+		     rx->key->conf.cipher == WLAN_CIPHER_SUITE_GCMP_256) &&
 		    ieee80211_has_protected(fc)) {
 			int queue = rx->security_idx;
-			/* Store CCMP PN so that we can verify that the next
-			 * fragment has a sequential PN value. */
-			entry->ccmp = 1;
+
+			/* Store CCMP/GCMP PN so that we can verify that the
+			 * next fragment has a sequential PN value.
+			 */
+			entry->check_sequential_pn = true;
 			memcpy(entry->last_pn,
 			       rx->key->u.ccmp.rx_pn[queue],
 			       IEEE80211_CCMP_PN_LEN);
+			BUILD_BUG_ON(offsetof(struct ieee80211_key,
+					      u.ccmp.rx_pn) !=
+				     offsetof(struct ieee80211_key,
+					      u.gcmp.rx_pn));
+			BUILD_BUG_ON(sizeof(rx->key->u.ccmp.rx_pn[queue]) !=
+				     sizeof(rx->key->u.gcmp.rx_pn[queue]));
+			BUILD_BUG_ON(IEEE80211_CCMP_PN_LEN !=
+				     IEEE80211_GCMP_PN_LEN);
 		}
 		return RX_QUEUED;
 	}
@@ -1872,15 +1884,21 @@
 		return RX_DROP_MONITOR;
 	}
 
-	/* Verify that MPDUs within one MSDU have sequential PN values.
-	 * (IEEE 802.11i, 8.3.3.4.5) */
-	if (entry->ccmp) {
+	/* "The receiver shall discard MSDUs and MMPDUs whose constituent
+	 *  MPDU PN values are not incrementing in steps of 1."
+	 * see IEEE P802.11-REVmc/D5.0, 12.5.3.4.4, item d (for CCMP)
+	 * and IEEE P802.11-REVmc/D5.0, 12.5.5.4.4, item d (for GCMP)
+	 */
+	if (entry->check_sequential_pn) {
 		int i;
 		u8 pn[IEEE80211_CCMP_PN_LEN], *rpn;
 		int queue;
+
 		if (!rx->key ||
 		    (rx->key->conf.cipher != WLAN_CIPHER_SUITE_CCMP &&
-		     rx->key->conf.cipher != WLAN_CIPHER_SUITE_CCMP_256))
+		     rx->key->conf.cipher != WLAN_CIPHER_SUITE_CCMP_256 &&
+		     rx->key->conf.cipher != WLAN_CIPHER_SUITE_GCMP &&
+		     rx->key->conf.cipher != WLAN_CIPHER_SUITE_GCMP_256))
 			return RX_DROP_UNUSABLE;
 		memcpy(pn, entry->last_pn, IEEE80211_CCMP_PN_LEN);
 		for (i = IEEE80211_CCMP_PN_LEN - 1; i >= 0; i--) {
@@ -3366,6 +3384,7 @@
 				return false;
 			/* ignore action frames to TDLS-peers */
 			if (ieee80211_is_action(hdr->frame_control) &&
+			    !is_broadcast_ether_addr(bssid) &&
 			    !ether_addr_equal(bssid, hdr->addr1))
 				return false;
 		}
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index a413e52..ae980ce 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -314,6 +314,7 @@
 	bool was_scanning = local->scanning;
 	struct cfg80211_scan_request *scan_req;
 	struct ieee80211_sub_if_data *scan_sdata;
+	struct ieee80211_sub_if_data *sdata;
 
 	lockdep_assert_held(&local->mtx);
 
@@ -373,7 +374,16 @@
 
 	ieee80211_mlme_notify_scan_completed(local);
 	ieee80211_ibss_notify_scan_completed(local);
-	ieee80211_mesh_notify_scan_completed(local);
+
+	/* Requeue all the work that might have been ignored while
+	 * the scan was in progress; if there was none this will
+	 * just be a no-op for the particular interface.
+	 */
+	list_for_each_entry_rcu(sdata, &local->interfaces, list) {
+		if (ieee80211_sdata_running(sdata))
+			ieee80211_queue_work(&sdata->local->hw, &sdata->work);
+	}
+
 	if (was_scanning)
 		ieee80211_start_next_roc(local);
 }
@@ -1213,6 +1223,14 @@
 
 	trace_api_sched_scan_stopped(local);
 
+	/*
+	 * this shouldn't really happen, so for simplicity
+	 * simply ignore it, and let mac80211 reconfigure
+	 * the sched scan later on.
+	 */
+	if (local->in_reconfig)
+		return;
+
 	schedule_work(&local->sched_scan_stopped_work);
 }
 EXPORT_SYMBOL(ieee80211_sched_scan_stopped);
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 4402ad5..a4a4f89 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -1453,7 +1453,7 @@
 
 	more_data = ieee80211_sta_ps_more_data(sta, ignored_acs, reason, driver_release_tids);
 
-	if (reason == IEEE80211_FRAME_RELEASE_PSPOLL)
+	if (driver_release_tids && reason == IEEE80211_FRAME_RELEASE_PSPOLL)
 		driver_release_tids =
 			BIT(find_highest_prio_tid(driver_release_tids));
 
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index 5bad05e..6101deb 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -51,6 +51,11 @@
 	struct ieee80211_hdr *hdr = (void *)skb->data;
 	int ac;
 
+	if (info->flags & IEEE80211_TX_CTL_NO_PS_BUFFER) {
+		ieee80211_free_txskb(&local->hw, skb);
+		return;
+	}
+
 	/*
 	 * This skb 'survived' a round-trip through the driver, and
 	 * hopefully the driver didn't mangle it too badly. However,
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 3943d4b..58f58bd 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -2043,16 +2043,26 @@
 		 */
 		if (sched_scan_req->n_scan_plans > 1 ||
 		    __ieee80211_request_sched_scan_start(sched_scan_sdata,
-							 sched_scan_req))
+							 sched_scan_req)) {
+			RCU_INIT_POINTER(local->sched_scan_sdata, NULL);
+			RCU_INIT_POINTER(local->sched_scan_req, NULL);
 			sched_scan_stopped = true;
+		}
 	mutex_unlock(&local->mtx);
 
 	if (sched_scan_stopped)
 		cfg80211_sched_scan_stopped_rtnl(local->hw.wiphy);
 
  wake_up:
-	local->in_reconfig = false;
-	barrier();
+	if (local->in_reconfig) {
+		local->in_reconfig = false;
+		barrier();
+
+		/* Restart deferred ROCs */
+		mutex_lock(&local->mtx);
+		ieee80211_start_next_roc(local);
+		mutex_unlock(&local->mtx);
+	}
 
 	if (local->monitors == local->open_count && local->monitors > 0)
 		ieee80211_add_virtual_monitor(local);
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 8c067e6..95e757c 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -891,7 +891,7 @@
 	depends on IPV6 || IPV6=n
 	depends on !NF_CONNTRACK || NF_CONNTRACK
 	select NF_DUP_IPV4
-	select NF_DUP_IPV6 if IP6_NF_IPTABLES != n
+	select NF_DUP_IPV6 if IPV6
 	---help---
 	This option adds a "TEE" target with which a packet can be cloned and
 	this clone be rerouted to another nexthop.
diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c
index 43d8c98..f0f688d 100644
--- a/net/netfilter/ipset/ip_set_hash_netiface.c
+++ b/net/netfilter/ipset/ip_set_hash_netiface.c
@@ -164,8 +164,6 @@
 	};
 	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
 
-	if (e.cidr == 0)
-		return -EINVAL;
 	if (adt == IPSET_TEST)
 		e.cidr = HOST_MASK;
 
@@ -377,8 +375,6 @@
 	};
 	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
 
-	if (e.cidr == 0)
-		return -EINVAL;
 	if (adt == IPSET_TEST)
 		e.cidr = HOST_MASK;
 
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 3cb3cb8..f60b4fd 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -66,6 +66,21 @@
 __cacheline_aligned_in_smp DEFINE_SPINLOCK(nf_conntrack_expect_lock);
 EXPORT_SYMBOL_GPL(nf_conntrack_expect_lock);
 
+static __read_mostly spinlock_t nf_conntrack_locks_all_lock;
+static __read_mostly bool nf_conntrack_locks_all;
+
+void nf_conntrack_lock(spinlock_t *lock) __acquires(lock)
+{
+	spin_lock(lock);
+	while (unlikely(nf_conntrack_locks_all)) {
+		spin_unlock(lock);
+		spin_lock(&nf_conntrack_locks_all_lock);
+		spin_unlock(&nf_conntrack_locks_all_lock);
+		spin_lock(lock);
+	}
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_lock);
+
 static void nf_conntrack_double_unlock(unsigned int h1, unsigned int h2)
 {
 	h1 %= CONNTRACK_LOCKS;
@@ -82,12 +97,12 @@
 	h1 %= CONNTRACK_LOCKS;
 	h2 %= CONNTRACK_LOCKS;
 	if (h1 <= h2) {
-		spin_lock(&nf_conntrack_locks[h1]);
+		nf_conntrack_lock(&nf_conntrack_locks[h1]);
 		if (h1 != h2)
 			spin_lock_nested(&nf_conntrack_locks[h2],
 					 SINGLE_DEPTH_NESTING);
 	} else {
-		spin_lock(&nf_conntrack_locks[h2]);
+		nf_conntrack_lock(&nf_conntrack_locks[h2]);
 		spin_lock_nested(&nf_conntrack_locks[h1],
 				 SINGLE_DEPTH_NESTING);
 	}
@@ -102,16 +117,19 @@
 {
 	int i;
 
-	for (i = 0; i < CONNTRACK_LOCKS; i++)
-		spin_lock_nested(&nf_conntrack_locks[i], i);
+	spin_lock(&nf_conntrack_locks_all_lock);
+	nf_conntrack_locks_all = true;
+
+	for (i = 0; i < CONNTRACK_LOCKS; i++) {
+		spin_lock(&nf_conntrack_locks[i]);
+		spin_unlock(&nf_conntrack_locks[i]);
+	}
 }
 
 static void nf_conntrack_all_unlock(void)
 {
-	int i;
-
-	for (i = 0; i < CONNTRACK_LOCKS; i++)
-		spin_unlock(&nf_conntrack_locks[i]);
+	nf_conntrack_locks_all = false;
+	spin_unlock(&nf_conntrack_locks_all_lock);
 }
 
 unsigned int nf_conntrack_htable_size __read_mostly;
@@ -757,7 +775,7 @@
 	hash = hash_bucket(_hash, net);
 	for (; i < net->ct.htable_size; i++) {
 		lockp = &nf_conntrack_locks[hash % CONNTRACK_LOCKS];
-		spin_lock(lockp);
+		nf_conntrack_lock(lockp);
 		if (read_seqcount_retry(&net->ct.generation, sequence)) {
 			spin_unlock(lockp);
 			goto restart;
@@ -1382,7 +1400,7 @@
 	for (; *bucket < net->ct.htable_size; (*bucket)++) {
 		lockp = &nf_conntrack_locks[*bucket % CONNTRACK_LOCKS];
 		local_bh_disable();
-		spin_lock(lockp);
+		nf_conntrack_lock(lockp);
 		if (*bucket < net->ct.htable_size) {
 			hlist_nulls_for_each_entry(h, n, &net->ct.hash[*bucket], hnnode) {
 				if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL)
@@ -1394,6 +1412,7 @@
 		}
 		spin_unlock(lockp);
 		local_bh_enable();
+		cond_resched();
 	}
 
 	for_each_possible_cpu(cpu) {
@@ -1406,6 +1425,7 @@
 				set_bit(IPS_DYING_BIT, &ct->status);
 		}
 		spin_unlock_bh(&pcpu->lock);
+		cond_resched();
 	}
 	return NULL;
 found:
@@ -1422,6 +1442,8 @@
 	struct nf_conn *ct;
 	unsigned int bucket = 0;
 
+	might_sleep();
+
 	while ((ct = get_next_corpse(net, iter, data, &bucket)) != NULL) {
 		/* Time to push up daises... */
 		if (del_timer(&ct->timeout))
@@ -1430,6 +1452,7 @@
 		/* ... else the timer will get him soon. */
 
 		nf_ct_put(ct);
+		cond_resched();
 	}
 }
 EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup);
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index bd9d315..3b40ec5 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -425,7 +425,7 @@
 	}
 	local_bh_disable();
 	for (i = 0; i < net->ct.htable_size; i++) {
-		spin_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
+		nf_conntrack_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
 		if (i < net->ct.htable_size) {
 			hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode)
 				unhelp(h, me);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index dbb1bb3..355e855 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -840,7 +840,7 @@
 	for (; cb->args[0] < net->ct.htable_size; cb->args[0]++) {
 restart:
 		lockp = &nf_conntrack_locks[cb->args[0] % CONNTRACK_LOCKS];
-		spin_lock(lockp);
+		nf_conntrack_lock(lockp);
 		if (cb->args[0] >= net->ct.htable_size) {
 			spin_unlock(lockp);
 			goto out;
diff --git a/net/netfilter/nf_tables_netdev.c b/net/netfilter/nf_tables_netdev.c
index b6605e0..5eefe4a 100644
--- a/net/netfilter/nf_tables_netdev.c
+++ b/net/netfilter/nf_tables_netdev.c
@@ -224,12 +224,12 @@
 
 	nft_register_chain_type(&nft_filter_chain_netdev);
 	ret = register_pernet_subsys(&nf_tables_netdev_net_ops);
-	if (ret < 0)
+	if (ret < 0) {
 		nft_unregister_chain_type(&nft_filter_chain_netdev);
-
+		return ret;
+	}
 	register_netdevice_notifier(&nf_tables_netdev_notifier);
-
-	return ret;
+	return 0;
 }
 
 static void __exit nf_tables_netdev_exit(void)
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index a7ba233..857ae89 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -311,14 +311,14 @@
 #endif
 		{
 			nfnl_unlock(subsys_id);
-			netlink_ack(skb, nlh, -EOPNOTSUPP);
+			netlink_ack(oskb, nlh, -EOPNOTSUPP);
 			return kfree_skb(skb);
 		}
 	}
 
 	if (!ss->commit || !ss->abort) {
 		nfnl_unlock(subsys_id);
-		netlink_ack(skb, nlh, -EOPNOTSUPP);
+		netlink_ack(oskb, nlh, -EOPNOTSUPP);
 		return kfree_skb(skb);
 	}
 
@@ -328,10 +328,12 @@
 		nlh = nlmsg_hdr(skb);
 		err = 0;
 
-		if (nlmsg_len(nlh) < sizeof(struct nfgenmsg) ||
-		    skb->len < nlh->nlmsg_len) {
-			err = -EINVAL;
-			goto ack;
+		if (nlh->nlmsg_len < NLMSG_HDRLEN ||
+		    skb->len < nlh->nlmsg_len ||
+		    nlmsg_len(nlh) < sizeof(struct nfgenmsg)) {
+			nfnl_err_reset(&err_list);
+			status |= NFNL_BATCH_FAILURE;
+			goto done;
 		}
 
 		/* Only requests are handled by the kernel */
@@ -406,7 +408,7 @@
 				 * pointing to the batch header.
 				 */
 				nfnl_err_reset(&err_list);
-				netlink_ack(skb, nlmsg_hdr(oskb), -ENOMEM);
+				netlink_ack(oskb, nlmsg_hdr(oskb), -ENOMEM);
 				status |= NFNL_BATCH_FAILURE;
 				goto done;
 			}
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index 5d010f2..2671b9d 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -307,7 +307,7 @@
 
 	local_bh_disable();
 	for (i = 0; i < net->ct.htable_size; i++) {
-		spin_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
+		nf_conntrack_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
 		if (i < net->ct.htable_size) {
 			hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode)
 				untimeout(h, timeout);
diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c
index 383c171..b78c28b 100644
--- a/net/netfilter/nft_byteorder.c
+++ b/net/netfilter/nft_byteorder.c
@@ -46,16 +46,14 @@
 		switch (priv->op) {
 		case NFT_BYTEORDER_NTOH:
 			for (i = 0; i < priv->len / 8; i++) {
-				src64 = get_unaligned_be64(&src[i]);
-				src64 = be64_to_cpu((__force __be64)src64);
+				src64 = get_unaligned((u64 *)&src[i]);
 				put_unaligned_be64(src64, &dst[i]);
 			}
 			break;
 		case NFT_BYTEORDER_HTON:
 			for (i = 0; i < priv->len / 8; i++) {
 				src64 = get_unaligned_be64(&src[i]);
-				src64 = (__force u64)cpu_to_be64(src64);
-				put_unaligned_be64(src64, &dst[i]);
+				put_unaligned(src64, (u64 *)&dst[i]);
 			}
 			break;
 		}
diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c
index c7808fc..c9743f7 100644
--- a/net/netfilter/nft_counter.c
+++ b/net/netfilter/nft_counter.c
@@ -100,7 +100,7 @@
 
 	cpu_stats = netdev_alloc_pcpu_stats(struct nft_counter_percpu);
 	if (cpu_stats == NULL)
-		return ENOMEM;
+		return -ENOMEM;
 
 	preempt_disable();
 	this_cpu = this_cpu_ptr(cpu_stats);
@@ -138,7 +138,7 @@
 	cpu_stats = __netdev_alloc_pcpu_stats(struct nft_counter_percpu,
 					      GFP_ATOMIC);
 	if (cpu_stats == NULL)
-		return ENOMEM;
+		return -ENOMEM;
 
 	preempt_disable();
 	this_cpu = this_cpu_ptr(cpu_stats);
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index a0eb216..d4a4619 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -127,6 +127,7 @@
 			       NF_CT_LABELS_MAX_SIZE - size);
 		return;
 	}
+#endif
 	case NFT_CT_BYTES: /* fallthrough */
 	case NFT_CT_PKTS: {
 		const struct nf_conn_acct *acct = nf_conn_acct_find(ct);
@@ -138,7 +139,6 @@
 		memcpy(dest, &count, sizeof(count));
 		return;
 	}
-#endif
 	default:
 		break;
 	}
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index b7c43de..e118397 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -228,7 +228,7 @@
 {
 	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
 	u8 nexthdr;
-	__be16 frag_off;
+	__be16 frag_off, oldlen, newlen;
 	int tcphoff;
 	int ret;
 
@@ -244,7 +244,12 @@
 		return NF_DROP;
 	if (ret > 0) {
 		ipv6h = ipv6_hdr(skb);
-		ipv6h->payload_len = htons(ntohs(ipv6h->payload_len) + ret);
+		oldlen = ipv6h->payload_len;
+		newlen = htons(ntohs(oldlen) + ret);
+		if (skb->ip_summed == CHECKSUM_COMPLETE)
+			skb->csum = csum_add(csum_sub(skb->csum, oldlen),
+					     newlen);
+		ipv6h->payload_len = newlen;
 	}
 	return XT_CONTINUE;
 }
diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c
index 3eff7b6..6e57a39 100644
--- a/net/netfilter/xt_TEE.c
+++ b/net/netfilter/xt_TEE.c
@@ -38,7 +38,7 @@
 	return XT_CONTINUE;
 }
 
-#if IS_ENABLED(CONFIG_NF_DUP_IPV6)
+#if IS_ENABLED(CONFIG_IPV6)
 static unsigned int
 tee_tg6(struct sk_buff *skb, const struct xt_action_param *par)
 {
@@ -131,7 +131,7 @@
 		.destroy    = tee_tg_destroy,
 		.me         = THIS_MODULE,
 	},
-#if IS_ENABLED(CONFIG_NF_DUP_IPV6)
+#if IS_ENABLED(CONFIG_IPV6)
 	{
 		.name       = "TEE",
 		.revision   = 1,
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 81dc1bb..f1ffb34 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -2831,7 +2831,8 @@
 	 * reasonable static buffer based on the expected largest dump of a
 	 * single netdev. The outcome is MSG_TRUNC error.
 	 */
-	skb_reserve(skb, skb_tailroom(skb) - alloc_size);
+	if (!netlink_rx_is_mmaped(sk))
+		skb_reserve(skb, skb_tailroom(skb) - alloc_size);
 	netlink_skb_set_owner_r(skb, sk);
 
 	len = cb->dump(skb, cb);
diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c
index 1605691..5eb7694 100644
--- a/net/openvswitch/vport-vxlan.c
+++ b/net/openvswitch/vport-vxlan.c
@@ -90,7 +90,9 @@
 	int err;
 	struct vxlan_config conf = {
 		.no_share = true,
-		.flags = VXLAN_F_COLLECT_METADATA,
+		.flags = VXLAN_F_COLLECT_METADATA | VXLAN_F_UDP_ZERO_CSUM6_RX,
+		/* Don't restrict the packets that can be sent by MTU */
+		.mtu = IP_MAX_MTU,
 	};
 
 	if (!options) {
diff --git a/net/rfkill/core.c b/net/rfkill/core.c
index f53bf3b6..cf5b69a 100644
--- a/net/rfkill/core.c
+++ b/net/rfkill/core.c
@@ -1095,17 +1095,6 @@
 	return res;
 }
 
-static bool rfkill_readable(struct rfkill_data *data)
-{
-	bool r;
-
-	mutex_lock(&data->mtx);
-	r = !list_empty(&data->events);
-	mutex_unlock(&data->mtx);
-
-	return r;
-}
-
 static ssize_t rfkill_fop_read(struct file *file, char __user *buf,
 			       size_t count, loff_t *pos)
 {
@@ -1122,8 +1111,11 @@
 			goto out;
 		}
 		mutex_unlock(&data->mtx);
+		/* since we re-check and it just compares pointers,
+		 * using !list_empty() without locking isn't a problem
+		 */
 		ret = wait_event_interruptible(data->read_wait,
-					       rfkill_readable(data));
+					       !list_empty(&data->events));
 		mutex_lock(&data->mtx);
 
 		if (ret)
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index d058696..6b70399 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -62,6 +62,7 @@
 	struct xt_tgdtor_param par = {
 		.target   = t->u.kernel.target,
 		.targinfo = t->data,
+		.family   = NFPROTO_IPV4,
 	};
 	if (par.target->destroy != NULL)
 		par.target->destroy(&par);
@@ -195,6 +196,7 @@
 	par.hooknum  = ipt->tcfi_hook;
 	par.target   = ipt->tcfi_t->u.kernel.target;
 	par.targinfo = ipt->tcfi_t->data;
+	par.family   = NFPROTO_IPV4;
 	ret = par.target->target(skb, &par);
 
 	switch (ret) {
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index b5c2cf2..af1acf0 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1852,6 +1852,7 @@
 	}
 
 	tp = old_tp;
+	protocol = tc_skb_protocol(skb);
 	goto reclassify;
 #endif
 }
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index f26bdea..a1cd778 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -403,6 +403,8 @@
 		if (len <= cl->deficit) {
 			cl->deficit -= len;
 			skb = qdisc_dequeue_peeked(cl->qdisc);
+			if (unlikely(skb == NULL))
+				goto out;
 			if (cl->qdisc->q.qlen == 0)
 				list_del(&cl->alist);
 
diff --git a/net/sctp/input.c b/net/sctp/input.c
index bf61dfb..49d2cc7 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -935,15 +935,22 @@
 					struct sctp_transport **pt)
 {
 	struct sctp_transport *t;
+	struct sctp_association *asoc = NULL;
 
+	rcu_read_lock();
 	t = sctp_addrs_lookup_transport(net, local, peer);
-	if (!t || t->dead)
-		return NULL;
+	if (!t || !sctp_transport_hold(t))
+		goto out;
 
-	sctp_association_hold(t->asoc);
+	asoc = t->asoc;
+	sctp_association_hold(asoc);
 	*pt = t;
 
-	return t->asoc;
+	sctp_transport_put(t);
+
+out:
+	rcu_read_unlock();
+	return asoc;
 }
 
 /* Look up an association. protected by RCU read lock */
@@ -955,9 +962,7 @@
 {
 	struct sctp_association *asoc;
 
-	rcu_read_lock();
 	asoc = __sctp_lookup_association(net, laddr, paddr, transportp);
-	rcu_read_unlock();
 
 	return asoc;
 }
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index ec52912..ce46f1c 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -526,6 +526,8 @@
 		}
 		return 0;
 	}
+	if (addr1->v6.sin6_port != addr2->v6.sin6_port)
+		return 0;
 	if (!ipv6_addr_equal(&addr1->v6.sin6_addr, &addr2->v6.sin6_addr))
 		return 0;
 	/* If this is a linklocal address, compare the scope_id. */
diff --git a/net/sctp/proc.c b/net/sctp/proc.c
index 684c5b3..963dffc 100644
--- a/net/sctp/proc.c
+++ b/net/sctp/proc.c
@@ -165,8 +165,6 @@
 	list_for_each_entry_rcu(transport, &assoc->peer.transport_addr_list,
 			transports) {
 		addr = &transport->ipaddr;
-		if (transport->dead)
-			continue;
 
 		af = sctp_get_af_specific(addr->sa.sa_family);
 		if (af->cmp_addr(addr, primary)) {
@@ -380,6 +378,8 @@
 	}
 
 	transport = (struct sctp_transport *)v;
+	if (!sctp_transport_hold(transport))
+		return 0;
 	assoc = transport->asoc;
 	epb = &assoc->base;
 	sk = epb->sk;
@@ -412,6 +412,8 @@
 		sk->sk_rcvbuf);
 	seq_printf(seq, "\n");
 
+	sctp_transport_put(transport);
+
 	return 0;
 }
 
@@ -480,7 +482,7 @@
 static int sctp_remaddr_seq_show(struct seq_file *seq, void *v)
 {
 	struct sctp_association *assoc;
-	struct sctp_transport *tsp;
+	struct sctp_transport *transport, *tsp;
 
 	if (v == SEQ_START_TOKEN) {
 		seq_printf(seq, "ADDR ASSOC_ID HB_ACT RTO MAX_PATH_RTX "
@@ -488,13 +490,13 @@
 		return 0;
 	}
 
-	tsp = (struct sctp_transport *)v;
-	assoc = tsp->asoc;
+	transport = (struct sctp_transport *)v;
+	if (!sctp_transport_hold(transport))
+		return 0;
+	assoc = transport->asoc;
 
 	list_for_each_entry_rcu(tsp, &assoc->peer.transport_addr_list,
 				transports) {
-		if (tsp->dead)
-			continue;
 		/*
 		 * The remote address (ADDR)
 		 */
@@ -544,6 +546,8 @@
 		seq_printf(seq, "\n");
 	}
 
+	sctp_transport_put(transport);
+
 	return 0;
 }
 
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index ab0d538..1099e99 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -60,6 +60,8 @@
 #include <net/inet_common.h>
 #include <net/inet_ecn.h>
 
+#define MAX_SCTP_PORT_HASH_ENTRIES (64 * 1024)
+
 /* Global data structures. */
 struct sctp_globals sctp_globals __read_mostly;
 
@@ -1355,6 +1357,8 @@
 	unsigned long limit;
 	int max_share;
 	int order;
+	int num_entries;
+	int max_entry_order;
 
 	sock_skb_cb_check_size(sizeof(struct sctp_ulpevent));
 
@@ -1407,14 +1411,24 @@
 
 	/* Size and allocate the association hash table.
 	 * The methodology is similar to that of the tcp hash tables.
+	 * Though not identical.  Start by getting a goal size
 	 */
 	if (totalram_pages >= (128 * 1024))
 		goal = totalram_pages >> (22 - PAGE_SHIFT);
 	else
 		goal = totalram_pages >> (24 - PAGE_SHIFT);
 
-	for (order = 0; (1UL << order) < goal; order++)
-		;
+	/* Then compute the page order for said goal */
+	order = get_order(goal);
+
+	/* Now compute the required page order for the maximum sized table we
+	 * want to create
+	 */
+	max_entry_order = get_order(MAX_SCTP_PORT_HASH_ENTRIES *
+				    sizeof(struct sctp_bind_hashbucket));
+
+	/* Limit the page order by that maximum hash table size */
+	order = min(order, max_entry_order);
 
 	/* Allocate and initialize the endpoint hash table.  */
 	sctp_ep_hashsize = 64;
@@ -1430,20 +1444,35 @@
 		INIT_HLIST_HEAD(&sctp_ep_hashtable[i].chain);
 	}
 
-	/* Allocate and initialize the SCTP port hash table.  */
+	/* Allocate and initialize the SCTP port hash table.
+	 * Note that order is initalized to start at the max sized
+	 * table we want to support.  If we can't get that many pages
+	 * reduce the order and try again
+	 */
 	do {
-		sctp_port_hashsize = (1UL << order) * PAGE_SIZE /
-					sizeof(struct sctp_bind_hashbucket);
-		if ((sctp_port_hashsize > (64 * 1024)) && order > 0)
-			continue;
 		sctp_port_hashtable = (struct sctp_bind_hashbucket *)
 			__get_free_pages(GFP_KERNEL | __GFP_NOWARN, order);
 	} while (!sctp_port_hashtable && --order > 0);
+
 	if (!sctp_port_hashtable) {
 		pr_err("Failed bind hash alloc\n");
 		status = -ENOMEM;
 		goto err_bhash_alloc;
 	}
+
+	/* Now compute the number of entries that will fit in the
+	 * port hash space we allocated
+	 */
+	num_entries = (1UL << order) * PAGE_SIZE /
+		      sizeof(struct sctp_bind_hashbucket);
+
+	/* And finish by rounding it down to the nearest power of two
+	 * this wastes some memory of course, but its needed because
+	 * the hash function operates based on the assumption that
+	 * that the number of entries is a power of two
+	 */
+	sctp_port_hashsize = rounddown_pow_of_two(num_entries);
+
 	for (i = 0; i < sctp_port_hashsize; i++) {
 		spin_lock_init(&sctp_port_hashtable[i].lock);
 		INIT_HLIST_HEAD(&sctp_port_hashtable[i].chain);
@@ -1452,7 +1481,8 @@
 	if (sctp_transport_hashtable_init())
 		goto err_thash_alloc;
 
-	pr_info("Hash tables configured (bind %d)\n", sctp_port_hashsize);
+	pr_info("Hash tables configured (bind %d/%d)\n", sctp_port_hashsize,
+		num_entries);
 
 	sctp_sysctl_register();
 
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index 2e21384..b5327bb 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -259,12 +259,6 @@
 		goto out_unlock;
 	}
 
-	/* Is this transport really dead and just waiting around for
-	 * the timer to let go of the reference?
-	 */
-	if (transport->dead)
-		goto out_unlock;
-
 	/* Run through the state machine.  */
 	error = sctp_do_sm(net, SCTP_EVENT_T_TIMEOUT,
 			   SCTP_ST_TIMEOUT(SCTP_EVENT_TIMEOUT_T3_RTX),
@@ -380,12 +374,6 @@
 		goto out_unlock;
 	}
 
-	/* Is this structure just waiting around for us to actually
-	 * get destroyed?
-	 */
-	if (transport->dead)
-		goto out_unlock;
-
 	error = sctp_do_sm(net, SCTP_EVENT_T_TIMEOUT,
 			   SCTP_ST_TIMEOUT(SCTP_EVENT_TIMEOUT_HEARTBEAT),
 			   asoc->state, asoc->ep, asoc,
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 9bb80ec..e878da0 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -5538,6 +5538,7 @@
 	struct sctp_hmac_algo_param *hmacs;
 	__u16 data_len = 0;
 	u32 num_idents;
+	int i;
 
 	if (!ep->auth_enable)
 		return -EACCES;
@@ -5555,8 +5556,12 @@
 		return -EFAULT;
 	if (put_user(num_idents, &p->shmac_num_idents))
 		return -EFAULT;
-	if (copy_to_user(p->shmac_idents, hmacs->hmac_ids, data_len))
-		return -EFAULT;
+	for (i = 0; i < num_idents; i++) {
+		__u16 hmacid = ntohs(hmacs->hmac_ids[i]);
+
+		if (copy_to_user(&p->shmac_idents[i], &hmacid, sizeof(__u16)))
+			return -EFAULT;
+	}
 	return 0;
 }
 
@@ -6636,6 +6641,7 @@
 
 			if (cmsgs->srinfo->sinfo_flags &
 			    ~(SCTP_UNORDERED | SCTP_ADDR_OVER |
+			      SCTP_SACK_IMMEDIATELY |
 			      SCTP_ABORT | SCTP_EOF))
 				return -EINVAL;
 			break;
@@ -6659,6 +6665,7 @@
 
 			if (cmsgs->sinfo->snd_flags &
 			    ~(SCTP_UNORDERED | SCTP_ADDR_OVER |
+			      SCTP_SACK_IMMEDIATELY |
 			      SCTP_ABORT | SCTP_EOF))
 				return -EINVAL;
 			break;
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index aab9e3f..a431c14 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -132,8 +132,6 @@
  */
 void sctp_transport_free(struct sctp_transport *transport)
 {
-	transport->dead = 1;
-
 	/* Try to delete the heartbeat timer.  */
 	if (del_timer(&transport->hb_timer))
 		sctp_transport_put(transport);
@@ -169,7 +167,7 @@
  */
 static void sctp_transport_destroy(struct sctp_transport *transport)
 {
-	if (unlikely(!transport->dead)) {
+	if (unlikely(atomic_read(&transport->refcnt))) {
 		WARN(1, "Attempt to destroy undead transport %p!\n", transport);
 		return;
 	}
@@ -296,9 +294,9 @@
 }
 
 /* Hold a reference to a transport.  */
-void sctp_transport_hold(struct sctp_transport *transport)
+int sctp_transport_hold(struct sctp_transport *transport)
 {
-	atomic_inc(&transport->refcnt);
+	return atomic_add_unless(&transport->refcnt, 1, 0);
 }
 
 /* Release a reference to a transport and clean up
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 799e65b..cabf586 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -740,7 +740,7 @@
 		default:
 			printk(KERN_CRIT "%s: bad return from "
 				"gss_fill_context: %zd\n", __func__, err);
-			BUG();
+			gss_msg->msg.errno = -EIO;
 		}
 		goto err_release_msg;
 	}
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 2b32fd6..273bc3a 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -1225,7 +1225,7 @@
 	if (bp[0] == '\\' && bp[1] == 'x') {
 		/* HEX STRING */
 		bp += 2;
-		while (len < bufsize) {
+		while (len < bufsize - 1) {
 			int h, l;
 
 			h = hex_to_bin(bp[0]);
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c
index cc1251d..2dcd764 100644
--- a/net/sunrpc/xprtrdma/backchannel.c
+++ b/net/sunrpc/xprtrdma/backchannel.c
@@ -341,6 +341,8 @@
 	rqst->rq_reply_bytes_recvd = 0;
 	rqst->rq_bytes_sent = 0;
 	rqst->rq_xid = headerp->rm_xid;
+
+	rqst->rq_private_buf.len = size;
 	set_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state);
 
 	buf = &rqst->rq_rcv_buf;
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index ebc661d..8b5833c 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -20,6 +20,7 @@
 #include <linux/list.h>
 #include <linux/workqueue.h>
 #include <linux/if_vlan.h>
+#include <linux/rtnetlink.h>
 #include <net/ip_fib.h>
 #include <net/switchdev.h>
 
@@ -567,7 +568,6 @@
 }
 EXPORT_SYMBOL_GPL(switchdev_port_obj_dump);
 
-static DEFINE_MUTEX(switchdev_mutex);
 static RAW_NOTIFIER_HEAD(switchdev_notif_chain);
 
 /**
@@ -582,9 +582,9 @@
 {
 	int err;
 
-	mutex_lock(&switchdev_mutex);
+	rtnl_lock();
 	err = raw_notifier_chain_register(&switchdev_notif_chain, nb);
-	mutex_unlock(&switchdev_mutex);
+	rtnl_unlock();
 	return err;
 }
 EXPORT_SYMBOL_GPL(register_switchdev_notifier);
@@ -600,9 +600,9 @@
 {
 	int err;
 
-	mutex_lock(&switchdev_mutex);
+	rtnl_lock();
 	err = raw_notifier_chain_unregister(&switchdev_notif_chain, nb);
-	mutex_unlock(&switchdev_mutex);
+	rtnl_unlock();
 	return err;
 }
 EXPORT_SYMBOL_GPL(unregister_switchdev_notifier);
@@ -616,16 +616,17 @@
  *	Call all network notifier blocks. This should be called by driver
  *	when it needs to propagate hardware event.
  *	Return values are same as for atomic_notifier_call_chain().
+ *	rtnl_lock must be held.
  */
 int call_switchdev_notifiers(unsigned long val, struct net_device *dev,
 			     struct switchdev_notifier_info *info)
 {
 	int err;
 
+	ASSERT_RTNL();
+
 	info->dev = dev;
-	mutex_lock(&switchdev_mutex);
 	err = raw_notifier_call_chain(&switchdev_notif_chain, val, info);
-	mutex_unlock(&switchdev_mutex);
 	return err;
 }
 EXPORT_SYMBOL_GPL(call_switchdev_notifiers);
@@ -1092,8 +1093,11 @@
 		.cb = cb,
 		.idx = idx,
 	};
+	int err;
 
-	switchdev_port_obj_dump(dev, &dump.fdb.obj, switchdev_port_fdb_dump_cb);
+	err = switchdev_port_obj_dump(dev, &dump.fdb.obj,
+				      switchdev_port_fdb_dump_cb);
+	cb->args[1] = err;
 	return dump.idx;
 }
 EXPORT_SYMBOL_GPL(switchdev_port_fdb_dump);
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 0c2944f..347cdc9 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -1973,8 +1973,10 @@
 
 	hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family,
 			  NLM_F_MULTI, TIPC_NL_LINK_GET);
-	if (!hdr)
+	if (!hdr) {
+		tipc_bcast_unlock(net);
 		return -EMSGSIZE;
+	}
 
 	attrs = nla_nest_start(msg->skb, TIPC_NLA_LINK);
 	if (!attrs)
diff --git a/net/tipc/node.c b/net/tipc/node.c
index fa97d96..9d7a16f 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -346,12 +346,6 @@
 	skb_queue_head_init(&n->bc_entry.inputq2);
 	for (i = 0; i < MAX_BEARERS; i++)
 		spin_lock_init(&n->links[i].lock);
-	hlist_add_head_rcu(&n->hash, &tn->node_htable[tipc_hashfn(addr)]);
-	list_for_each_entry_rcu(temp_node, &tn->node_list, list) {
-		if (n->addr < temp_node->addr)
-			break;
-	}
-	list_add_tail_rcu(&n->list, &temp_node->list);
 	n->state = SELF_DOWN_PEER_LEAVING;
 	n->signature = INVALID_NODE_SIG;
 	n->active_links[0] = INVALID_BEARER_ID;
@@ -372,6 +366,12 @@
 	tipc_node_get(n);
 	setup_timer(&n->timer, tipc_node_timeout, (unsigned long)n);
 	n->keepalive_intv = U32_MAX;
+	hlist_add_head_rcu(&n->hash, &tn->node_htable[tipc_hashfn(addr)]);
+	list_for_each_entry_rcu(temp_node, &tn->node_list, list) {
+		if (n->addr < temp_node->addr)
+			break;
+	}
+	list_add_tail_rcu(&n->list, &temp_node->list);
 exit:
 	spin_unlock_bh(&tn->node_list_lock);
 	return n;
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 69c2905..4d420bb 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -673,7 +673,7 @@
 	struct tipc_sock *tsk = tipc_sk(sk);
 	struct net *net = sock_net(sk);
 	struct tipc_msg *mhdr = &tsk->phdr;
-	struct sk_buff_head *pktchain = &sk->sk_write_queue;
+	struct sk_buff_head pktchain;
 	struct iov_iter save = msg->msg_iter;
 	uint mtu;
 	int rc;
@@ -687,14 +687,16 @@
 	msg_set_nameupper(mhdr, seq->upper);
 	msg_set_hdr_sz(mhdr, MCAST_H_SIZE);
 
+	skb_queue_head_init(&pktchain);
+
 new_mtu:
 	mtu = tipc_bcast_get_mtu(net);
-	rc = tipc_msg_build(mhdr, msg, 0, dsz, mtu, pktchain);
+	rc = tipc_msg_build(mhdr, msg, 0, dsz, mtu, &pktchain);
 	if (unlikely(rc < 0))
 		return rc;
 
 	do {
-		rc = tipc_bcast_xmit(net, pktchain);
+		rc = tipc_bcast_xmit(net, &pktchain);
 		if (likely(!rc))
 			return dsz;
 
@@ -704,7 +706,7 @@
 			if (!rc)
 				continue;
 		}
-		__skb_queue_purge(pktchain);
+		__skb_queue_purge(&pktchain);
 		if (rc == -EMSGSIZE) {
 			msg->msg_iter = save;
 			goto new_mtu;
@@ -863,7 +865,7 @@
 	struct net *net = sock_net(sk);
 	struct tipc_msg *mhdr = &tsk->phdr;
 	u32 dnode, dport;
-	struct sk_buff_head *pktchain = &sk->sk_write_queue;
+	struct sk_buff_head pktchain;
 	struct sk_buff *skb;
 	struct tipc_name_seq *seq;
 	struct iov_iter save;
@@ -924,17 +926,18 @@
 		msg_set_hdr_sz(mhdr, BASIC_H_SIZE);
 	}
 
+	skb_queue_head_init(&pktchain);
 	save = m->msg_iter;
 new_mtu:
 	mtu = tipc_node_get_mtu(net, dnode, tsk->portid);
-	rc = tipc_msg_build(mhdr, m, 0, dsz, mtu, pktchain);
+	rc = tipc_msg_build(mhdr, m, 0, dsz, mtu, &pktchain);
 	if (rc < 0)
 		return rc;
 
 	do {
-		skb = skb_peek(pktchain);
+		skb = skb_peek(&pktchain);
 		TIPC_SKB_CB(skb)->wakeup_pending = tsk->link_cong;
-		rc = tipc_node_xmit(net, pktchain, dnode, tsk->portid);
+		rc = tipc_node_xmit(net, &pktchain, dnode, tsk->portid);
 		if (likely(!rc)) {
 			if (sock->state != SS_READY)
 				sock->state = SS_CONNECTING;
@@ -946,7 +949,7 @@
 			if (!rc)
 				continue;
 		}
-		__skb_queue_purge(pktchain);
+		__skb_queue_purge(&pktchain);
 		if (rc == -EMSGSIZE) {
 			m->msg_iter = save;
 			goto new_mtu;
@@ -1016,7 +1019,7 @@
 	struct net *net = sock_net(sk);
 	struct tipc_sock *tsk = tipc_sk(sk);
 	struct tipc_msg *mhdr = &tsk->phdr;
-	struct sk_buff_head *pktchain = &sk->sk_write_queue;
+	struct sk_buff_head pktchain;
 	DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
 	u32 portid = tsk->portid;
 	int rc = -EINVAL;
@@ -1044,17 +1047,19 @@
 
 	timeo = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT);
 	dnode = tsk_peer_node(tsk);
+	skb_queue_head_init(&pktchain);
 
 next:
 	save = m->msg_iter;
 	mtu = tsk->max_pkt;
 	send = min_t(uint, dsz - sent, TIPC_MAX_USER_MSG_SIZE);
-	rc = tipc_msg_build(mhdr, m, sent, send, mtu, pktchain);
+	rc = tipc_msg_build(mhdr, m, sent, send, mtu, &pktchain);
 	if (unlikely(rc < 0))
 		return rc;
+
 	do {
 		if (likely(!tsk_conn_cong(tsk))) {
-			rc = tipc_node_xmit(net, pktchain, dnode, portid);
+			rc = tipc_node_xmit(net, &pktchain, dnode, portid);
 			if (likely(!rc)) {
 				tsk->sent_unacked++;
 				sent += send;
@@ -1063,7 +1068,7 @@
 				goto next;
 			}
 			if (rc == -EMSGSIZE) {
-				__skb_queue_purge(pktchain);
+				__skb_queue_purge(&pktchain);
 				tsk->max_pkt = tipc_node_get_mtu(net, dnode,
 								 portid);
 				m->msg_iter = save;
@@ -1077,7 +1082,7 @@
 		rc = tipc_wait_for_sndpkt(sock, &timeo);
 	} while (!rc);
 
-	__skb_queue_purge(pktchain);
+	__skb_queue_purge(&pktchain);
 	return sent ? sent : rc;
 }
 
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index 350cca3..f9ff73a 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -289,15 +289,15 @@
 				struct sockaddr_tipc *addr, void *usr_data,
 				void *buf, size_t len)
 {
-	struct tipc_subscriber *subscriber = usr_data;
+	struct tipc_subscriber *subscrb = usr_data;
 	struct tipc_subscription *sub = NULL;
 	struct tipc_net *tn = net_generic(net, tipc_net_id);
 
-	tipc_subscrp_create(net, (struct tipc_subscr *)buf, subscriber, &sub);
+	if (tipc_subscrp_create(net, (struct tipc_subscr *)buf, subscrb, &sub))
+		return tipc_conn_terminate(tn->topsrv, subscrb->conid);
+
 	if (sub)
 		tipc_nametbl_subscribe(sub);
-	else
-		tipc_conn_terminate(tn->topsrv, subscriber->conid);
 }
 
 /* Handle one request to establish a new subscriber */
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index c5bf5ef..f75f847 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1496,7 +1496,7 @@
 	UNIXCB(skb).fp = NULL;
 
 	for (i = scm->fp->count-1; i >= 0; i--)
-		unix_notinflight(scm->fp->fp[i]);
+		unix_notinflight(scm->fp->user, scm->fp->fp[i]);
 }
 
 static void unix_destruct_scm(struct sk_buff *skb)
@@ -1561,7 +1561,7 @@
 		return -ENOMEM;
 
 	for (i = scm->fp->count - 1; i >= 0; i--)
-		unix_inflight(scm->fp->fp[i]);
+		unix_inflight(scm->fp->user, scm->fp->fp[i]);
 	return max_level;
 }
 
@@ -1781,7 +1781,12 @@
 			goto out_unlock;
 	}
 
-	if (unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
+	/* other == sk && unix_peer(other) != sk if
+	 * - unix_peer(sk) == NULL, destination address bound to sk
+	 * - unix_peer(sk) == sk by time of get but disconnected before lock
+	 */
+	if (other != sk &&
+	    unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
 		if (timeo) {
 			timeo = unix_wait_for_peer(other, timeo);
 
@@ -2277,13 +2282,15 @@
 	size_t size = state->size;
 	unsigned int last_len;
 
-	err = -EINVAL;
-	if (sk->sk_state != TCP_ESTABLISHED)
+	if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
+		err = -EINVAL;
 		goto out;
+	}
 
-	err = -EOPNOTSUPP;
-	if (flags & MSG_OOB)
+	if (unlikely(flags & MSG_OOB)) {
+		err = -EOPNOTSUPP;
 		goto out;
+	}
 
 	target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
 	timeo = sock_rcvtimeo(sk, noblock);
@@ -2305,6 +2312,7 @@
 		bool drop_skb;
 		struct sk_buff *skb, *last;
 
+redo:
 		unix_state_lock(sk);
 		if (sock_flag(sk, SOCK_DEAD)) {
 			err = -ECONNRESET;
@@ -2329,9 +2337,11 @@
 				goto unlock;
 
 			unix_state_unlock(sk);
-			err = -EAGAIN;
-			if (!timeo)
+			if (!timeo) {
+				err = -EAGAIN;
 				break;
+			}
+
 			mutex_unlock(&u->readlock);
 
 			timeo = unix_stream_data_wait(sk, timeo, last,
@@ -2339,11 +2349,12 @@
 
 			if (signal_pending(current)) {
 				err = sock_intr_errno(timeo);
+				scm_destroy(&scm);
 				goto out;
 			}
 
 			mutex_lock(&u->readlock);
-			continue;
+			goto redo;
 unlock:
 			unix_state_unlock(sk);
 			break;
diff --git a/net/unix/diag.c b/net/unix/diag.c
index c512f64..4d96797 100644
--- a/net/unix/diag.c
+++ b/net/unix/diag.c
@@ -220,7 +220,7 @@
 	return skb->len;
 }
 
-static struct sock *unix_lookup_by_ino(int ino)
+static struct sock *unix_lookup_by_ino(unsigned int ino)
 {
 	int i;
 	struct sock *sk;
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index 8fcdc22..6a0d485 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -116,7 +116,7 @@
  * descriptor if it is for an AF_UNIX socket.
  */
 
-void unix_inflight(struct file *fp)
+void unix_inflight(struct user_struct *user, struct file *fp)
 {
 	struct sock *s = unix_get_socket(fp);
 
@@ -133,11 +133,11 @@
 		}
 		unix_tot_inflight++;
 	}
-	fp->f_cred->user->unix_inflight++;
+	user->unix_inflight++;
 	spin_unlock(&unix_gc_lock);
 }
 
-void unix_notinflight(struct file *fp)
+void unix_notinflight(struct user_struct *user, struct file *fp)
 {
 	struct sock *s = unix_get_socket(fp);
 
@@ -152,7 +152,7 @@
 			list_del_init(&u->link);
 		unix_tot_inflight--;
 	}
-	fp->f_cred->user->unix_inflight--;
+	user->unix_inflight--;
 	spin_unlock(&unix_gc_lock);
 }
 
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 7fd1220..bbe65dc 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -1557,8 +1557,6 @@
 	if (err < 0)
 		goto out;
 
-	prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
-
 	while (total_written < len) {
 		ssize_t written;
 
@@ -1578,7 +1576,9 @@
 				goto out_wait;
 
 			release_sock(sk);
+			prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
 			timeout = schedule_timeout(timeout);
+			finish_wait(sk_sleep(sk), &wait);
 			lock_sock(sk);
 			if (signal_pending(current)) {
 				err = sock_intr_errno(timeout);
@@ -1588,8 +1588,6 @@
 				goto out_wait;
 			}
 
-			prepare_to_wait(sk_sleep(sk), &wait,
-					TASK_INTERRUPTIBLE);
 		}
 
 		/* These checks occur both as part of and after the loop
@@ -1635,7 +1633,6 @@
 out_wait:
 	if (total_written > 0)
 		err = total_written;
-	finish_wait(sk_sleep(sk), &wait);
 out:
 	release_sock(sk);
 	return err;
@@ -1716,7 +1713,6 @@
 	if (err < 0)
 		goto out;
 
-	prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
 
 	while (1) {
 		s64 ready = vsock_stream_has_data(vsk);
@@ -1727,7 +1723,7 @@
 			 */
 
 			err = -ENOMEM;
-			goto out_wait;
+			goto out;
 		} else if (ready > 0) {
 			ssize_t read;
 
@@ -1750,7 +1746,7 @@
 					vsk, target, read,
 					!(flags & MSG_PEEK), &recv_data);
 			if (err < 0)
-				goto out_wait;
+				goto out;
 
 			if (read >= target || flags & MSG_PEEK)
 				break;
@@ -1773,7 +1769,9 @@
 				break;
 
 			release_sock(sk);
+			prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
 			timeout = schedule_timeout(timeout);
+			finish_wait(sk_sleep(sk), &wait);
 			lock_sock(sk);
 
 			if (signal_pending(current)) {
@@ -1783,9 +1781,6 @@
 				err = -EAGAIN;
 				break;
 			}
-
-			prepare_to_wait(sk_sleep(sk), &wait,
-					TASK_INTERRUPTIBLE);
 		}
 	}
 
@@ -1816,8 +1811,6 @@
 		err = copied;
 	}
 
-out_wait:
-	finish_wait(sk_sleep(sk), &wait);
 out:
 	release_sock(sk);
 	return err;
diff --git a/net/wireless/core.c b/net/wireless/core.c
index b091551..8f0bac7 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -1147,6 +1147,8 @@
 		return NOTIFY_DONE;
 	}
 
+	wireless_nlevent_flush();
+
 	return NOTIFY_OK;
 }
 
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index d4786f2..711cb7a 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -7547,7 +7547,7 @@
 
 		if ((ibss.chandef.width != NL80211_CHAN_WIDTH_20_NOHT) &&
 		    no_ht) {
-			kfree(connkeys);
+			kzfree(connkeys);
 			return -EINVAL;
 		}
 	}
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 3b0ce1c..547ceec 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -231,20 +231,22 @@
 		/* IEEE 802.11b/g, channels 1..11 */
 		REG_RULE(2412-10, 2462+10, 40, 6, 20, 0),
 		/* IEEE 802.11b/g, channels 12..13. */
-		REG_RULE(2467-10, 2472+10, 40, 6, 20,
-			NL80211_RRF_NO_IR),
+		REG_RULE(2467-10, 2472+10, 20, 6, 20,
+			NL80211_RRF_NO_IR | NL80211_RRF_AUTO_BW),
 		/* IEEE 802.11 channel 14 - Only JP enables
 		 * this and for 802.11b only */
 		REG_RULE(2484-10, 2484+10, 20, 6, 20,
 			NL80211_RRF_NO_IR |
 			NL80211_RRF_NO_OFDM),
 		/* IEEE 802.11a, channel 36..48 */
-		REG_RULE(5180-10, 5240+10, 160, 6, 20,
-                        NL80211_RRF_NO_IR),
+		REG_RULE(5180-10, 5240+10, 80, 6, 20,
+                        NL80211_RRF_NO_IR |
+                        NL80211_RRF_AUTO_BW),
 
 		/* IEEE 802.11a, channel 52..64 - DFS required */
-		REG_RULE(5260-10, 5320+10, 160, 6, 20,
+		REG_RULE(5260-10, 5320+10, 80, 6, 20,
 			NL80211_RRF_NO_IR |
+			NL80211_RRF_AUTO_BW |
 			NL80211_RRF_DFS),
 
 		/* IEEE 802.11a, channel 100..144 - DFS required */
@@ -2745,7 +2747,7 @@
 	const struct ieee80211_power_rule *power_rule = NULL;
 	char bw[32], cac_time[32];
 
-	pr_info("  (start_freq - end_freq @ bandwidth), (max_antenna_gain, max_eirp), (dfs_cac_time)\n");
+	pr_debug("  (start_freq - end_freq @ bandwidth), (max_antenna_gain, max_eirp), (dfs_cac_time)\n");
 
 	for (i = 0; i < rd->n_reg_rules; i++) {
 		reg_rule = &rd->reg_rules[i];
@@ -2772,7 +2774,7 @@
 		 * in certain regions
 		 */
 		if (power_rule->max_antenna_gain)
-			pr_info("  (%d KHz - %d KHz @ %s), (%d mBi, %d mBm), (%s)\n",
+			pr_debug("  (%d KHz - %d KHz @ %s), (%d mBi, %d mBm), (%s)\n",
 				freq_range->start_freq_khz,
 				freq_range->end_freq_khz,
 				bw,
@@ -2780,7 +2782,7 @@
 				power_rule->max_eirp,
 				cac_time);
 		else
-			pr_info("  (%d KHz - %d KHz @ %s), (N/A, %d mBm), (%s)\n",
+			pr_debug("  (%d KHz - %d KHz @ %s), (N/A, %d mBm), (%s)\n",
 				freq_range->start_freq_khz,
 				freq_range->end_freq_khz,
 				bw,
@@ -2813,35 +2815,35 @@
 			struct cfg80211_registered_device *rdev;
 			rdev = cfg80211_rdev_by_wiphy_idx(lr->wiphy_idx);
 			if (rdev) {
-				pr_info("Current regulatory domain updated by AP to: %c%c\n",
+				pr_debug("Current regulatory domain updated by AP to: %c%c\n",
 					rdev->country_ie_alpha2[0],
 					rdev->country_ie_alpha2[1]);
 			} else
-				pr_info("Current regulatory domain intersected:\n");
+				pr_debug("Current regulatory domain intersected:\n");
 		} else
-			pr_info("Current regulatory domain intersected:\n");
+			pr_debug("Current regulatory domain intersected:\n");
 	} else if (is_world_regdom(rd->alpha2)) {
-		pr_info("World regulatory domain updated:\n");
+		pr_debug("World regulatory domain updated:\n");
 	} else {
 		if (is_unknown_alpha2(rd->alpha2))
-			pr_info("Regulatory domain changed to driver built-in settings (unknown country)\n");
+			pr_debug("Regulatory domain changed to driver built-in settings (unknown country)\n");
 		else {
 			if (reg_request_cell_base(lr))
-				pr_info("Regulatory domain changed to country: %c%c by Cell Station\n",
+				pr_debug("Regulatory domain changed to country: %c%c by Cell Station\n",
 					rd->alpha2[0], rd->alpha2[1]);
 			else
-				pr_info("Regulatory domain changed to country: %c%c\n",
+				pr_debug("Regulatory domain changed to country: %c%c\n",
 					rd->alpha2[0], rd->alpha2[1]);
 		}
 	}
 
-	pr_info(" DFS Master region: %s", reg_dfs_region_str(rd->dfs_region));
+	pr_debug(" DFS Master region: %s", reg_dfs_region_str(rd->dfs_region));
 	print_rd_rules(rd);
 }
 
 static void print_regdomain_info(const struct ieee80211_regdomain *rd)
 {
-	pr_info("Regulatory domain: %c%c\n", rd->alpha2[0], rd->alpha2[1]);
+	pr_debug("Regulatory domain: %c%c\n", rd->alpha2[0], rd->alpha2[1]);
 	print_rd_rules(rd);
 }
 
@@ -2862,7 +2864,8 @@
 		return -EALREADY;
 
 	if (!is_valid_rd(rd)) {
-		pr_err("Invalid regulatory domain detected:\n");
+		pr_err("Invalid regulatory domain detected: %c%c\n",
+		       rd->alpha2[0], rd->alpha2[1]);
 		print_regdomain_info(rd);
 		return -EINVAL;
 	}
@@ -2898,7 +2901,8 @@
 		return -EALREADY;
 
 	if (!is_valid_rd(rd)) {
-		pr_err("Invalid regulatory domain detected:\n");
+		pr_err("Invalid regulatory domain detected: %c%c\n",
+		       rd->alpha2[0], rd->alpha2[1]);
 		print_regdomain_info(rd);
 		return -EINVAL;
 	}
@@ -2956,7 +2960,8 @@
 	 */
 
 	if (!is_valid_rd(rd)) {
-		pr_err("Invalid regulatory domain detected:\n");
+		pr_err("Invalid regulatory domain detected: %c%c\n",
+		       rd->alpha2[0], rd->alpha2[1]);
 		print_regdomain_info(rd);
 		return -EINVAL;
 	}
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index 8020b5b..d49ed76 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -917,6 +917,12 @@
 
 	nl80211_send_disconnected(rdev, dev, reason, ie, ie_len, from_ap);
 
+	/* stop critical protocol if supported */
+	if (rdev->ops->crit_proto_stop && rdev->crit_proto_nlportid) {
+		rdev->crit_proto_nlportid = 0;
+		rdev_crit_proto_stop(rdev, wdev);
+	}
+
 	/*
 	 * Delete all the keys ... pairwise keys can't really
 	 * exist any more anyway, but default keys might.
diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c
index c8717c1..b50ee5d 100644
--- a/net/wireless/wext-core.c
+++ b/net/wireless/wext-core.c
@@ -342,6 +342,40 @@
 
 /* IW event code */
 
+void wireless_nlevent_flush(void)
+{
+	struct sk_buff *skb;
+	struct net *net;
+
+	ASSERT_RTNL();
+
+	for_each_net(net) {
+		while ((skb = skb_dequeue(&net->wext_nlevents)))
+			rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL,
+				    GFP_KERNEL);
+	}
+}
+EXPORT_SYMBOL_GPL(wireless_nlevent_flush);
+
+static int wext_netdev_notifier_call(struct notifier_block *nb,
+				     unsigned long state, void *ptr)
+{
+	/*
+	 * When a netdev changes state in any way, flush all pending messages
+	 * to avoid them going out in a strange order, e.g. RTM_NEWLINK after
+	 * RTM_DELLINK, or with IFF_UP after without IFF_UP during dev_close()
+	 * or similar - all of which could otherwise happen due to delays from
+	 * schedule_work().
+	 */
+	wireless_nlevent_flush();
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block wext_netdev_notifier = {
+	.notifier_call = wext_netdev_notifier_call,
+};
+
 static int __net_init wext_pernet_init(struct net *net)
 {
 	skb_queue_head_init(&net->wext_nlevents);
@@ -360,7 +394,12 @@
 
 static int __init wireless_nlevent_init(void)
 {
-	return register_pernet_subsys(&wext_pernet_ops);
+	int err = register_pernet_subsys(&wext_pernet_ops);
+
+	if (err)
+		return err;
+
+	return register_netdevice_notifier(&wext_netdev_notifier);
 }
 
 subsys_initcall(wireless_nlevent_init);
@@ -368,17 +407,8 @@
 /* Process events generated by the wireless layer or the driver. */
 static void wireless_nlevent_process(struct work_struct *work)
 {
-	struct sk_buff *skb;
-	struct net *net;
-
 	rtnl_lock();
-
-	for_each_net(net) {
-		while ((skb = skb_dequeue(&net->wext_nlevents)))
-			rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL,
-				    GFP_KERNEL);
-	}
-
+	wireless_nlevent_flush();
 	rtnl_unlock();
 }
 
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 0147c91..874132b 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -269,7 +269,8 @@
 			__init_refok|
 			__kprobes|
 			__ref|
-			__rcu
+			__rcu|
+			__private
 		}x;
 our $InitAttributePrefix = qr{__(?:mem|cpu|dev|net_|)};
 our $InitAttributeData = qr{$InitAttributePrefix(?:initdata\b)};
diff --git a/scripts/ld-version.sh b/scripts/ld-version.sh
index d154f08..7bfe9fa 100755
--- a/scripts/ld-version.sh
+++ b/scripts/ld-version.sh
@@ -1,7 +1,7 @@
 #!/usr/bin/awk -f
 # extract linker version number from stdin and turn into single number
 	{
-	gsub(".*)", "");
+	gsub(".*\\)", "");
 	gsub(".*version ", "");
 	gsub("-.*", "");
 	split($1,a, ".");
diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index e080746..48958d3 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -594,7 +594,8 @@
 		if (strncmp(symname, "_restgpr0_", sizeof("_restgpr0_") - 1) == 0 ||
 		    strncmp(symname, "_savegpr0_", sizeof("_savegpr0_") - 1) == 0 ||
 		    strncmp(symname, "_restvr_", sizeof("_restvr_") - 1) == 0 ||
-		    strncmp(symname, "_savevr_", sizeof("_savevr_") - 1) == 0)
+		    strncmp(symname, "_savevr_", sizeof("_savevr_") - 1) == 0 ||
+		    strcmp(symname, ".TOC.") == 0)
 			return 1;
 	/* Do not ignore this symbol */
 	return 0;
diff --git a/scripts/prune-kernel b/scripts/prune-kernel
new file mode 100755
index 0000000..ab5034e
--- /dev/null
+++ b/scripts/prune-kernel
@@ -0,0 +1,20 @@
+#!/bin/bash
+
+# because I use CONFIG_LOCALVERSION_AUTO, not the same version again and
+# again, /boot and /lib/modules/ eventually fill up.
+# Dumb script to purge that stuff:
+
+for f in "$@"
+do
+        if rpm -qf "/lib/modules/$f" >/dev/null; then
+                echo "keeping $f (installed from rpm)"
+        elif [ $(uname -r) = "$f" ]; then
+                echo "keeping $f (running kernel) "
+        else
+                echo "removing $f"
+                rm -f "/boot/initramfs-$f.img" "/boot/System.map-$f"
+                rm -f "/boot/vmlinuz-$f"   "/boot/config-$f"
+                rm -rf "/lib/modules/$f"
+                new-kernel-pkg --remove $f
+        fi
+done
diff --git a/scripts/sortextable.c b/scripts/sortextable.c
index c2423d9..7b29fb1 100644
--- a/scripts/sortextable.c
+++ b/scripts/sortextable.c
@@ -209,6 +209,35 @@
 	return 0;
 }
 
+static void x86_sort_relative_table(char *extab_image, int image_size)
+{
+	int i;
+
+	i = 0;
+	while (i < image_size) {
+		uint32_t *loc = (uint32_t *)(extab_image + i);
+
+		w(r(loc) + i, loc);
+		w(r(loc + 1) + i + 4, loc + 1);
+		w(r(loc + 2) + i + 8, loc + 2);
+
+		i += sizeof(uint32_t) * 3;
+	}
+
+	qsort(extab_image, image_size / 12, 12, compare_relative_table);
+
+	i = 0;
+	while (i < image_size) {
+		uint32_t *loc = (uint32_t *)(extab_image + i);
+
+		w(r(loc) - i, loc);
+		w(r(loc + 1) - (i + 4), loc + 1);
+		w(r(loc + 2) - (i + 8), loc + 2);
+
+		i += sizeof(uint32_t) * 3;
+	}
+}
+
 static void sort_relative_table(char *extab_image, int image_size)
 {
 	int i;
@@ -281,6 +310,9 @@
 		break;
 	case EM_386:
 	case EM_X86_64:
+		custom_sort = x86_sort_relative_table;
+		break;
+
 	case EM_S390:
 		custom_sort = sort_relative_table;
 		break;
diff --git a/security/integrity/evm/evm_main.c b/security/integrity/evm/evm_main.c
index f716025..e6ea9d4 100644
--- a/security/integrity/evm/evm_main.c
+++ b/security/integrity/evm/evm_main.c
@@ -23,6 +23,7 @@
 #include <linux/integrity.h>
 #include <linux/evm.h>
 #include <crypto/hash.h>
+#include <crypto/algapi.h>
 #include "evm.h"
 
 int evm_initialized;
@@ -148,7 +149,7 @@
 				   xattr_value_len, calc.digest);
 		if (rc)
 			break;
-		rc = memcmp(xattr_data->digest, calc.digest,
+		rc = crypto_memneq(xattr_data->digest, calc.digest,
 			    sizeof(calc.digest));
 		if (rc)
 			rc = -EINVAL;
diff --git a/security/keys/key.c b/security/keys/key.c
index 07a8731..09ef276 100644
--- a/security/keys/key.c
+++ b/security/keys/key.c
@@ -430,7 +430,8 @@
 
 			/* and link it into the destination keyring */
 			if (keyring) {
-				set_bit(KEY_FLAG_KEEP, &key->flags);
+				if (test_bit(KEY_FLAG_KEEP, &keyring->flags))
+					set_bit(KEY_FLAG_KEEP, &key->flags);
 
 				__key_link(key, _edit);
 			}
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index f8110cf..f1ab715 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -3249,7 +3249,7 @@
 
 static void selinux_inode_getsecid(struct inode *inode, u32 *secid)
 {
-	struct inode_security_struct *isec = inode_security(inode);
+	struct inode_security_struct *isec = inode_security_novalidate(inode);
 	*secid = isec->sid;
 }
 
diff --git a/security/selinux/nlmsgtab.c b/security/selinux/nlmsgtab.c
index 2bbb418..8495b93 100644
--- a/security/selinux/nlmsgtab.c
+++ b/security/selinux/nlmsgtab.c
@@ -83,6 +83,7 @@
 	{ TCPDIAG_GETSOCK,	NETLINK_TCPDIAG_SOCKET__NLMSG_READ },
 	{ DCCPDIAG_GETSOCK,	NETLINK_TCPDIAG_SOCKET__NLMSG_READ },
 	{ SOCK_DIAG_BY_FAMILY,	NETLINK_TCPDIAG_SOCKET__NLMSG_READ },
+	{ SOCK_DESTROY,		NETLINK_TCPDIAG_SOCKET__NLMSG_WRITE },
 };
 
 static struct nlmsg_perm nlmsg_xfrm_perms[] =
diff --git a/sound/arm/pxa2xx-pcm-lib.c b/sound/arm/pxa2xx-pcm-lib.c
index e9b98af..e8da3b8 100644
--- a/sound/arm/pxa2xx-pcm-lib.c
+++ b/sound/arm/pxa2xx-pcm-lib.c
@@ -141,10 +141,8 @@
 	struct vm_area_struct *vma)
 {
 	struct snd_pcm_runtime *runtime = substream->runtime;
-	return dma_mmap_writecombine(substream->pcm->card->dev, vma,
-				     runtime->dma_area,
-				     runtime->dma_addr,
-				     runtime->dma_bytes);
+	return dma_mmap_wc(substream->pcm->card->dev, vma, runtime->dma_area,
+			   runtime->dma_addr, runtime->dma_bytes);
 }
 EXPORT_SYMBOL(pxa2xx_pcm_mmap);
 
@@ -156,8 +154,7 @@
 	buf->dev.type = SNDRV_DMA_TYPE_DEV;
 	buf->dev.dev = pcm->card->dev;
 	buf->private_data = NULL;
-	buf->area = dma_alloc_writecombine(pcm->card->dev, size,
-					   &buf->addr, GFP_KERNEL);
+	buf->area = dma_alloc_wc(pcm->card->dev, size, &buf->addr, GFP_KERNEL);
 	if (!buf->area)
 		return -ENOMEM;
 	buf->bytes = size;
@@ -178,8 +175,7 @@
 		buf = &substream->dma_buffer;
 		if (!buf->area)
 			continue;
-		dma_free_writecombine(pcm->card->dev, buf->bytes,
-				      buf->area, buf->addr);
+		dma_free_wc(pcm->card->dev, buf->bytes, buf->area, buf->addr);
 		buf->area = NULL;
 	}
 }
diff --git a/sound/core/Kconfig b/sound/core/Kconfig
index e3e9491..a2a1e24 100644
--- a/sound/core/Kconfig
+++ b/sound/core/Kconfig
@@ -97,11 +97,11 @@
 	bool "PCM timer interface" if EXPERT
 	default y
 	help
-	  If you disable this option, pcm timer will be inavailable, so
-	  those stubs used pcm timer (e.g. dmix, dsnoop & co) may work
+	  If you disable this option, pcm timer will be unavailable, so
+	  those stubs that use pcm timer (e.g. dmix, dsnoop & co) may work
 	  incorrectlly.
 
-	  For some embedded device, we may disable it to reduce memory
+	  For some embedded devices, we may disable it to reduce memory
 	  footprint, about 20KB on x86_64 platform.
 
 config SND_SEQUENCER_OSS
diff --git a/sound/core/compress_offload.c b/sound/core/compress_offload.c
index 18b8dc4..7fac3ca 100644
--- a/sound/core/compress_offload.c
+++ b/sound/core/compress_offload.c
@@ -46,6 +46,13 @@
 #include <sound/compress_offload.h>
 #include <sound/compress_driver.h>
 
+/* struct snd_compr_codec_caps overflows the ioctl bit size for some
+ * architectures, so we need to disable the relevant ioctls.
+ */
+#if _IOC_SIZEBITS < 14
+#define COMPR_CODEC_CAPS_OVERFLOW
+#endif
+
 /* TODO:
  * - add substream support for multiple devices in case of
  *	SND_DYNAMIC_MINORS is not used
@@ -440,6 +447,7 @@
 	return retval;
 }
 
+#ifndef COMPR_CODEC_CAPS_OVERFLOW
 static int
 snd_compr_get_codec_caps(struct snd_compr_stream *stream, unsigned long arg)
 {
@@ -463,6 +471,7 @@
 	kfree(caps);
 	return retval;
 }
+#endif /* !COMPR_CODEC_CAPS_OVERFLOW */
 
 /* revisit this with snd_pcm_preallocate_xxx */
 static int snd_compr_allocate_buffer(struct snd_compr_stream *stream,
@@ -801,9 +810,11 @@
 	case _IOC_NR(SNDRV_COMPRESS_GET_CAPS):
 		retval = snd_compr_get_caps(stream, arg);
 		break;
+#ifndef COMPR_CODEC_CAPS_OVERFLOW
 	case _IOC_NR(SNDRV_COMPRESS_GET_CODEC_CAPS):
 		retval = snd_compr_get_codec_caps(stream, arg);
 		break;
+#endif
 	case _IOC_NR(SNDRV_COMPRESS_SET_PARAMS):
 		retval = snd_compr_set_params(stream, arg);
 		break;
diff --git a/sound/core/control_compat.c b/sound/core/control_compat.c
index b9c0910..0608f21 100644
--- a/sound/core/control_compat.c
+++ b/sound/core/control_compat.c
@@ -170,6 +170,19 @@
         unsigned char reserved[128];
 };
 
+#ifdef CONFIG_X86_X32
+/* x32 has a different alignment for 64bit values from ia32 */
+struct snd_ctl_elem_value_x32 {
+	struct snd_ctl_elem_id id;
+	unsigned int indirect;	/* bit-field causes misalignment */
+	union {
+		s32 integer[128];
+		unsigned char data[512];
+		s64 integer64[64];
+	} value;
+	unsigned char reserved[128];
+};
+#endif /* CONFIG_X86_X32 */
 
 /* get the value type and count of the control */
 static int get_ctl_type(struct snd_card *card, struct snd_ctl_elem_id *id,
@@ -219,9 +232,11 @@
 
 static int copy_ctl_value_from_user(struct snd_card *card,
 				    struct snd_ctl_elem_value *data,
-				    struct snd_ctl_elem_value32 __user *data32,
+				    void __user *userdata,
+				    void __user *valuep,
 				    int *typep, int *countp)
 {
+	struct snd_ctl_elem_value32 __user *data32 = userdata;
 	int i, type, size;
 	int uninitialized_var(count);
 	unsigned int indirect;
@@ -239,8 +254,9 @@
 	if (type == SNDRV_CTL_ELEM_TYPE_BOOLEAN ||
 	    type == SNDRV_CTL_ELEM_TYPE_INTEGER) {
 		for (i = 0; i < count; i++) {
+			s32 __user *intp = valuep;
 			int val;
-			if (get_user(val, &data32->value.integer[i]))
+			if (get_user(val, &intp[i]))
 				return -EFAULT;
 			data->value.integer.value[i] = val;
 		}
@@ -250,8 +266,7 @@
 			dev_err(card->dev, "snd_ioctl32_ctl_elem_value: unknown type %d\n", type);
 			return -EINVAL;
 		}
-		if (copy_from_user(data->value.bytes.data,
-				   data32->value.data, size))
+		if (copy_from_user(data->value.bytes.data, valuep, size))
 			return -EFAULT;
 	}
 
@@ -261,7 +276,8 @@
 }
 
 /* restore the value to 32bit */
-static int copy_ctl_value_to_user(struct snd_ctl_elem_value32 __user *data32,
+static int copy_ctl_value_to_user(void __user *userdata,
+				  void __user *valuep,
 				  struct snd_ctl_elem_value *data,
 				  int type, int count)
 {
@@ -270,22 +286,22 @@
 	if (type == SNDRV_CTL_ELEM_TYPE_BOOLEAN ||
 	    type == SNDRV_CTL_ELEM_TYPE_INTEGER) {
 		for (i = 0; i < count; i++) {
+			s32 __user *intp = valuep;
 			int val;
 			val = data->value.integer.value[i];
-			if (put_user(val, &data32->value.integer[i]))
+			if (put_user(val, &intp[i]))
 				return -EFAULT;
 		}
 	} else {
 		size = get_elem_size(type, count);
-		if (copy_to_user(data32->value.data,
-				 data->value.bytes.data, size))
+		if (copy_to_user(valuep, data->value.bytes.data, size))
 			return -EFAULT;
 	}
 	return 0;
 }
 
-static int snd_ctl_elem_read_user_compat(struct snd_card *card, 
-					 struct snd_ctl_elem_value32 __user *data32)
+static int ctl_elem_read_user(struct snd_card *card,
+			      void __user *userdata, void __user *valuep)
 {
 	struct snd_ctl_elem_value *data;
 	int err, type, count;
@@ -294,7 +310,9 @@
 	if (data == NULL)
 		return -ENOMEM;
 
-	if ((err = copy_ctl_value_from_user(card, data, data32, &type, &count)) < 0)
+	err = copy_ctl_value_from_user(card, data, userdata, valuep,
+				       &type, &count);
+	if (err < 0)
 		goto error;
 
 	snd_power_lock(card);
@@ -303,14 +321,15 @@
 		err = snd_ctl_elem_read(card, data);
 	snd_power_unlock(card);
 	if (err >= 0)
-		err = copy_ctl_value_to_user(data32, data, type, count);
+		err = copy_ctl_value_to_user(userdata, valuep, data,
+					     type, count);
  error:
 	kfree(data);
 	return err;
 }
 
-static int snd_ctl_elem_write_user_compat(struct snd_ctl_file *file,
-					  struct snd_ctl_elem_value32 __user *data32)
+static int ctl_elem_write_user(struct snd_ctl_file *file,
+			       void __user *userdata, void __user *valuep)
 {
 	struct snd_ctl_elem_value *data;
 	struct snd_card *card = file->card;
@@ -320,7 +339,9 @@
 	if (data == NULL)
 		return -ENOMEM;
 
-	if ((err = copy_ctl_value_from_user(card, data, data32, &type, &count)) < 0)
+	err = copy_ctl_value_from_user(card, data, userdata, valuep,
+				       &type, &count);
+	if (err < 0)
 		goto error;
 
 	snd_power_lock(card);
@@ -329,12 +350,39 @@
 		err = snd_ctl_elem_write(card, file, data);
 	snd_power_unlock(card);
 	if (err >= 0)
-		err = copy_ctl_value_to_user(data32, data, type, count);
+		err = copy_ctl_value_to_user(userdata, valuep, data,
+					     type, count);
  error:
 	kfree(data);
 	return err;
 }
 
+static int snd_ctl_elem_read_user_compat(struct snd_card *card,
+					 struct snd_ctl_elem_value32 __user *data32)
+{
+	return ctl_elem_read_user(card, data32, &data32->value);
+}
+
+static int snd_ctl_elem_write_user_compat(struct snd_ctl_file *file,
+					  struct snd_ctl_elem_value32 __user *data32)
+{
+	return ctl_elem_write_user(file, data32, &data32->value);
+}
+
+#ifdef CONFIG_X86_X32
+static int snd_ctl_elem_read_user_x32(struct snd_card *card,
+				      struct snd_ctl_elem_value_x32 __user *data32)
+{
+	return ctl_elem_read_user(card, data32, &data32->value);
+}
+
+static int snd_ctl_elem_write_user_x32(struct snd_ctl_file *file,
+				       struct snd_ctl_elem_value_x32 __user *data32)
+{
+	return ctl_elem_write_user(file, data32, &data32->value);
+}
+#endif /* CONFIG_X86_X32 */
+
 /* add or replace a user control */
 static int snd_ctl_elem_add_compat(struct snd_ctl_file *file,
 				   struct snd_ctl_elem_info32 __user *data32,
@@ -393,6 +441,10 @@
 	SNDRV_CTL_IOCTL_ELEM_WRITE32 = _IOWR('U', 0x13, struct snd_ctl_elem_value32),
 	SNDRV_CTL_IOCTL_ELEM_ADD32 = _IOWR('U', 0x17, struct snd_ctl_elem_info32),
 	SNDRV_CTL_IOCTL_ELEM_REPLACE32 = _IOWR('U', 0x18, struct snd_ctl_elem_info32),
+#ifdef CONFIG_X86_X32
+	SNDRV_CTL_IOCTL_ELEM_READ_X32 = _IOWR('U', 0x12, struct snd_ctl_elem_value_x32),
+	SNDRV_CTL_IOCTL_ELEM_WRITE_X32 = _IOWR('U', 0x13, struct snd_ctl_elem_value_x32),
+#endif /* CONFIG_X86_X32 */
 };
 
 static inline long snd_ctl_ioctl_compat(struct file *file, unsigned int cmd, unsigned long arg)
@@ -431,6 +483,12 @@
 		return snd_ctl_elem_add_compat(ctl, argp, 0);
 	case SNDRV_CTL_IOCTL_ELEM_REPLACE32:
 		return snd_ctl_elem_add_compat(ctl, argp, 1);
+#ifdef CONFIG_X86_X32
+	case SNDRV_CTL_IOCTL_ELEM_READ_X32:
+		return snd_ctl_elem_read_user_x32(ctl->card, argp);
+	case SNDRV_CTL_IOCTL_ELEM_WRITE_X32:
+		return snd_ctl_elem_write_user_x32(ctl, argp);
+#endif /* CONFIG_X86_X32 */
 	}
 
 	down_read(&snd_ioctl_rwsem);
diff --git a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c
index 0e73d03..ebc9fdf 100644
--- a/sound/core/oss/pcm_oss.c
+++ b/sound/core/oss/pcm_oss.c
@@ -835,7 +835,8 @@
 	return snd_pcm_hw_param_near(substream, params, SNDRV_PCM_HW_PARAM_RATE, best_rate, NULL);
 }
 
-static int snd_pcm_oss_change_params(struct snd_pcm_substream *substream)
+static int snd_pcm_oss_change_params(struct snd_pcm_substream *substream,
+				     bool trylock)
 {
 	struct snd_pcm_runtime *runtime = substream->runtime;
 	struct snd_pcm_hw_params *params, *sparams;
@@ -849,7 +850,10 @@
 	struct snd_mask sformat_mask;
 	struct snd_mask mask;
 
-	if (mutex_lock_interruptible(&runtime->oss.params_lock))
+	if (trylock) {
+		if (!(mutex_trylock(&runtime->oss.params_lock)))
+			return -EAGAIN;
+	} else if (mutex_lock_interruptible(&runtime->oss.params_lock))
 		return -EINTR;
 	sw_params = kzalloc(sizeof(*sw_params), GFP_KERNEL);
 	params = kmalloc(sizeof(*params), GFP_KERNEL);
@@ -1092,7 +1096,7 @@
 		if (asubstream == NULL)
 			asubstream = substream;
 		if (substream->runtime->oss.params) {
-			err = snd_pcm_oss_change_params(substream);
+			err = snd_pcm_oss_change_params(substream, false);
 			if (err < 0)
 				return err;
 		}
@@ -1132,7 +1136,7 @@
 		return 0;
 	runtime = substream->runtime;
 	if (runtime->oss.params) {
-		err = snd_pcm_oss_change_params(substream);
+		err = snd_pcm_oss_change_params(substream, false);
 		if (err < 0)
 			return err;
 	}
@@ -2163,7 +2167,7 @@
 	runtime = substream->runtime;
 
 	if (runtime->oss.params &&
-	    (err = snd_pcm_oss_change_params(substream)) < 0)
+	    (err = snd_pcm_oss_change_params(substream, false)) < 0)
 		return err;
 
 	info.fragsize = runtime->oss.period_bytes;
@@ -2804,7 +2808,12 @@
 		return -EIO;
 	
 	if (runtime->oss.params) {
-		if ((err = snd_pcm_oss_change_params(substream)) < 0)
+		/* use mutex_trylock() for params_lock for avoiding a deadlock
+		 * between mmap_sem and params_lock taken by
+		 * copy_from/to_user() in snd_pcm_oss_write/read()
+		 */
+		err = snd_pcm_oss_change_params(substream, true);
+		if (err < 0)
 			return err;
 	}
 #ifdef CONFIG_SND_PCM_OSS_PLUGINS
diff --git a/sound/core/pcm_compat.c b/sound/core/pcm_compat.c
index 9630e9f..1f64ab0 100644
--- a/sound/core/pcm_compat.c
+++ b/sound/core/pcm_compat.c
@@ -183,6 +183,14 @@
 	return err;
 }
 
+#ifdef CONFIG_X86_X32
+/* X32 ABI has the same struct as x86-64 for snd_pcm_channel_info */
+static int snd_pcm_channel_info_user(struct snd_pcm_substream *substream,
+				     struct snd_pcm_channel_info __user *src);
+#define snd_pcm_ioctl_channel_info_x32(s, p)	\
+	snd_pcm_channel_info_user(s, p)
+#endif /* CONFIG_X86_X32 */
+
 struct snd_pcm_status32 {
 	s32 state;
 	struct compat_timespec trigger_tstamp;
@@ -243,6 +251,71 @@
 	return err;
 }
 
+#ifdef CONFIG_X86_X32
+/* X32 ABI has 64bit timespec and 64bit alignment */
+struct snd_pcm_status_x32 {
+	s32 state;
+	u32 rsvd; /* alignment */
+	struct timespec trigger_tstamp;
+	struct timespec tstamp;
+	u32 appl_ptr;
+	u32 hw_ptr;
+	s32 delay;
+	u32 avail;
+	u32 avail_max;
+	u32 overrange;
+	s32 suspended_state;
+	u32 audio_tstamp_data;
+	struct timespec audio_tstamp;
+	struct timespec driver_tstamp;
+	u32 audio_tstamp_accuracy;
+	unsigned char reserved[52-2*sizeof(struct timespec)];
+} __packed;
+
+#define put_timespec(src, dst) copy_to_user(dst, src, sizeof(*dst))
+
+static int snd_pcm_status_user_x32(struct snd_pcm_substream *substream,
+				   struct snd_pcm_status_x32 __user *src,
+				   bool ext)
+{
+	struct snd_pcm_status status;
+	int err;
+
+	memset(&status, 0, sizeof(status));
+	/*
+	 * with extension, parameters are read/write,
+	 * get audio_tstamp_data from user,
+	 * ignore rest of status structure
+	 */
+	if (ext && get_user(status.audio_tstamp_data,
+				(u32 __user *)(&src->audio_tstamp_data)))
+		return -EFAULT;
+	err = snd_pcm_status(substream, &status);
+	if (err < 0)
+		return err;
+
+	if (clear_user(src, sizeof(*src)))
+		return -EFAULT;
+	if (put_user(status.state, &src->state) ||
+	    put_timespec(&status.trigger_tstamp, &src->trigger_tstamp) ||
+	    put_timespec(&status.tstamp, &src->tstamp) ||
+	    put_user(status.appl_ptr, &src->appl_ptr) ||
+	    put_user(status.hw_ptr, &src->hw_ptr) ||
+	    put_user(status.delay, &src->delay) ||
+	    put_user(status.avail, &src->avail) ||
+	    put_user(status.avail_max, &src->avail_max) ||
+	    put_user(status.overrange, &src->overrange) ||
+	    put_user(status.suspended_state, &src->suspended_state) ||
+	    put_user(status.audio_tstamp_data, &src->audio_tstamp_data) ||
+	    put_timespec(&status.audio_tstamp, &src->audio_tstamp) ||
+	    put_timespec(&status.driver_tstamp, &src->driver_tstamp) ||
+	    put_user(status.audio_tstamp_accuracy, &src->audio_tstamp_accuracy))
+		return -EFAULT;
+
+	return err;
+}
+#endif /* CONFIG_X86_X32 */
+
 /* both for HW_PARAMS and HW_REFINE */
 static int snd_pcm_ioctl_hw_params_compat(struct snd_pcm_substream *substream,
 					  int refine, 
@@ -469,6 +542,93 @@
 	return 0;
 }
 
+#ifdef CONFIG_X86_X32
+/* X32 ABI has 64bit timespec and 64bit alignment */
+struct snd_pcm_mmap_status_x32 {
+	s32 state;
+	s32 pad1;
+	u32 hw_ptr;
+	u32 pad2; /* alignment */
+	struct timespec tstamp;
+	s32 suspended_state;
+	struct timespec audio_tstamp;
+} __packed;
+
+struct snd_pcm_mmap_control_x32 {
+	u32 appl_ptr;
+	u32 avail_min;
+};
+
+struct snd_pcm_sync_ptr_x32 {
+	u32 flags;
+	u32 rsvd; /* alignment */
+	union {
+		struct snd_pcm_mmap_status_x32 status;
+		unsigned char reserved[64];
+	} s;
+	union {
+		struct snd_pcm_mmap_control_x32 control;
+		unsigned char reserved[64];
+	} c;
+} __packed;
+
+static int snd_pcm_ioctl_sync_ptr_x32(struct snd_pcm_substream *substream,
+				      struct snd_pcm_sync_ptr_x32 __user *src)
+{
+	struct snd_pcm_runtime *runtime = substream->runtime;
+	volatile struct snd_pcm_mmap_status *status;
+	volatile struct snd_pcm_mmap_control *control;
+	u32 sflags;
+	struct snd_pcm_mmap_control scontrol;
+	struct snd_pcm_mmap_status sstatus;
+	snd_pcm_uframes_t boundary;
+	int err;
+
+	if (snd_BUG_ON(!runtime))
+		return -EINVAL;
+
+	if (get_user(sflags, &src->flags) ||
+	    get_user(scontrol.appl_ptr, &src->c.control.appl_ptr) ||
+	    get_user(scontrol.avail_min, &src->c.control.avail_min))
+		return -EFAULT;
+	if (sflags & SNDRV_PCM_SYNC_PTR_HWSYNC) {
+		err = snd_pcm_hwsync(substream);
+		if (err < 0)
+			return err;
+	}
+	status = runtime->status;
+	control = runtime->control;
+	boundary = recalculate_boundary(runtime);
+	if (!boundary)
+		boundary = 0x7fffffff;
+	snd_pcm_stream_lock_irq(substream);
+	/* FIXME: we should consider the boundary for the sync from app */
+	if (!(sflags & SNDRV_PCM_SYNC_PTR_APPL))
+		control->appl_ptr = scontrol.appl_ptr;
+	else
+		scontrol.appl_ptr = control->appl_ptr % boundary;
+	if (!(sflags & SNDRV_PCM_SYNC_PTR_AVAIL_MIN))
+		control->avail_min = scontrol.avail_min;
+	else
+		scontrol.avail_min = control->avail_min;
+	sstatus.state = status->state;
+	sstatus.hw_ptr = status->hw_ptr % boundary;
+	sstatus.tstamp = status->tstamp;
+	sstatus.suspended_state = status->suspended_state;
+	sstatus.audio_tstamp = status->audio_tstamp;
+	snd_pcm_stream_unlock_irq(substream);
+	if (put_user(sstatus.state, &src->s.status.state) ||
+	    put_user(sstatus.hw_ptr, &src->s.status.hw_ptr) ||
+	    put_timespec(&sstatus.tstamp, &src->s.status.tstamp) ||
+	    put_user(sstatus.suspended_state, &src->s.status.suspended_state) ||
+	    put_timespec(&sstatus.audio_tstamp, &src->s.status.audio_tstamp) ||
+	    put_user(scontrol.appl_ptr, &src->c.control.appl_ptr) ||
+	    put_user(scontrol.avail_min, &src->c.control.avail_min))
+		return -EFAULT;
+
+	return 0;
+}
+#endif /* CONFIG_X86_X32 */
 
 /*
  */
@@ -487,7 +647,12 @@
 	SNDRV_PCM_IOCTL_WRITEN_FRAMES32 = _IOW('A', 0x52, struct snd_xfern32),
 	SNDRV_PCM_IOCTL_READN_FRAMES32 = _IOR('A', 0x53, struct snd_xfern32),
 	SNDRV_PCM_IOCTL_SYNC_PTR32 = _IOWR('A', 0x23, struct snd_pcm_sync_ptr32),
-
+#ifdef CONFIG_X86_X32
+	SNDRV_PCM_IOCTL_CHANNEL_INFO_X32 = _IOR('A', 0x32, struct snd_pcm_channel_info),
+	SNDRV_PCM_IOCTL_STATUS_X32 = _IOR('A', 0x20, struct snd_pcm_status_x32),
+	SNDRV_PCM_IOCTL_STATUS_EXT_X32 = _IOWR('A', 0x24, struct snd_pcm_status_x32),
+	SNDRV_PCM_IOCTL_SYNC_PTR_X32 = _IOWR('A', 0x23, struct snd_pcm_sync_ptr_x32),
+#endif /* CONFIG_X86_X32 */
 };
 
 static long snd_pcm_ioctl_compat(struct file *file, unsigned int cmd, unsigned long arg)
@@ -559,6 +724,16 @@
 		return snd_pcm_ioctl_rewind_compat(substream, argp);
 	case SNDRV_PCM_IOCTL_FORWARD32:
 		return snd_pcm_ioctl_forward_compat(substream, argp);
+#ifdef CONFIG_X86_X32
+	case SNDRV_PCM_IOCTL_STATUS_X32:
+		return snd_pcm_status_user_x32(substream, argp, false);
+	case SNDRV_PCM_IOCTL_STATUS_EXT_X32:
+		return snd_pcm_status_user_x32(substream, argp, true);
+	case SNDRV_PCM_IOCTL_SYNC_PTR_X32:
+		return snd_pcm_ioctl_sync_ptr_x32(substream, argp);
+	case SNDRV_PCM_IOCTL_CHANNEL_INFO_X32:
+		return snd_pcm_ioctl_channel_info_x32(substream, argp);
+#endif /* CONFIG_X86_X32 */
 	}
 
 	return -ENOIOCTLCMD;
diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c
index fadd3eb..9106d8e 100644
--- a/sound/core/pcm_native.c
+++ b/sound/core/pcm_native.c
@@ -74,6 +74,18 @@
 static DEFINE_RWLOCK(snd_pcm_link_rwlock);
 static DECLARE_RWSEM(snd_pcm_link_rwsem);
 
+/* Writer in rwsem may block readers even during its waiting in queue,
+ * and this may lead to a deadlock when the code path takes read sem
+ * twice (e.g. one in snd_pcm_action_nonatomic() and another in
+ * snd_pcm_stream_lock()).  As a (suboptimal) workaround, let writer to
+ * spin until it gets the lock.
+ */
+static inline void down_write_nonblock(struct rw_semaphore *lock)
+{
+	while (!down_write_trylock(lock))
+		cond_resched();
+}
+
 /**
  * snd_pcm_stream_lock - Lock the PCM stream
  * @substream: PCM substream
@@ -1813,7 +1825,7 @@
 		res = -ENOMEM;
 		goto _nolock;
 	}
-	down_write(&snd_pcm_link_rwsem);
+	down_write_nonblock(&snd_pcm_link_rwsem);
 	write_lock_irq(&snd_pcm_link_rwlock);
 	if (substream->runtime->status->state == SNDRV_PCM_STATE_OPEN ||
 	    substream->runtime->status->state != substream1->runtime->status->state ||
@@ -1860,7 +1872,7 @@
 	struct snd_pcm_substream *s;
 	int res = 0;
 
-	down_write(&snd_pcm_link_rwsem);
+	down_write_nonblock(&snd_pcm_link_rwsem);
 	write_lock_irq(&snd_pcm_link_rwlock);
 	if (!snd_pcm_stream_linked(substream)) {
 		res = -EALREADY;
diff --git a/sound/core/rawmidi.c b/sound/core/rawmidi.c
index a775984..795437b 100644
--- a/sound/core/rawmidi.c
+++ b/sound/core/rawmidi.c
@@ -942,31 +942,36 @@
 	unsigned long flags;
 	long result = 0, count1;
 	struct snd_rawmidi_runtime *runtime = substream->runtime;
+	unsigned long appl_ptr;
 
+	spin_lock_irqsave(&runtime->lock, flags);
 	while (count > 0 && runtime->avail) {
 		count1 = runtime->buffer_size - runtime->appl_ptr;
 		if (count1 > count)
 			count1 = count;
-		spin_lock_irqsave(&runtime->lock, flags);
 		if (count1 > (int)runtime->avail)
 			count1 = runtime->avail;
+
+		/* update runtime->appl_ptr before unlocking for userbuf */
+		appl_ptr = runtime->appl_ptr;
+		runtime->appl_ptr += count1;
+		runtime->appl_ptr %= runtime->buffer_size;
+		runtime->avail -= count1;
+
 		if (kernelbuf)
-			memcpy(kernelbuf + result, runtime->buffer + runtime->appl_ptr, count1);
+			memcpy(kernelbuf + result, runtime->buffer + appl_ptr, count1);
 		if (userbuf) {
 			spin_unlock_irqrestore(&runtime->lock, flags);
 			if (copy_to_user(userbuf + result,
-					 runtime->buffer + runtime->appl_ptr, count1)) {
+					 runtime->buffer + appl_ptr, count1)) {
 				return result > 0 ? result : -EFAULT;
 			}
 			spin_lock_irqsave(&runtime->lock, flags);
 		}
-		runtime->appl_ptr += count1;
-		runtime->appl_ptr %= runtime->buffer_size;
-		runtime->avail -= count1;
-		spin_unlock_irqrestore(&runtime->lock, flags);
 		result += count1;
 		count -= count1;
 	}
+	spin_unlock_irqrestore(&runtime->lock, flags);
 	return result;
 }
 
@@ -1055,23 +1060,16 @@
 EXPORT_SYMBOL(snd_rawmidi_transmit_empty);
 
 /**
- * snd_rawmidi_transmit_peek - copy data from the internal buffer
+ * __snd_rawmidi_transmit_peek - copy data from the internal buffer
  * @substream: the rawmidi substream
  * @buffer: the buffer pointer
  * @count: data size to transfer
  *
- * Copies data from the internal output buffer to the given buffer.
- *
- * Call this in the interrupt handler when the midi output is ready,
- * and call snd_rawmidi_transmit_ack() after the transmission is
- * finished.
- *
- * Return: The size of copied data, or a negative error code on failure.
+ * This is a variant of snd_rawmidi_transmit_peek() without spinlock.
  */
-int snd_rawmidi_transmit_peek(struct snd_rawmidi_substream *substream,
+int __snd_rawmidi_transmit_peek(struct snd_rawmidi_substream *substream,
 			      unsigned char *buffer, int count)
 {
-	unsigned long flags;
 	int result, count1;
 	struct snd_rawmidi_runtime *runtime = substream->runtime;
 
@@ -1081,7 +1079,6 @@
 		return -EINVAL;
 	}
 	result = 0;
-	spin_lock_irqsave(&runtime->lock, flags);
 	if (runtime->avail >= runtime->buffer_size) {
 		/* warning: lowlevel layer MUST trigger down the hardware */
 		goto __skip;
@@ -1106,12 +1103,68 @@
 		}
 	}
       __skip:
+	return result;
+}
+EXPORT_SYMBOL(__snd_rawmidi_transmit_peek);
+
+/**
+ * snd_rawmidi_transmit_peek - copy data from the internal buffer
+ * @substream: the rawmidi substream
+ * @buffer: the buffer pointer
+ * @count: data size to transfer
+ *
+ * Copies data from the internal output buffer to the given buffer.
+ *
+ * Call this in the interrupt handler when the midi output is ready,
+ * and call snd_rawmidi_transmit_ack() after the transmission is
+ * finished.
+ *
+ * Return: The size of copied data, or a negative error code on failure.
+ */
+int snd_rawmidi_transmit_peek(struct snd_rawmidi_substream *substream,
+			      unsigned char *buffer, int count)
+{
+	struct snd_rawmidi_runtime *runtime = substream->runtime;
+	int result;
+	unsigned long flags;
+
+	spin_lock_irqsave(&runtime->lock, flags);
+	result = __snd_rawmidi_transmit_peek(substream, buffer, count);
 	spin_unlock_irqrestore(&runtime->lock, flags);
 	return result;
 }
 EXPORT_SYMBOL(snd_rawmidi_transmit_peek);
 
 /**
+ * __snd_rawmidi_transmit_ack - acknowledge the transmission
+ * @substream: the rawmidi substream
+ * @count: the transferred count
+ *
+ * This is a variant of __snd_rawmidi_transmit_ack() without spinlock.
+ */
+int __snd_rawmidi_transmit_ack(struct snd_rawmidi_substream *substream, int count)
+{
+	struct snd_rawmidi_runtime *runtime = substream->runtime;
+
+	if (runtime->buffer == NULL) {
+		rmidi_dbg(substream->rmidi,
+			  "snd_rawmidi_transmit_ack: output is not active!!!\n");
+		return -EINVAL;
+	}
+	snd_BUG_ON(runtime->avail + count > runtime->buffer_size);
+	runtime->hw_ptr += count;
+	runtime->hw_ptr %= runtime->buffer_size;
+	runtime->avail += count;
+	substream->bytes += count;
+	if (count > 0) {
+		if (runtime->drain || snd_rawmidi_ready(substream))
+			wake_up(&runtime->sleep);
+	}
+	return count;
+}
+EXPORT_SYMBOL(__snd_rawmidi_transmit_ack);
+
+/**
  * snd_rawmidi_transmit_ack - acknowledge the transmission
  * @substream: the rawmidi substream
  * @count: the transferred count
@@ -1124,26 +1177,14 @@
  */
 int snd_rawmidi_transmit_ack(struct snd_rawmidi_substream *substream, int count)
 {
-	unsigned long flags;
 	struct snd_rawmidi_runtime *runtime = substream->runtime;
+	int result;
+	unsigned long flags;
 
-	if (runtime->buffer == NULL) {
-		rmidi_dbg(substream->rmidi,
-			  "snd_rawmidi_transmit_ack: output is not active!!!\n");
-		return -EINVAL;
-	}
 	spin_lock_irqsave(&runtime->lock, flags);
-	snd_BUG_ON(runtime->avail + count > runtime->buffer_size);
-	runtime->hw_ptr += count;
-	runtime->hw_ptr %= runtime->buffer_size;
-	runtime->avail += count;
-	substream->bytes += count;
-	if (count > 0) {
-		if (runtime->drain || snd_rawmidi_ready(substream))
-			wake_up(&runtime->sleep);
-	}
+	result = __snd_rawmidi_transmit_ack(substream, count);
 	spin_unlock_irqrestore(&runtime->lock, flags);
-	return count;
+	return result;
 }
 EXPORT_SYMBOL(snd_rawmidi_transmit_ack);
 
@@ -1160,12 +1201,22 @@
 int snd_rawmidi_transmit(struct snd_rawmidi_substream *substream,
 			 unsigned char *buffer, int count)
 {
+	struct snd_rawmidi_runtime *runtime = substream->runtime;
+	int result;
+	unsigned long flags;
+
+	spin_lock_irqsave(&runtime->lock, flags);
 	if (!substream->opened)
-		return -EBADFD;
-	count = snd_rawmidi_transmit_peek(substream, buffer, count);
-	if (count < 0)
-		return count;
-	return snd_rawmidi_transmit_ack(substream, count);
+		result = -EBADFD;
+	else {
+		count = __snd_rawmidi_transmit_peek(substream, buffer, count);
+		if (count <= 0)
+			result = count;
+		else
+			result = __snd_rawmidi_transmit_ack(substream, count);
+	}
+	spin_unlock_irqrestore(&runtime->lock, flags);
+	return result;
 }
 EXPORT_SYMBOL(snd_rawmidi_transmit);
 
@@ -1177,8 +1228,9 @@
 	unsigned long flags;
 	long count1, result;
 	struct snd_rawmidi_runtime *runtime = substream->runtime;
+	unsigned long appl_ptr;
 
-	if (snd_BUG_ON(!kernelbuf && !userbuf))
+	if (!kernelbuf && !userbuf)
 		return -EINVAL;
 	if (snd_BUG_ON(!runtime->buffer))
 		return -EINVAL;
@@ -1197,12 +1249,19 @@
 			count1 = count;
 		if (count1 > (long)runtime->avail)
 			count1 = runtime->avail;
+
+		/* update runtime->appl_ptr before unlocking for userbuf */
+		appl_ptr = runtime->appl_ptr;
+		runtime->appl_ptr += count1;
+		runtime->appl_ptr %= runtime->buffer_size;
+		runtime->avail -= count1;
+
 		if (kernelbuf)
-			memcpy(runtime->buffer + runtime->appl_ptr,
+			memcpy(runtime->buffer + appl_ptr,
 			       kernelbuf + result, count1);
 		else if (userbuf) {
 			spin_unlock_irqrestore(&runtime->lock, flags);
-			if (copy_from_user(runtime->buffer + runtime->appl_ptr,
+			if (copy_from_user(runtime->buffer + appl_ptr,
 					   userbuf + result, count1)) {
 				spin_lock_irqsave(&runtime->lock, flags);
 				result = result > 0 ? result : -EFAULT;
@@ -1210,9 +1269,6 @@
 			}
 			spin_lock_irqsave(&runtime->lock, flags);
 		}
-		runtime->appl_ptr += count1;
-		runtime->appl_ptr %= runtime->buffer_size;
-		runtime->avail -= count1;
 		result += count1;
 		count -= count1;
 	}
diff --git a/sound/core/rawmidi_compat.c b/sound/core/rawmidi_compat.c
index 5268c1f..f69764d 100644
--- a/sound/core/rawmidi_compat.c
+++ b/sound/core/rawmidi_compat.c
@@ -85,8 +85,7 @@
 	if (err < 0)
 		return err;
 
-	if (put_user(status.tstamp.tv_sec, &src->tstamp.tv_sec) ||
-	    put_user(status.tstamp.tv_nsec, &src->tstamp.tv_nsec) ||
+	if (compat_put_timespec(&status.tstamp, &src->tstamp) ||
 	    put_user(status.avail, &src->avail) ||
 	    put_user(status.xruns, &src->xruns))
 		return -EFAULT;
@@ -94,9 +93,58 @@
 	return 0;
 }
 
+#ifdef CONFIG_X86_X32
+/* X32 ABI has 64bit timespec and 64bit alignment */
+struct snd_rawmidi_status_x32 {
+	s32 stream;
+	u32 rsvd; /* alignment */
+	struct timespec tstamp;
+	u32 avail;
+	u32 xruns;
+	unsigned char reserved[16];
+} __attribute__((packed));
+
+#define put_timespec(src, dst) copy_to_user(dst, src, sizeof(*dst))
+
+static int snd_rawmidi_ioctl_status_x32(struct snd_rawmidi_file *rfile,
+					struct snd_rawmidi_status_x32 __user *src)
+{
+	int err;
+	struct snd_rawmidi_status status;
+
+	if (rfile->output == NULL)
+		return -EINVAL;
+	if (get_user(status.stream, &src->stream))
+		return -EFAULT;
+
+	switch (status.stream) {
+	case SNDRV_RAWMIDI_STREAM_OUTPUT:
+		err = snd_rawmidi_output_status(rfile->output, &status);
+		break;
+	case SNDRV_RAWMIDI_STREAM_INPUT:
+		err = snd_rawmidi_input_status(rfile->input, &status);
+		break;
+	default:
+		return -EINVAL;
+	}
+	if (err < 0)
+		return err;
+
+	if (put_timespec(&status.tstamp, &src->tstamp) ||
+	    put_user(status.avail, &src->avail) ||
+	    put_user(status.xruns, &src->xruns))
+		return -EFAULT;
+
+	return 0;
+}
+#endif /* CONFIG_X86_X32 */
+
 enum {
 	SNDRV_RAWMIDI_IOCTL_PARAMS32 = _IOWR('W', 0x10, struct snd_rawmidi_params32),
 	SNDRV_RAWMIDI_IOCTL_STATUS32 = _IOWR('W', 0x20, struct snd_rawmidi_status32),
+#ifdef CONFIG_X86_X32
+	SNDRV_RAWMIDI_IOCTL_STATUS_X32 = _IOWR('W', 0x20, struct snd_rawmidi_status_x32),
+#endif /* CONFIG_X86_X32 */
 };
 
 static long snd_rawmidi_ioctl_compat(struct file *file, unsigned int cmd, unsigned long arg)
@@ -115,6 +163,10 @@
 		return snd_rawmidi_ioctl_params_compat(rfile, argp);
 	case SNDRV_RAWMIDI_IOCTL_STATUS32:
 		return snd_rawmidi_ioctl_status_compat(rfile, argp);
+#ifdef CONFIG_X86_X32
+	case SNDRV_RAWMIDI_IOCTL_STATUS_X32:
+		return snd_rawmidi_ioctl_status_x32(rfile, argp);
+#endif /* CONFIG_X86_X32 */
 	}
 	return -ENOIOCTLCMD;
 }
diff --git a/sound/core/seq/oss/seq_oss.c b/sound/core/seq/oss/seq_oss.c
index 8db156b..8cdf489 100644
--- a/sound/core/seq/oss/seq_oss.c
+++ b/sound/core/seq/oss/seq_oss.c
@@ -149,8 +149,6 @@
 	if ((dp = file->private_data) == NULL)
 		return 0;
 
-	snd_seq_oss_drain_write(dp);
-
 	mutex_lock(&register_mutex);
 	snd_seq_oss_release(dp);
 	mutex_unlock(&register_mutex);
diff --git a/sound/core/seq/oss/seq_oss_device.h b/sound/core/seq/oss/seq_oss_device.h
index b439243..d7b4d01 100644
--- a/sound/core/seq/oss/seq_oss_device.h
+++ b/sound/core/seq/oss/seq_oss_device.h
@@ -127,7 +127,6 @@
 unsigned int snd_seq_oss_poll(struct seq_oss_devinfo *dp, struct file *file, poll_table * wait);
 
 void snd_seq_oss_reset(struct seq_oss_devinfo *dp);
-void snd_seq_oss_drain_write(struct seq_oss_devinfo *dp);
 
 /* */
 void snd_seq_oss_process_queue(struct seq_oss_devinfo *dp, abstime_t time);
diff --git a/sound/core/seq/oss/seq_oss_init.c b/sound/core/seq/oss/seq_oss_init.c
index b1221b2..92c96a9 100644
--- a/sound/core/seq/oss/seq_oss_init.c
+++ b/sound/core/seq/oss/seq_oss_init.c
@@ -202,7 +202,7 @@
 
 	dp->index = i;
 	if (i >= SNDRV_SEQ_OSS_MAX_CLIENTS) {
-		pr_err("ALSA: seq_oss: too many applications\n");
+		pr_debug("ALSA: seq_oss: too many applications\n");
 		rc = -ENOMEM;
 		goto _error;
 	}
@@ -436,22 +436,6 @@
 
 
 /*
- * Wait until the queue is empty (if we don't have nonblock)
- */
-void
-snd_seq_oss_drain_write(struct seq_oss_devinfo *dp)
-{
-	if (! dp->timer->running)
-		return;
-	if (is_write_mode(dp->file_mode) && !is_nonblock_mode(dp->file_mode) &&
-	    dp->writeq) {
-		while (snd_seq_oss_writeq_sync(dp->writeq))
-			;
-	}
-}
-
-
-/*
  * reset sequencer devices
  */
 void
diff --git a/sound/core/seq/oss/seq_oss_synth.c b/sound/core/seq/oss/seq_oss_synth.c
index 0f3b381..b16dbef 100644
--- a/sound/core/seq/oss/seq_oss_synth.c
+++ b/sound/core/seq/oss/seq_oss_synth.c
@@ -308,7 +308,7 @@
 	struct seq_oss_synth *rec;
 	struct seq_oss_synthinfo *info;
 
-	if (snd_BUG_ON(dp->max_synthdev >= SNDRV_SEQ_OSS_MAX_SYNTH_DEVS))
+	if (snd_BUG_ON(dp->max_synthdev > SNDRV_SEQ_OSS_MAX_SYNTH_DEVS))
 		return;
 	for (i = 0; i < dp->max_synthdev; i++) {
 		info = &dp->synths[i];
diff --git a/sound/core/seq/seq_clientmgr.c b/sound/core/seq/seq_clientmgr.c
index 13cfa81..58e79e0 100644
--- a/sound/core/seq/seq_clientmgr.c
+++ b/sound/core/seq/seq_clientmgr.c
@@ -678,6 +678,9 @@
 	else
 		down_read(&grp->list_mutex);
 	list_for_each_entry(subs, &grp->list_head, src_list) {
+		/* both ports ready? */
+		if (atomic_read(&subs->ref_count) != 2)
+			continue;
 		event->dest = subs->info.dest;
 		if (subs->info.flags & SNDRV_SEQ_PORT_SUBS_TIMESTAMP)
 			/* convert time according to flag with subscription */
diff --git a/sound/core/seq/seq_memory.c b/sound/core/seq/seq_memory.c
index 8010766..c850345 100644
--- a/sound/core/seq/seq_memory.c
+++ b/sound/core/seq/seq_memory.c
@@ -383,15 +383,20 @@
 
 	if (snd_BUG_ON(!pool))
 		return -EINVAL;
-	if (pool->ptr)			/* should be atomic? */
-		return 0;
 
-	pool->ptr = vmalloc(sizeof(struct snd_seq_event_cell) * pool->size);
-	if (!pool->ptr)
+	cellptr = vmalloc(sizeof(struct snd_seq_event_cell) * pool->size);
+	if (!cellptr)
 		return -ENOMEM;
 
 	/* add new cells to the free cell list */
 	spin_lock_irqsave(&pool->lock, flags);
+	if (pool->ptr) {
+		spin_unlock_irqrestore(&pool->lock, flags);
+		vfree(cellptr);
+		return 0;
+	}
+
+	pool->ptr = cellptr;
 	pool->free = NULL;
 
 	for (cell = 0; cell < pool->size; cell++) {
diff --git a/sound/core/seq/seq_ports.c b/sound/core/seq/seq_ports.c
index 55170a2..fe686ee 100644
--- a/sound/core/seq/seq_ports.c
+++ b/sound/core/seq/seq_ports.c
@@ -173,10 +173,6 @@
 }
 
 /* */
-enum group_type {
-	SRC_LIST, DEST_LIST
-};
-
 static int subscribe_port(struct snd_seq_client *client,
 			  struct snd_seq_client_port *port,
 			  struct snd_seq_port_subs_info *grp,
@@ -203,6 +199,20 @@
 	return NULL;
 }
 
+static void delete_and_unsubscribe_port(struct snd_seq_client *client,
+					struct snd_seq_client_port *port,
+					struct snd_seq_subscribers *subs,
+					bool is_src, bool ack);
+
+static inline struct snd_seq_subscribers *
+get_subscriber(struct list_head *p, bool is_src)
+{
+	if (is_src)
+		return list_entry(p, struct snd_seq_subscribers, src_list);
+	else
+		return list_entry(p, struct snd_seq_subscribers, dest_list);
+}
+
 /*
  * remove all subscribers on the list
  * this is called from port_delete, for each src and dest list.
@@ -210,7 +220,7 @@
 static void clear_subscriber_list(struct snd_seq_client *client,
 				  struct snd_seq_client_port *port,
 				  struct snd_seq_port_subs_info *grp,
-				  int grptype)
+				  int is_src)
 {
 	struct list_head *p, *n;
 
@@ -219,15 +229,13 @@
 		struct snd_seq_client *c;
 		struct snd_seq_client_port *aport;
 
-		if (grptype == SRC_LIST) {
-			subs = list_entry(p, struct snd_seq_subscribers, src_list);
+		subs = get_subscriber(p, is_src);
+		if (is_src)
 			aport = get_client_port(&subs->info.dest, &c);
-		} else {
-			subs = list_entry(p, struct snd_seq_subscribers, dest_list);
+		else
 			aport = get_client_port(&subs->info.sender, &c);
-		}
-		list_del(p);
-		unsubscribe_port(client, port, grp, &subs->info, 0);
+		delete_and_unsubscribe_port(client, port, subs, is_src, false);
+
 		if (!aport) {
 			/* looks like the connected port is being deleted.
 			 * we decrease the counter, and when both ports are deleted
@@ -235,21 +243,14 @@
 			 */
 			if (atomic_dec_and_test(&subs->ref_count))
 				kfree(subs);
-		} else {
-			/* ok we got the connected port */
-			struct snd_seq_port_subs_info *agrp;
-			agrp = (grptype == SRC_LIST) ? &aport->c_dest : &aport->c_src;
-			down_write(&agrp->list_mutex);
-			if (grptype == SRC_LIST)
-				list_del(&subs->dest_list);
-			else
-				list_del(&subs->src_list);
-			up_write(&agrp->list_mutex);
-			unsubscribe_port(c, aport, agrp, &subs->info, 1);
-			kfree(subs);
-			snd_seq_port_unlock(aport);
-			snd_seq_client_unlock(c);
+			continue;
 		}
+
+		/* ok we got the connected port */
+		delete_and_unsubscribe_port(c, aport, subs, !is_src, true);
+		kfree(subs);
+		snd_seq_port_unlock(aport);
+		snd_seq_client_unlock(c);
 	}
 }
 
@@ -262,8 +263,8 @@
 	snd_use_lock_sync(&port->use_lock); 
 
 	/* clear subscribers info */
-	clear_subscriber_list(client, port, &port->c_src, SRC_LIST);
-	clear_subscriber_list(client, port, &port->c_dest, DEST_LIST);
+	clear_subscriber_list(client, port, &port->c_src, true);
+	clear_subscriber_list(client, port, &port->c_dest, false);
 
 	if (port->private_free)
 		port->private_free(port->private_data);
@@ -479,6 +480,78 @@
 	return 0;
 }
 
+static int check_and_subscribe_port(struct snd_seq_client *client,
+				    struct snd_seq_client_port *port,
+				    struct snd_seq_subscribers *subs,
+				    bool is_src, bool exclusive, bool ack)
+{
+	struct snd_seq_port_subs_info *grp;
+	struct list_head *p;
+	struct snd_seq_subscribers *s;
+	int err;
+
+	grp = is_src ? &port->c_src : &port->c_dest;
+	err = -EBUSY;
+	down_write(&grp->list_mutex);
+	if (exclusive) {
+		if (!list_empty(&grp->list_head))
+			goto __error;
+	} else {
+		if (grp->exclusive)
+			goto __error;
+		/* check whether already exists */
+		list_for_each(p, &grp->list_head) {
+			s = get_subscriber(p, is_src);
+			if (match_subs_info(&subs->info, &s->info))
+				goto __error;
+		}
+	}
+
+	err = subscribe_port(client, port, grp, &subs->info, ack);
+	if (err < 0) {
+		grp->exclusive = 0;
+		goto __error;
+	}
+
+	/* add to list */
+	write_lock_irq(&grp->list_lock);
+	if (is_src)
+		list_add_tail(&subs->src_list, &grp->list_head);
+	else
+		list_add_tail(&subs->dest_list, &grp->list_head);
+	grp->exclusive = exclusive;
+	atomic_inc(&subs->ref_count);
+	write_unlock_irq(&grp->list_lock);
+	err = 0;
+
+ __error:
+	up_write(&grp->list_mutex);
+	return err;
+}
+
+static void delete_and_unsubscribe_port(struct snd_seq_client *client,
+					struct snd_seq_client_port *port,
+					struct snd_seq_subscribers *subs,
+					bool is_src, bool ack)
+{
+	struct snd_seq_port_subs_info *grp;
+	struct list_head *list;
+	bool empty;
+
+	grp = is_src ? &port->c_src : &port->c_dest;
+	list = is_src ? &subs->src_list : &subs->dest_list;
+	down_write(&grp->list_mutex);
+	write_lock_irq(&grp->list_lock);
+	empty = list_empty(list);
+	if (!empty)
+		list_del_init(list);
+	grp->exclusive = 0;
+	write_unlock_irq(&grp->list_lock);
+	up_write(&grp->list_mutex);
+
+	if (!empty)
+		unsubscribe_port(client, port, grp, &subs->info, ack);
+}
 
 /* connect two ports */
 int snd_seq_port_connect(struct snd_seq_client *connector,
@@ -488,76 +561,42 @@
 			 struct snd_seq_client_port *dest_port,
 			 struct snd_seq_port_subscribe *info)
 {
-	struct snd_seq_port_subs_info *src = &src_port->c_src;
-	struct snd_seq_port_subs_info *dest = &dest_port->c_dest;
-	struct snd_seq_subscribers *subs, *s;
-	int err, src_called = 0;
-	unsigned long flags;
-	int exclusive;
+	struct snd_seq_subscribers *subs;
+	bool exclusive;
+	int err;
 
 	subs = kzalloc(sizeof(*subs), GFP_KERNEL);
-	if (! subs)
+	if (!subs)
 		return -ENOMEM;
 
 	subs->info = *info;
-	atomic_set(&subs->ref_count, 2);
+	atomic_set(&subs->ref_count, 0);
+	INIT_LIST_HEAD(&subs->src_list);
+	INIT_LIST_HEAD(&subs->dest_list);
 
-	down_write(&src->list_mutex);
-	down_write_nested(&dest->list_mutex, SINGLE_DEPTH_NESTING);
+	exclusive = !!(info->flags & SNDRV_SEQ_PORT_SUBS_EXCLUSIVE);
 
-	exclusive = info->flags & SNDRV_SEQ_PORT_SUBS_EXCLUSIVE ? 1 : 0;
-	err = -EBUSY;
-	if (exclusive) {
-		if (! list_empty(&src->list_head) || ! list_empty(&dest->list_head))
-			goto __error;
-	} else {
-		if (src->exclusive || dest->exclusive)
-			goto __error;
-		/* check whether already exists */
-		list_for_each_entry(s, &src->list_head, src_list) {
-			if (match_subs_info(info, &s->info))
-				goto __error;
-		}
-		list_for_each_entry(s, &dest->list_head, dest_list) {
-			if (match_subs_info(info, &s->info))
-				goto __error;
-		}
-	}
+	err = check_and_subscribe_port(src_client, src_port, subs, true,
+				       exclusive,
+				       connector->number != src_client->number);
+	if (err < 0)
+		goto error;
+	err = check_and_subscribe_port(dest_client, dest_port, subs, false,
+				       exclusive,
+				       connector->number != dest_client->number);
+	if (err < 0)
+		goto error_dest;
 
-	if ((err = subscribe_port(src_client, src_port, src, info,
-				  connector->number != src_client->number)) < 0)
-		goto __error;
-	src_called = 1;
-
-	if ((err = subscribe_port(dest_client, dest_port, dest, info,
-				  connector->number != dest_client->number)) < 0)
-		goto __error;
-
-	/* add to list */
-	write_lock_irqsave(&src->list_lock, flags);
-	// write_lock(&dest->list_lock); // no other lock yet
-	list_add_tail(&subs->src_list, &src->list_head);
-	list_add_tail(&subs->dest_list, &dest->list_head);
-	// write_unlock(&dest->list_lock); // no other lock yet
-	write_unlock_irqrestore(&src->list_lock, flags);
-
-	src->exclusive = dest->exclusive = exclusive;
-
-	up_write(&dest->list_mutex);
-	up_write(&src->list_mutex);
 	return 0;
 
- __error:
-	if (src_called)
-		unsubscribe_port(src_client, src_port, src, info,
-				 connector->number != src_client->number);
+ error_dest:
+	delete_and_unsubscribe_port(src_client, src_port, subs, true,
+				    connector->number != src_client->number);
+ error:
 	kfree(subs);
-	up_write(&dest->list_mutex);
-	up_write(&src->list_mutex);
 	return err;
 }
 
-
 /* remove the connection */
 int snd_seq_port_disconnect(struct snd_seq_client *connector,
 			    struct snd_seq_client *src_client,
@@ -567,37 +606,28 @@
 			    struct snd_seq_port_subscribe *info)
 {
 	struct snd_seq_port_subs_info *src = &src_port->c_src;
-	struct snd_seq_port_subs_info *dest = &dest_port->c_dest;
 	struct snd_seq_subscribers *subs;
 	int err = -ENOENT;
-	unsigned long flags;
 
 	down_write(&src->list_mutex);
-	down_write_nested(&dest->list_mutex, SINGLE_DEPTH_NESTING);
-
 	/* look for the connection */
 	list_for_each_entry(subs, &src->list_head, src_list) {
 		if (match_subs_info(info, &subs->info)) {
-			write_lock_irqsave(&src->list_lock, flags);
-			// write_lock(&dest->list_lock);  // no lock yet
-			list_del(&subs->src_list);
-			list_del(&subs->dest_list);
-			// write_unlock(&dest->list_lock);
-			write_unlock_irqrestore(&src->list_lock, flags);
-			src->exclusive = dest->exclusive = 0;
-			unsubscribe_port(src_client, src_port, src, info,
-					 connector->number != src_client->number);
-			unsubscribe_port(dest_client, dest_port, dest, info,
-					 connector->number != dest_client->number);
-			kfree(subs);
+			atomic_dec(&subs->ref_count); /* mark as not ready */
 			err = 0;
 			break;
 		}
 	}
-
-	up_write(&dest->list_mutex);
 	up_write(&src->list_mutex);
-	return err;
+	if (err < 0)
+		return err;
+
+	delete_and_unsubscribe_port(src_client, src_port, subs, true,
+				    connector->number != src_client->number);
+	delete_and_unsubscribe_port(dest_client, dest_port, subs, false,
+				    connector->number != dest_client->number);
+	kfree(subs);
+	return 0;
 }
 
 
diff --git a/sound/core/seq/seq_timer.c b/sound/core/seq/seq_timer.c
index 82b220c..2931049 100644
--- a/sound/core/seq/seq_timer.c
+++ b/sound/core/seq/seq_timer.c
@@ -90,6 +90,9 @@
 
 void snd_seq_timer_defaults(struct snd_seq_timer * tmr)
 {
+	unsigned long flags;
+
+	spin_lock_irqsave(&tmr->lock, flags);
 	/* setup defaults */
 	tmr->ppq = 96;		/* 96 PPQ */
 	tmr->tempo = 500000;	/* 120 BPM */
@@ -105,21 +108,25 @@
 	tmr->preferred_resolution = seq_default_timer_resolution;
 
 	tmr->skew = tmr->skew_base = SKEW_BASE;
+	spin_unlock_irqrestore(&tmr->lock, flags);
 }
 
-void snd_seq_timer_reset(struct snd_seq_timer * tmr)
+static void seq_timer_reset(struct snd_seq_timer *tmr)
 {
-	unsigned long flags;
-
-	spin_lock_irqsave(&tmr->lock, flags);
-
 	/* reset time & songposition */
 	tmr->cur_time.tv_sec = 0;
 	tmr->cur_time.tv_nsec = 0;
 
 	tmr->tick.cur_tick = 0;
 	tmr->tick.fraction = 0;
+}
 
+void snd_seq_timer_reset(struct snd_seq_timer *tmr)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&tmr->lock, flags);
+	seq_timer_reset(tmr);
 	spin_unlock_irqrestore(&tmr->lock, flags);
 }
 
@@ -138,8 +145,11 @@
 	tmr = q->timer;
 	if (tmr == NULL)
 		return;
-	if (!tmr->running)
+	spin_lock_irqsave(&tmr->lock, flags);
+	if (!tmr->running) {
+		spin_unlock_irqrestore(&tmr->lock, flags);
 		return;
+	}
 
 	resolution *= ticks;
 	if (tmr->skew != tmr->skew_base) {
@@ -148,8 +158,6 @@
 			(((resolution & 0xffff) * tmr->skew) >> 16);
 	}
 
-	spin_lock_irqsave(&tmr->lock, flags);
-
 	/* update timer */
 	snd_seq_inc_time_nsec(&tmr->cur_time, resolution);
 
@@ -296,26 +304,30 @@
 	t->callback = snd_seq_timer_interrupt;
 	t->callback_data = q;
 	t->flags |= SNDRV_TIMER_IFLG_AUTO;
+	spin_lock_irq(&tmr->lock);
 	tmr->timeri = t;
+	spin_unlock_irq(&tmr->lock);
 	return 0;
 }
 
 int snd_seq_timer_close(struct snd_seq_queue *q)
 {
 	struct snd_seq_timer *tmr;
+	struct snd_timer_instance *t;
 	
 	tmr = q->timer;
 	if (snd_BUG_ON(!tmr))
 		return -EINVAL;
-	if (tmr->timeri) {
-		snd_timer_stop(tmr->timeri);
-		snd_timer_close(tmr->timeri);
-		tmr->timeri = NULL;
-	}
+	spin_lock_irq(&tmr->lock);
+	t = tmr->timeri;
+	tmr->timeri = NULL;
+	spin_unlock_irq(&tmr->lock);
+	if (t)
+		snd_timer_close(t);
 	return 0;
 }
 
-int snd_seq_timer_stop(struct snd_seq_timer * tmr)
+static int seq_timer_stop(struct snd_seq_timer *tmr)
 {
 	if (! tmr->timeri)
 		return -EINVAL;
@@ -326,6 +338,17 @@
 	return 0;
 }
 
+int snd_seq_timer_stop(struct snd_seq_timer *tmr)
+{
+	unsigned long flags;
+	int err;
+
+	spin_lock_irqsave(&tmr->lock, flags);
+	err = seq_timer_stop(tmr);
+	spin_unlock_irqrestore(&tmr->lock, flags);
+	return err;
+}
+
 static int initialize_timer(struct snd_seq_timer *tmr)
 {
 	struct snd_timer *t;
@@ -358,13 +381,13 @@
 	return 0;
 }
 
-int snd_seq_timer_start(struct snd_seq_timer * tmr)
+static int seq_timer_start(struct snd_seq_timer *tmr)
 {
 	if (! tmr->timeri)
 		return -EINVAL;
 	if (tmr->running)
-		snd_seq_timer_stop(tmr);
-	snd_seq_timer_reset(tmr);
+		seq_timer_stop(tmr);
+	seq_timer_reset(tmr);
 	if (initialize_timer(tmr) < 0)
 		return -EINVAL;
 	snd_timer_start(tmr->timeri, tmr->ticks);
@@ -373,14 +396,25 @@
 	return 0;
 }
 
-int snd_seq_timer_continue(struct snd_seq_timer * tmr)
+int snd_seq_timer_start(struct snd_seq_timer *tmr)
+{
+	unsigned long flags;
+	int err;
+
+	spin_lock_irqsave(&tmr->lock, flags);
+	err = seq_timer_start(tmr);
+	spin_unlock_irqrestore(&tmr->lock, flags);
+	return err;
+}
+
+static int seq_timer_continue(struct snd_seq_timer *tmr)
 {
 	if (! tmr->timeri)
 		return -EINVAL;
 	if (tmr->running)
 		return -EBUSY;
 	if (! tmr->initialized) {
-		snd_seq_timer_reset(tmr);
+		seq_timer_reset(tmr);
 		if (initialize_timer(tmr) < 0)
 			return -EINVAL;
 	}
@@ -390,11 +424,24 @@
 	return 0;
 }
 
+int snd_seq_timer_continue(struct snd_seq_timer *tmr)
+{
+	unsigned long flags;
+	int err;
+
+	spin_lock_irqsave(&tmr->lock, flags);
+	err = seq_timer_continue(tmr);
+	spin_unlock_irqrestore(&tmr->lock, flags);
+	return err;
+}
+
 /* return current 'real' time. use timeofday() to get better granularity. */
 snd_seq_real_time_t snd_seq_timer_get_cur_time(struct snd_seq_timer *tmr)
 {
 	snd_seq_real_time_t cur_time;
+	unsigned long flags;
 
+	spin_lock_irqsave(&tmr->lock, flags);
 	cur_time = tmr->cur_time;
 	if (tmr->running) { 
 		struct timeval tm;
@@ -410,7 +457,7 @@
 		}
 		snd_seq_sanity_real_time(&cur_time);
 	}
-                
+	spin_unlock_irqrestore(&tmr->lock, flags);
 	return cur_time;	
 }
 
diff --git a/sound/core/seq/seq_virmidi.c b/sound/core/seq/seq_virmidi.c
index 3da2d48..c82ed3e 100644
--- a/sound/core/seq/seq_virmidi.c
+++ b/sound/core/seq/seq_virmidi.c
@@ -155,21 +155,26 @@
 	struct snd_virmidi *vmidi = substream->runtime->private_data;
 	int count, res;
 	unsigned char buf[32], *pbuf;
+	unsigned long flags;
 
 	if (up) {
 		vmidi->trigger = 1;
 		if (vmidi->seq_mode == SNDRV_VIRMIDI_SEQ_DISPATCH &&
 		    !(vmidi->rdev->flags & SNDRV_VIRMIDI_SUBSCRIBE)) {
-			snd_rawmidi_transmit_ack(substream, substream->runtime->buffer_size - substream->runtime->avail);
-			return;		/* ignored */
+			while (snd_rawmidi_transmit(substream, buf,
+						    sizeof(buf)) > 0) {
+				/* ignored */
+			}
+			return;
 		}
 		if (vmidi->event.type != SNDRV_SEQ_EVENT_NONE) {
 			if (snd_seq_kernel_client_dispatch(vmidi->client, &vmidi->event, in_atomic(), 0) < 0)
 				return;
 			vmidi->event.type = SNDRV_SEQ_EVENT_NONE;
 		}
+		spin_lock_irqsave(&substream->runtime->lock, flags);
 		while (1) {
-			count = snd_rawmidi_transmit_peek(substream, buf, sizeof(buf));
+			count = __snd_rawmidi_transmit_peek(substream, buf, sizeof(buf));
 			if (count <= 0)
 				break;
 			pbuf = buf;
@@ -179,16 +184,18 @@
 					snd_midi_event_reset_encode(vmidi->parser);
 					continue;
 				}
-				snd_rawmidi_transmit_ack(substream, res);
+				__snd_rawmidi_transmit_ack(substream, res);
 				pbuf += res;
 				count -= res;
 				if (vmidi->event.type != SNDRV_SEQ_EVENT_NONE) {
 					if (snd_seq_kernel_client_dispatch(vmidi->client, &vmidi->event, in_atomic(), 0) < 0)
-						return;
+						goto out;
 					vmidi->event.type = SNDRV_SEQ_EVENT_NONE;
 				}
 			}
 		}
+	out:
+		spin_unlock_irqrestore(&substream->runtime->lock, flags);
 	} else {
 		vmidi->trigger = 0;
 	}
@@ -254,9 +261,13 @@
  */
 static int snd_virmidi_input_close(struct snd_rawmidi_substream *substream)
 {
+	struct snd_virmidi_dev *rdev = substream->rmidi->private_data;
 	struct snd_virmidi *vmidi = substream->runtime->private_data;
-	snd_midi_event_free(vmidi->parser);
+
+	write_lock_irq(&rdev->filelist_lock);
 	list_del(&vmidi->list);
+	write_unlock_irq(&rdev->filelist_lock);
+	snd_midi_event_free(vmidi->parser);
 	substream->runtime->private_data = NULL;
 	kfree(vmidi);
 	return 0;
diff --git a/sound/core/timer.c b/sound/core/timer.c
index af1f68f..dca817f 100644
--- a/sound/core/timer.c
+++ b/sound/core/timer.c
@@ -422,7 +422,7 @@
 	spin_lock_irqsave(&timer->lock, flags);
 	list_for_each_entry(ts, &ti->slave_active_head, active_list)
 		if (ts->ccallback)
-			ts->ccallback(ti, event + 100, &tstamp, resolution);
+			ts->ccallback(ts, event + 100, &tstamp, resolution);
 	spin_unlock_irqrestore(&timer->lock, flags);
 }
 
@@ -451,6 +451,10 @@
 	unsigned long flags;
 
 	spin_lock_irqsave(&slave_active_lock, flags);
+	if (timeri->flags & SNDRV_TIMER_IFLG_RUNNING) {
+		spin_unlock_irqrestore(&slave_active_lock, flags);
+		return -EBUSY;
+	}
 	timeri->flags |= SNDRV_TIMER_IFLG_RUNNING;
 	if (timeri->master && timeri->timer) {
 		spin_lock(&timeri->timer->lock);
@@ -475,7 +479,8 @@
 		return -EINVAL;
 	if (timeri->flags & SNDRV_TIMER_IFLG_SLAVE) {
 		result = snd_timer_start_slave(timeri);
-		snd_timer_notify1(timeri, SNDRV_TIMER_EVENT_START);
+		if (result >= 0)
+			snd_timer_notify1(timeri, SNDRV_TIMER_EVENT_START);
 		return result;
 	}
 	timer = timeri->timer;
@@ -484,11 +489,18 @@
 	if (timer->card && timer->card->shutdown)
 		return -ENODEV;
 	spin_lock_irqsave(&timer->lock, flags);
+	if (timeri->flags & (SNDRV_TIMER_IFLG_RUNNING |
+			     SNDRV_TIMER_IFLG_START)) {
+		result = -EBUSY;
+		goto unlock;
+	}
 	timeri->ticks = timeri->cticks = ticks;
 	timeri->pticks = 0;
 	result = snd_timer_start1(timer, timeri, ticks);
+ unlock:
 	spin_unlock_irqrestore(&timer->lock, flags);
-	snd_timer_notify1(timeri, SNDRV_TIMER_EVENT_START);
+	if (result >= 0)
+		snd_timer_notify1(timeri, SNDRV_TIMER_EVENT_START);
 	return result;
 }
 
@@ -502,9 +514,17 @@
 
 	if (timeri->flags & SNDRV_TIMER_IFLG_SLAVE) {
 		spin_lock_irqsave(&slave_active_lock, flags);
+		if (!(timeri->flags & SNDRV_TIMER_IFLG_RUNNING)) {
+			spin_unlock_irqrestore(&slave_active_lock, flags);
+			return -EBUSY;
+		}
+		if (timeri->timer)
+			spin_lock(&timeri->timer->lock);
 		timeri->flags &= ~SNDRV_TIMER_IFLG_RUNNING;
 		list_del_init(&timeri->ack_list);
 		list_del_init(&timeri->active_list);
+		if (timeri->timer)
+			spin_unlock(&timeri->timer->lock);
 		spin_unlock_irqrestore(&slave_active_lock, flags);
 		goto __end;
 	}
@@ -512,6 +532,11 @@
 	if (!timer)
 		return -EINVAL;
 	spin_lock_irqsave(&timer->lock, flags);
+	if (!(timeri->flags & (SNDRV_TIMER_IFLG_RUNNING |
+			       SNDRV_TIMER_IFLG_START))) {
+		spin_unlock_irqrestore(&timer->lock, flags);
+		return -EBUSY;
+	}
 	list_del_init(&timeri->ack_list);
 	list_del_init(&timeri->active_list);
 	if (timer->card && timer->card->shutdown) {
@@ -581,10 +606,15 @@
 	if (timer->card && timer->card->shutdown)
 		return -ENODEV;
 	spin_lock_irqsave(&timer->lock, flags);
+	if (timeri->flags & SNDRV_TIMER_IFLG_RUNNING) {
+		result = -EBUSY;
+		goto unlock;
+	}
 	if (!timeri->cticks)
 		timeri->cticks = 1;
 	timeri->pticks = 0;
 	result = snd_timer_start1(timer, timeri, timer->sticks);
+ unlock:
 	spin_unlock_irqrestore(&timer->lock, flags);
 	snd_timer_notify1(timeri, SNDRV_TIMER_EVENT_CONTINUE);
 	return result;
@@ -718,8 +748,8 @@
 			ti->cticks = ti->ticks;
 		} else {
 			ti->flags &= ~SNDRV_TIMER_IFLG_RUNNING;
-			if (--timer->running)
-				list_del_init(&ti->active_list);
+			--timer->running;
+			list_del_init(&ti->active_list);
 		}
 		if ((timer->hw.flags & SNDRV_TIMER_HW_TASKLET) ||
 		    (ti->flags & SNDRV_TIMER_IFLG_FAST))
@@ -1032,11 +1062,21 @@
 	return 0;
 }
 
+static int snd_timer_s_close(struct snd_timer *timer)
+{
+	struct snd_timer_system_private *priv;
+
+	priv = (struct snd_timer_system_private *)timer->private_data;
+	del_timer_sync(&priv->tlist);
+	return 0;
+}
+
 static struct snd_timer_hardware snd_timer_system =
 {
 	.flags =	SNDRV_TIMER_HW_FIRST | SNDRV_TIMER_HW_TASKLET,
 	.resolution =	1000000000L / HZ,
 	.ticks =	10000000L,
+	.close =	snd_timer_s_close,
 	.start =	snd_timer_s_start,
 	.stop =		snd_timer_s_stop
 };
@@ -1893,6 +1933,7 @@
 {
 	struct snd_timer_user *tu;
 	long result = 0, unit;
+	int qhead;
 	int err = 0;
 
 	tu = file->private_data;
@@ -1904,7 +1945,7 @@
 
 			if ((file->f_flags & O_NONBLOCK) != 0 || result > 0) {
 				err = -EAGAIN;
-				break;
+				goto _error;
 			}
 
 			set_current_state(TASK_INTERRUPTIBLE);
@@ -1919,42 +1960,37 @@
 
 			if (tu->disconnected) {
 				err = -ENODEV;
-				break;
+				goto _error;
 			}
 			if (signal_pending(current)) {
 				err = -ERESTARTSYS;
-				break;
+				goto _error;
 			}
 		}
 
+		qhead = tu->qhead++;
+		tu->qhead %= tu->queue_size;
 		spin_unlock_irq(&tu->qlock);
-		if (err < 0)
-			goto _error;
 
 		if (tu->tread) {
-			if (copy_to_user(buffer, &tu->tqueue[tu->qhead++],
-					 sizeof(struct snd_timer_tread))) {
+			if (copy_to_user(buffer, &tu->tqueue[qhead],
+					 sizeof(struct snd_timer_tread)))
 				err = -EFAULT;
-				goto _error;
-			}
 		} else {
-			if (copy_to_user(buffer, &tu->queue[tu->qhead++],
-					 sizeof(struct snd_timer_read))) {
+			if (copy_to_user(buffer, &tu->queue[qhead],
+					 sizeof(struct snd_timer_read)))
 				err = -EFAULT;
-				goto _error;
-			}
 		}
 
-		tu->qhead %= tu->queue_size;
-
-		result += unit;
-		buffer += unit;
-
 		spin_lock_irq(&tu->qlock);
 		tu->qused--;
+		if (err < 0)
+			goto _error;
+		result += unit;
+		buffer += unit;
 	}
-	spin_unlock_irq(&tu->qlock);
  _error:
+	spin_unlock_irq(&tu->qlock);
 	return result > 0 ? result : err;
 }
 
diff --git a/sound/core/timer_compat.c b/sound/core/timer_compat.c
index e05802a..2e90822 100644
--- a/sound/core/timer_compat.c
+++ b/sound/core/timer_compat.c
@@ -70,13 +70,14 @@
 					struct snd_timer_status32 __user *_status)
 {
 	struct snd_timer_user *tu;
-	struct snd_timer_status status;
+	struct snd_timer_status32 status;
 	
 	tu = file->private_data;
 	if (snd_BUG_ON(!tu->timeri))
 		return -ENXIO;
 	memset(&status, 0, sizeof(status));
-	status.tstamp = tu->tstamp;
+	status.tstamp.tv_sec = tu->tstamp.tv_sec;
+	status.tstamp.tv_nsec = tu->tstamp.tv_nsec;
 	status.resolution = snd_timer_resolution(tu->timeri);
 	status.lost = tu->timeri->lost;
 	status.overrun = tu->overrun;
@@ -88,12 +89,21 @@
 	return 0;
 }
 
+#ifdef CONFIG_X86_X32
+/* X32 ABI has the same struct as x86-64 */
+#define snd_timer_user_status_x32(file, s) \
+	snd_timer_user_status(file, s)
+#endif /* CONFIG_X86_X32 */
+
 /*
  */
 
 enum {
 	SNDRV_TIMER_IOCTL_INFO32 = _IOR('T', 0x11, struct snd_timer_info32),
 	SNDRV_TIMER_IOCTL_STATUS32 = _IOW('T', 0x14, struct snd_timer_status32),
+#ifdef CONFIG_X86_X32
+	SNDRV_TIMER_IOCTL_STATUS_X32 = _IOW('T', 0x14, struct snd_timer_status),
+#endif /* CONFIG_X86_X32 */
 };
 
 static long snd_timer_user_ioctl_compat(struct file *file, unsigned int cmd, unsigned long arg)
@@ -122,6 +132,10 @@
 		return snd_timer_user_info_compat(file, argp);
 	case SNDRV_TIMER_IOCTL_STATUS32:
 		return snd_timer_user_status_compat(file, argp);
+#ifdef CONFIG_X86_X32
+	case SNDRV_TIMER_IOCTL_STATUS_X32:
+		return snd_timer_user_status_x32(file, argp);
+#endif /* CONFIG_X86_X32 */
 	}
 	return -ENOIOCTLCMD;
 }
diff --git a/sound/drivers/dummy.c b/sound/drivers/dummy.c
index 75b7485..c0f8f61 100644
--- a/sound/drivers/dummy.c
+++ b/sound/drivers/dummy.c
@@ -109,6 +109,9 @@
 	snd_pcm_uframes_t (*pointer)(struct snd_pcm_substream *);
 };
 
+#define get_dummy_ops(substream) \
+	(*(const struct dummy_timer_ops **)(substream)->runtime->private_data)
+
 struct dummy_model {
 	const char *name;
 	int (*playback_constraints)(struct snd_pcm_runtime *runtime);
@@ -137,7 +140,6 @@
 	int iobox;
 	struct snd_kcontrol *cd_volume_ctl;
 	struct snd_kcontrol *cd_switch_ctl;
-	const struct dummy_timer_ops *timer_ops;
 };
 
 /*
@@ -231,6 +233,8 @@
  */
 
 struct dummy_systimer_pcm {
+	/* ops must be the first item */
+	const struct dummy_timer_ops *timer_ops;
 	spinlock_t lock;
 	struct timer_list timer;
 	unsigned long base_time;
@@ -366,6 +370,8 @@
  */
 
 struct dummy_hrtimer_pcm {
+	/* ops must be the first item */
+	const struct dummy_timer_ops *timer_ops;
 	ktime_t base_time;
 	ktime_t period_time;
 	atomic_t running;
@@ -492,31 +498,25 @@
 
 static int dummy_pcm_trigger(struct snd_pcm_substream *substream, int cmd)
 {
-	struct snd_dummy *dummy = snd_pcm_substream_chip(substream);
-
 	switch (cmd) {
 	case SNDRV_PCM_TRIGGER_START:
 	case SNDRV_PCM_TRIGGER_RESUME:
-		return dummy->timer_ops->start(substream);
+		return get_dummy_ops(substream)->start(substream);
 	case SNDRV_PCM_TRIGGER_STOP:
 	case SNDRV_PCM_TRIGGER_SUSPEND:
-		return dummy->timer_ops->stop(substream);
+		return get_dummy_ops(substream)->stop(substream);
 	}
 	return -EINVAL;
 }
 
 static int dummy_pcm_prepare(struct snd_pcm_substream *substream)
 {
-	struct snd_dummy *dummy = snd_pcm_substream_chip(substream);
-
-	return dummy->timer_ops->prepare(substream);
+	return get_dummy_ops(substream)->prepare(substream);
 }
 
 static snd_pcm_uframes_t dummy_pcm_pointer(struct snd_pcm_substream *substream)
 {
-	struct snd_dummy *dummy = snd_pcm_substream_chip(substream);
-
-	return dummy->timer_ops->pointer(substream);
+	return get_dummy_ops(substream)->pointer(substream);
 }
 
 static struct snd_pcm_hardware dummy_pcm_hardware = {
@@ -562,17 +562,19 @@
 	struct snd_dummy *dummy = snd_pcm_substream_chip(substream);
 	struct dummy_model *model = dummy->model;
 	struct snd_pcm_runtime *runtime = substream->runtime;
+	const struct dummy_timer_ops *ops;
 	int err;
 
-	dummy->timer_ops = &dummy_systimer_ops;
+	ops = &dummy_systimer_ops;
 #ifdef CONFIG_HIGH_RES_TIMERS
 	if (hrtimer)
-		dummy->timer_ops = &dummy_hrtimer_ops;
+		ops = &dummy_hrtimer_ops;
 #endif
 
-	err = dummy->timer_ops->create(substream);
+	err = ops->create(substream);
 	if (err < 0)
 		return err;
+	get_dummy_ops(substream) = ops;
 
 	runtime->hw = dummy->pcm_hw;
 	if (substream->pcm->device & 1) {
@@ -594,7 +596,7 @@
 			err = model->capture_constraints(substream->runtime);
 	}
 	if (err < 0) {
-		dummy->timer_ops->free(substream);
+		get_dummy_ops(substream)->free(substream);
 		return err;
 	}
 	return 0;
@@ -602,8 +604,7 @@
 
 static int dummy_pcm_close(struct snd_pcm_substream *substream)
 {
-	struct snd_dummy *dummy = snd_pcm_substream_chip(substream);
-	dummy->timer_ops->free(substream);
+	get_dummy_ops(substream)->free(substream);
 	return 0;
 }
 
diff --git a/sound/firewire/bebob/bebob_stream.c b/sound/firewire/bebob/bebob_stream.c
index 926e5dc..5022c9b 100644
--- a/sound/firewire/bebob/bebob_stream.c
+++ b/sound/firewire/bebob/bebob_stream.c
@@ -47,14 +47,16 @@
 	[6] = 0x07,
 };
 
-static unsigned int
-get_formation_index(unsigned int rate)
+static int
+get_formation_index(unsigned int rate, unsigned int *index)
 {
 	unsigned int i;
 
 	for (i = 0; i < ARRAY_SIZE(snd_bebob_rate_table); i++) {
-		if (snd_bebob_rate_table[i] == rate)
-			return i;
+		if (snd_bebob_rate_table[i] == rate) {
+			*index = i;
+			return 0;
+		}
 	}
 	return -EINVAL;
 }
@@ -425,7 +427,9 @@
 		goto end;
 
 	/* confirm params for both streams */
-	index = get_formation_index(rate);
+	err = get_formation_index(rate, &index);
+	if (err < 0)
+		goto end;
 	pcm_channels = bebob->tx_stream_formations[index].pcm;
 	midi_channels = bebob->tx_stream_formations[index].midi;
 	err = amdtp_am824_set_parameters(&bebob->tx_stream, rate,
diff --git a/sound/firewire/digi00x/amdtp-dot.c b/sound/firewire/digi00x/amdtp-dot.c
index b02a5e8c..0ac92ab 100644
--- a/sound/firewire/digi00x/amdtp-dot.c
+++ b/sound/firewire/digi00x/amdtp-dot.c
@@ -63,7 +63,7 @@
 #define BYTE_PER_SAMPLE (4)
 #define MAGIC_DOT_BYTE (2)
 #define MAGIC_BYTE_OFF(x) (((x) * BYTE_PER_SAMPLE) + MAGIC_DOT_BYTE)
-static const u8 dot_scrt(const u8 idx, const unsigned int off)
+static u8 dot_scrt(const u8 idx, const unsigned int off)
 {
 	/*
 	 * the length of the added pattern only depends on the lower nibble
diff --git a/sound/firewire/tascam/tascam-transaction.c b/sound/firewire/tascam/tascam-transaction.c
index 904ce03..040a96d 100644
--- a/sound/firewire/tascam/tascam-transaction.c
+++ b/sound/firewire/tascam/tascam-transaction.c
@@ -230,6 +230,7 @@
 	return err;
 error:
 	fw_core_remove_address_handler(&tscm->async_handler);
+	tscm->async_handler.callback_data = NULL;
 	return err;
 }
 
@@ -276,6 +277,9 @@
 	__be32 reg;
 	unsigned int i;
 
+	if (tscm->async_handler.callback_data == NULL)
+		return;
+
 	/* Turn off FireWire LED. */
 	reg = cpu_to_be32(0x0000008e);
 	snd_fw_transaction(tscm->unit, TCODE_WRITE_QUADLET_REQUEST,
@@ -297,6 +301,8 @@
 			   &reg, sizeof(reg), 0);
 
 	fw_core_remove_address_handler(&tscm->async_handler);
+	tscm->async_handler.callback_data = NULL;
+
 	for (i = 0; i < TSCM_MIDI_OUT_PORT_MAX; i++)
 		snd_fw_async_midi_port_destroy(&tscm->out_ports[i]);
 }
diff --git a/sound/firewire/tascam/tascam.c b/sound/firewire/tascam/tascam.c
index ee0bc18..e281c33 100644
--- a/sound/firewire/tascam/tascam.c
+++ b/sound/firewire/tascam/tascam.c
@@ -21,7 +21,6 @@
 		.pcm_playback_analog_channels = 8,
 		.midi_capture_ports = 4,
 		.midi_playback_ports = 4,
-		.is_controller = true,
 	},
 	{
 		.name = "FW-1082",
@@ -31,9 +30,16 @@
 		.pcm_playback_analog_channels = 2,
 		.midi_capture_ports = 2,
 		.midi_playback_ports = 2,
-		.is_controller = true,
 	},
-	/* FW-1804 may be supported. */
+	{
+		.name = "FW-1804",
+		.has_adat = true,
+		.has_spdif = true,
+		.pcm_capture_analog_channels = 8,
+		.pcm_playback_analog_channels = 2,
+		.midi_capture_ports = 2,
+		.midi_playback_ports = 4,
+	},
 };
 
 static int identify_model(struct snd_tscm *tscm)
diff --git a/sound/firewire/tascam/tascam.h b/sound/firewire/tascam/tascam.h
index 2d028d2..30ab77e 100644
--- a/sound/firewire/tascam/tascam.h
+++ b/sound/firewire/tascam/tascam.h
@@ -39,7 +39,6 @@
 	unsigned int pcm_playback_analog_channels;
 	unsigned int midi_capture_ports;
 	unsigned int midi_playback_ports;
-	bool is_controller;
 };
 
 #define TSCM_MIDI_IN_PORT_MAX	4
@@ -72,9 +71,6 @@
 	struct snd_fw_async_midi_port out_ports[TSCM_MIDI_OUT_PORT_MAX];
 	u8 running_status[TSCM_MIDI_OUT_PORT_MAX];
 	bool on_sysex[TSCM_MIDI_OUT_PORT_MAX];
-
-	/* For control messages. */
-	struct snd_firewire_tascam_status *status;
 };
 
 #define TSCM_ADDR_BASE			0xffff00000000ull
diff --git a/sound/hda/hdac_controller.c b/sound/hda/hdac_controller.c
index b5a17cb..8c48623 100644
--- a/sound/hda/hdac_controller.c
+++ b/sound/hda/hdac_controller.c
@@ -426,18 +426,22 @@
  * @bus: HD-audio core bus
  * @status: INTSTS register value
  * @ask: callback to be called for woken streams
+ *
+ * Returns the bits of handled streams, or zero if no stream is handled.
  */
-void snd_hdac_bus_handle_stream_irq(struct hdac_bus *bus, unsigned int status,
+int snd_hdac_bus_handle_stream_irq(struct hdac_bus *bus, unsigned int status,
 				    void (*ack)(struct hdac_bus *,
 						struct hdac_stream *))
 {
 	struct hdac_stream *azx_dev;
 	u8 sd_status;
+	int handled = 0;
 
 	list_for_each_entry(azx_dev, &bus->stream_list, list) {
 		if (status & azx_dev->sd_int_sta_mask) {
 			sd_status = snd_hdac_stream_readb(azx_dev, SD_STS);
 			snd_hdac_stream_writeb(azx_dev, SD_STS, SD_INT_MASK);
+			handled |= 1 << azx_dev->index;
 			if (!azx_dev->substream || !azx_dev->running ||
 			    !(sd_status & SD_INT_COMPLETE))
 				continue;
@@ -445,6 +449,7 @@
 				ack(bus, azx_dev);
 		}
 	}
+	return handled;
 }
 EXPORT_SYMBOL_GPL(snd_hdac_bus_handle_stream_irq);
 
diff --git a/sound/isa/Kconfig b/sound/isa/Kconfig
index 0216475..37adcc6 100644
--- a/sound/isa/Kconfig
+++ b/sound/isa/Kconfig
@@ -3,6 +3,7 @@
 config SND_WSS_LIB
         tristate
         select SND_PCM
+	select SND_TIMER
 
 config SND_SB_COMMON
         tristate
@@ -42,6 +43,7 @@
 	select SND_OPL3_LIB
 	select SND_MPU401_UART
 	select SND_PCM
+	select SND_TIMER
 	help
 	  Say Y here to include support for Analog Devices SoundPort
 	  AD1816A or compatible sound chips.
@@ -209,6 +211,7 @@
 	tristate "Gravis UltraSound Classic"
 	select SND_RAWMIDI
 	select SND_PCM
+	select SND_TIMER
 	help
 	  Say Y here to include support for Gravis UltraSound Classic
 	  soundcards.
@@ -221,6 +224,7 @@
 	select SND_OPL3_LIB
 	select SND_MPU401_UART
 	select SND_PCM
+	select SND_TIMER
 	help
 	  Say Y here to include support for Gravis UltraSound Extreme
 	  soundcards.
diff --git a/sound/pci/Kconfig b/sound/pci/Kconfig
index 656ce39..8f6594a 100644
--- a/sound/pci/Kconfig
+++ b/sound/pci/Kconfig
@@ -155,6 +155,7 @@
 	select SND_PCM
 	select SND_RAWMIDI
 	select SND_AC97_CODEC
+	select SND_TIMER
 	depends on ZONE_DMA
 	help
 	  Say Y here to include support for Aztech AZF3328 (PCI168)
@@ -463,6 +464,7 @@
 	select SND_HWDEP
 	select SND_RAWMIDI
 	select SND_AC97_CODEC
+	select SND_TIMER
 	depends on ZONE_DMA
 	help
 	  Say Y to include support for Sound Blaster PCI 512, Live!,
@@ -889,6 +891,7 @@
 	select SND_OPL3_LIB
 	select SND_MPU401_UART
 	select SND_AC97_CODEC
+	select SND_TIMER
 	help
 	  Say Y here to include support for Yamaha PCI audio chips -
 	  YMF724, YMF724F, YMF740, YMF740C, YMF744, YMF754.
diff --git a/sound/pci/emu10k1/emu10k1_main.c b/sound/pci/emu10k1/emu10k1_main.c
index 28e2f8b..8914534 100644
--- a/sound/pci/emu10k1/emu10k1_main.c
+++ b/sound/pci/emu10k1/emu10k1_main.c
@@ -1141,6 +1141,14 @@
 		emu->emu1010.firmware_thread =
 			kthread_create(emu1010_firmware_thread, emu,
 				       "emu1010_firmware");
+		if (IS_ERR(emu->emu1010.firmware_thread)) {
+			err = PTR_ERR(emu->emu1010.firmware_thread);
+			emu->emu1010.firmware_thread = NULL;
+			dev_info(emu->card->dev,
+					"emu1010: Creating thread failed\n");
+			return err;
+		}
+
 		wake_up_process(emu->emu1010.firmware_thread);
 	}
 
diff --git a/sound/pci/hda/hda_controller.c b/sound/pci/hda/hda_controller.c
index 37cf9ce..27de801 100644
--- a/sound/pci/hda/hda_controller.c
+++ b/sound/pci/hda/hda_controller.c
@@ -930,6 +930,8 @@
 	struct azx *chip = dev_id;
 	struct hdac_bus *bus = azx_bus(chip);
 	u32 status;
+	bool active, handled = false;
+	int repeat = 0; /* count for avoiding endless loop */
 
 #ifdef CONFIG_PM
 	if (azx_has_pm_runtime(chip))
@@ -939,33 +941,36 @@
 
 	spin_lock(&bus->reg_lock);
 
-	if (chip->disabled) {
-		spin_unlock(&bus->reg_lock);
-		return IRQ_NONE;
-	}
+	if (chip->disabled)
+		goto unlock;
 
-	status = azx_readl(chip, INTSTS);
-	if (status == 0 || status == 0xffffffff) {
-		spin_unlock(&bus->reg_lock);
-		return IRQ_NONE;
-	}
+	do {
+		status = azx_readl(chip, INTSTS);
+		if (status == 0 || status == 0xffffffff)
+			break;
 
-	snd_hdac_bus_handle_stream_irq(bus, status, stream_update);
+		handled = true;
+		active = false;
+		if (snd_hdac_bus_handle_stream_irq(bus, status, stream_update))
+			active = true;
 
-	/* clear rirb int */
-	status = azx_readb(chip, RIRBSTS);
-	if (status & RIRB_INT_MASK) {
-		if (status & RIRB_INT_RESPONSE) {
-			if (chip->driver_caps & AZX_DCAPS_CTX_WORKAROUND)
-				udelay(80);
-			snd_hdac_bus_update_rirb(bus);
+		/* clear rirb int */
+		status = azx_readb(chip, RIRBSTS);
+		if (status & RIRB_INT_MASK) {
+			active = true;
+			if (status & RIRB_INT_RESPONSE) {
+				if (chip->driver_caps & AZX_DCAPS_CTX_WORKAROUND)
+					udelay(80);
+				snd_hdac_bus_update_rirb(bus);
+			}
+			azx_writeb(chip, RIRBSTS, RIRB_INT_MASK);
 		}
-		azx_writeb(chip, RIRBSTS, RIRB_INT_MASK);
-	}
+	} while (active && ++repeat < 10);
 
+ unlock:
 	spin_unlock(&bus->reg_lock);
 
-	return IRQ_HANDLED;
+	return IRQ_RETVAL(handled);
 }
 EXPORT_SYMBOL_GPL(azx_interrupt);
 
diff --git a/sound/pci/hda/hda_generic.c b/sound/pci/hda/hda_generic.c
index 30c8efe..7ca5b89 100644
--- a/sound/pci/hda/hda_generic.c
+++ b/sound/pci/hda/hda_generic.c
@@ -4028,9 +4028,9 @@
 			       struct hda_jack_callback *jack,
 			       bool on)
 {
-	if (jack && jack->tbl->nid)
+	if (jack && jack->nid)
 		sync_power_state_change(codec,
-					set_pin_power_jack(codec, jack->tbl->nid, on));
+					set_pin_power_jack(codec, jack->nid, on));
 }
 
 /* callback only doing power up -- called at first */
diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index 256e6cd..e5240cb 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -90,6 +90,8 @@
 #define NVIDIA_HDA_ENABLE_COHBIT      0x01
 
 /* Defines for Intel SCH HDA snoop control */
+#define INTEL_HDA_CGCTL	 0x48
+#define INTEL_HDA_CGCTL_MISCBDCGE        (0x1 << 6)
 #define INTEL_SCH_HDA_DEVC      0x78
 #define INTEL_SCH_HDA_DEVC_NOSNOOP       (0x1<<11)
 
@@ -361,7 +363,10 @@
 					((pci)->device == 0x0d0c) || \
 					((pci)->device == 0x160c))
 
-#define IS_BROXTON(pci)	((pci)->device == 0x5a98)
+#define IS_SKL(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0xa170)
+#define IS_SKL_LP(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x9d70)
+#define IS_BXT(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x5a98)
+#define IS_SKL_PLUS(pci) (IS_SKL(pci) || IS_SKL_LP(pci) || IS_BXT(pci))
 
 static char *driver_short_names[] = {
 	[AZX_DRIVER_ICH] = "HDA Intel",
@@ -534,15 +539,26 @@
 {
 	struct hdac_bus *bus = azx_bus(chip);
 	struct pci_dev *pci = chip->pci;
+	u32 val;
 
 	if (chip->driver_caps & AZX_DCAPS_I915_POWERWELL)
 		snd_hdac_set_codec_wakeup(bus, true);
+	if (IS_SKL_PLUS(pci)) {
+		pci_read_config_dword(pci, INTEL_HDA_CGCTL, &val);
+		val = val & ~INTEL_HDA_CGCTL_MISCBDCGE;
+		pci_write_config_dword(pci, INTEL_HDA_CGCTL, val);
+	}
 	azx_init_chip(chip, full_reset);
+	if (IS_SKL_PLUS(pci)) {
+		pci_read_config_dword(pci, INTEL_HDA_CGCTL, &val);
+		val = val | INTEL_HDA_CGCTL_MISCBDCGE;
+		pci_write_config_dword(pci, INTEL_HDA_CGCTL, val);
+	}
 	if (chip->driver_caps & AZX_DCAPS_I915_POWERWELL)
 		snd_hdac_set_codec_wakeup(bus, false);
 
 	/* reduce dma latency to avoid noise */
-	if (IS_BROXTON(pci))
+	if (IS_BXT(pci))
 		bxt_reduce_dma_latency(chip);
 }
 
@@ -964,11 +980,6 @@
 /* put codec down to D3 at hibernation for Intel SKL+;
  * otherwise BIOS may still access the codec and screw up the driver
  */
-#define IS_SKL(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0xa170)
-#define IS_SKL_LP(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x9d70)
-#define IS_BXT(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x5a98)
-#define IS_SKL_PLUS(pci) (IS_SKL(pci) || IS_SKL_LP(pci) || IS_BXT(pci))
-
 static int azx_freeze_noirq(struct device *dev)
 {
 	struct pci_dev *pci = to_pci_dev(dev);
@@ -2155,10 +2166,10 @@
 	struct hda_intel *hda;
 
 	if (card) {
-		/* flush the pending probing work */
+		/* cancel the pending probing work */
 		chip = card->private_data;
 		hda = container_of(chip, struct hda_intel, chip);
-		flush_work(&hda->probe_work);
+		cancel_work_sync(&hda->probe_work);
 
 		snd_card_free(card);
 	}
diff --git a/sound/pci/hda/hda_jack.c b/sound/pci/hda/hda_jack.c
index c945e25..a33234e 100644
--- a/sound/pci/hda/hda_jack.c
+++ b/sound/pci/hda/hda_jack.c
@@ -259,7 +259,7 @@
 		if (!callback)
 			return ERR_PTR(-ENOMEM);
 		callback->func = func;
-		callback->tbl = jack;
+		callback->nid = jack->nid;
 		callback->next = jack->callback;
 		jack->callback = callback;
 	}
diff --git a/sound/pci/hda/hda_jack.h b/sound/pci/hda/hda_jack.h
index 858708a..e9814c0 100644
--- a/sound/pci/hda/hda_jack.h
+++ b/sound/pci/hda/hda_jack.h
@@ -21,7 +21,7 @@
 typedef void (*hda_jack_callback_fn) (struct hda_codec *, struct hda_jack_callback *);
 
 struct hda_jack_callback {
-	struct hda_jack_tbl *tbl;
+	hda_nid_t nid;
 	hda_jack_callback_fn func;
 	unsigned int private_data;	/* arbitrary data */
 	struct hda_jack_callback *next;
diff --git a/sound/pci/hda/patch_ca0132.c b/sound/pci/hda/patch_ca0132.c
index 4ef2259..9ceb2bc 100644
--- a/sound/pci/hda/patch_ca0132.c
+++ b/sound/pci/hda/patch_ca0132.c
@@ -4427,13 +4427,16 @@
 static void hp_callback(struct hda_codec *codec, struct hda_jack_callback *cb)
 {
 	struct ca0132_spec *spec = codec->spec;
+	struct hda_jack_tbl *tbl;
 
 	/* Delay enabling the HP amp, to let the mic-detection
 	 * state machine run.
 	 */
 	cancel_delayed_work_sync(&spec->unsol_hp_work);
 	schedule_delayed_work(&spec->unsol_hp_work, msecs_to_jiffies(500));
-	cb->tbl->block_report = 1;
+	tbl = snd_hda_jack_tbl_get(codec, cb->nid);
+	if (tbl)
+		tbl->block_report = 1;
 }
 
 static void amic_callback(struct hda_codec *codec, struct hda_jack_callback *cb)
diff --git a/sound/pci/hda/patch_cirrus.c b/sound/pci/hda/patch_cirrus.c
index a12ae8a..c1c855a 100644
--- a/sound/pci/hda/patch_cirrus.c
+++ b/sound/pci/hda/patch_cirrus.c
@@ -614,6 +614,7 @@
 	CS4208_MAC_AUTO,
 	CS4208_MBA6,
 	CS4208_MBP11,
+	CS4208_MACMINI,
 	CS4208_GPIO0,
 };
 
@@ -621,6 +622,7 @@
 	{ .id = CS4208_GPIO0, .name = "gpio0" },
 	{ .id = CS4208_MBA6, .name = "mba6" },
 	{ .id = CS4208_MBP11, .name = "mbp11" },
+	{ .id = CS4208_MACMINI, .name = "macmini" },
 	{}
 };
 
@@ -632,6 +634,7 @@
 /* codec SSID matching */
 static const struct snd_pci_quirk cs4208_mac_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x106b, 0x5e00, "MacBookPro 11,2", CS4208_MBP11),
+	SND_PCI_QUIRK(0x106b, 0x6c00, "MacMini 7,1", CS4208_MACMINI),
 	SND_PCI_QUIRK(0x106b, 0x7100, "MacBookAir 6,1", CS4208_MBA6),
 	SND_PCI_QUIRK(0x106b, 0x7200, "MacBookAir 6,2", CS4208_MBA6),
 	SND_PCI_QUIRK(0x106b, 0x7b00, "MacBookPro 12,1", CS4208_MBP11),
@@ -666,6 +669,24 @@
 	snd_hda_apply_fixup(codec, action);
 }
 
+/* MacMini 7,1 has the inverted jack detection */
+static void cs4208_fixup_macmini(struct hda_codec *codec,
+				 const struct hda_fixup *fix, int action)
+{
+	static const struct hda_pintbl pincfgs[] = {
+		{ 0x18, 0x00ab9150 }, /* mic (audio-in) jack: disable detect */
+		{ 0x21, 0x004be140 }, /* SPDIF: disable detect */
+		{ }
+	};
+
+	if (action == HDA_FIXUP_ACT_PRE_PROBE) {
+		/* HP pin (0x10) has an inverted detection */
+		codec->inv_jack_detect = 1;
+		/* disable the bogus Mic and SPDIF jack detections */
+		snd_hda_apply_pincfgs(codec, pincfgs);
+	}
+}
+
 static int cs4208_spdif_sw_put(struct snd_kcontrol *kcontrol,
 			       struct snd_ctl_elem_value *ucontrol)
 {
@@ -709,6 +730,12 @@
 		.chained = true,
 		.chain_id = CS4208_GPIO0,
 	},
+	[CS4208_MACMINI] = {
+		.type = HDA_FIXUP_FUNC,
+		.v.func = cs4208_fixup_macmini,
+		.chained = true,
+		.chain_id = CS4208_GPIO0,
+	},
 	[CS4208_GPIO0] = {
 		.type = HDA_FIXUP_FUNC,
 		.v.func = cs4208_fixup_gpio0,
diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c
index 426a29a..bcbc4ee 100644
--- a/sound/pci/hda/patch_hdmi.c
+++ b/sound/pci/hda/patch_hdmi.c
@@ -448,7 +448,8 @@
 	eld = &per_pin->sink_eld;
 
 	mutex_lock(&per_pin->lock);
-	if (eld->eld_size > ARRAY_SIZE(ucontrol->value.bytes.data)) {
+	if (eld->eld_size > ARRAY_SIZE(ucontrol->value.bytes.data) ||
+	    eld->eld_size > ELD_MAX_SIZE) {
 		mutex_unlock(&per_pin->lock);
 		snd_BUG();
 		return -EINVAL;
@@ -1193,7 +1194,7 @@
 static void jack_callback(struct hda_codec *codec,
 			  struct hda_jack_callback *jack)
 {
-	check_presence_and_report(codec, jack->tbl->nid);
+	check_presence_and_report(codec, jack->nid);
 }
 
 static void hdmi_intrinsic_event(struct hda_codec *codec, unsigned int res)
@@ -2476,13 +2477,6 @@
 			is_broxton(codec))
 		codec->core.link_power_control = 1;
 
-	if (codec_has_acomp(codec)) {
-		codec->depop_delay = 0;
-		spec->i915_audio_ops.audio_ptr = codec;
-		spec->i915_audio_ops.pin_eld_notify = intel_pin_eld_notify;
-		snd_hdac_i915_register_notifier(&spec->i915_audio_ops);
-	}
-
 	if (hdmi_parse_codec(codec) < 0) {
 		if (spec->i915_bound)
 			snd_hdac_i915_exit(&codec->bus->core);
@@ -2504,6 +2498,18 @@
 
 	init_channel_allocations();
 
+	if (codec_has_acomp(codec)) {
+		codec->depop_delay = 0;
+		spec->i915_audio_ops.audio_ptr = codec;
+		/* intel_audio_codec_enable() or intel_audio_codec_disable()
+		 * will call pin_eld_notify with using audio_ptr pointer
+		 * We need make sure audio_ptr is really setup
+		 */
+		wmb();
+		spec->i915_audio_ops.pin_eld_notify = intel_pin_eld_notify;
+		snd_hdac_i915_register_notifier(&spec->i915_audio_ops);
+	}
+
 	return 0;
 }
 
@@ -3653,6 +3659,7 @@
 HDA_CODEC_ENTRY(0x10de0071, "GPU 71 HDMI/DP",	patch_nvhdmi),
 HDA_CODEC_ENTRY(0x10de0072, "GPU 72 HDMI/DP",	patch_nvhdmi),
 HDA_CODEC_ENTRY(0x10de007d, "GPU 7d HDMI/DP",	patch_nvhdmi),
+HDA_CODEC_ENTRY(0x10de0083, "GPU 83 HDMI/DP",	patch_nvhdmi),
 HDA_CODEC_ENTRY(0x10de8001, "MCP73 HDMI",	patch_nvhdmi_2ch),
 HDA_CODEC_ENTRY(0x11069f80, "VX900 HDMI/DP",	patch_via_hdmi),
 HDA_CODEC_ENTRY(0x11069f81, "VX900 HDMI/DP",	patch_via_hdmi),
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 3375324..93d2156 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -282,7 +282,7 @@
 	uctl = kzalloc(sizeof(*uctl), GFP_KERNEL);
 	if (!uctl)
 		return;
-	val = snd_hda_codec_read(codec, jack->tbl->nid, 0,
+	val = snd_hda_codec_read(codec, jack->nid, 0,
 				 AC_VERB_GET_VOLUME_KNOB_CONTROL, 0);
 	val &= HDA_AMP_VOLMASK;
 	uctl->value.integer.value[0] = val;
@@ -327,6 +327,7 @@
 	case 0x10ec0292:
 		alc_update_coef_idx(codec, 0x4, 1<<15, 0);
 		break;
+	case 0x10ec0225:
 	case 0x10ec0233:
 	case 0x10ec0255:
 	case 0x10ec0256:
@@ -900,6 +901,7 @@
 	{ 0x10ec0899, 0x1028, 0, "ALC3861" },
 	{ 0x10ec0298, 0x1028, 0, "ALC3266" },
 	{ 0x10ec0256, 0x1028, 0, "ALC3246" },
+	{ 0x10ec0225, 0x1028, 0, "ALC3253" },
 	{ 0x10ec0670, 0x1025, 0, "ALC669X" },
 	{ 0x10ec0676, 0x1025, 0, "ALC679X" },
 	{ 0x10ec0282, 0x1043, 0, "ALC3229" },
@@ -1785,7 +1787,6 @@
 	ALC882_FIXUP_NO_PRIMARY_HP,
 	ALC887_FIXUP_ASUS_BASS,
 	ALC887_FIXUP_BASS_CHMAP,
-	ALC882_FIXUP_DISABLE_AAMIX,
 };
 
 static void alc889_fixup_coef(struct hda_codec *codec,
@@ -1947,8 +1948,6 @@
 
 static void alc_fixup_bass_chmap(struct hda_codec *codec,
 				 const struct hda_fixup *fix, int action);
-static void alc_fixup_disable_aamix(struct hda_codec *codec,
-				    const struct hda_fixup *fix, int action);
 
 static const struct hda_fixup alc882_fixups[] = {
 	[ALC882_FIXUP_ABIT_AW9D_MAX] = {
@@ -2186,10 +2185,6 @@
 		.type = HDA_FIXUP_FUNC,
 		.v.func = alc_fixup_bass_chmap,
 	},
-	[ALC882_FIXUP_DISABLE_AAMIX] = {
-		.type = HDA_FIXUP_FUNC,
-		.v.func = alc_fixup_disable_aamix,
-	},
 };
 
 static const struct snd_pci_quirk alc882_fixup_tbl[] = {
@@ -2228,6 +2223,7 @@
 	SND_PCI_QUIRK(0x104d, 0x9047, "Sony Vaio TT", ALC889_FIXUP_VAIO_TT),
 	SND_PCI_QUIRK(0x104d, 0x905a, "Sony Vaio Z", ALC882_FIXUP_NO_PRIMARY_HP),
 	SND_PCI_QUIRK(0x104d, 0x9043, "Sony Vaio VGC-LN51JGB", ALC882_FIXUP_NO_PRIMARY_HP),
+	SND_PCI_QUIRK(0x104d, 0x9044, "Sony VAIO AiO", ALC882_FIXUP_NO_PRIMARY_HP),
 
 	/* All Apple entries are in codec SSIDs */
 	SND_PCI_QUIRK(0x106b, 0x00a0, "MacBookPro 3,1", ALC889_FIXUP_MBP_VREF),
@@ -2257,7 +2253,6 @@
 	SND_PCI_QUIRK(0x1462, 0x7350, "MSI-7350", ALC889_FIXUP_CD),
 	SND_PCI_QUIRK_VENDOR(0x1462, "MSI", ALC882_FIXUP_GPIO3),
 	SND_PCI_QUIRK(0x1458, 0xa002, "Gigabyte EP45-DS3/Z87X-UD3H", ALC889_FIXUP_FRONT_HP_NO_PRESENCE),
-	SND_PCI_QUIRK(0x1458, 0xa182, "Gigabyte Z170X-UD3", ALC882_FIXUP_DISABLE_AAMIX),
 	SND_PCI_QUIRK(0x147b, 0x107a, "Abit AW9D-MAX", ALC882_FIXUP_ABIT_AW9D_MAX),
 	SND_PCI_QUIRK_VENDOR(0x1558, "Clevo laptop", ALC882_FIXUP_EAPD),
 	SND_PCI_QUIRK(0x161f, 0x2054, "Medion laptop", ALC883_FIXUP_EAPD),
@@ -2651,6 +2646,7 @@
 	ALC269_TYPE_ALC298,
 	ALC269_TYPE_ALC255,
 	ALC269_TYPE_ALC256,
+	ALC269_TYPE_ALC225,
 };
 
 /*
@@ -2680,6 +2676,7 @@
 	case ALC269_TYPE_ALC298:
 	case ALC269_TYPE_ALC255:
 	case ALC269_TYPE_ALC256:
+	case ALC269_TYPE_ALC225:
 		ssids = alc269_ssids;
 		break;
 	default:
@@ -3658,6 +3655,16 @@
 		WRITE_COEF(0xb7, 0x802b),
 		{}
 	};
+	static struct coef_fw coef0225[] = {
+		UPDATE_COEF(0x4a, 1<<8, 0),
+		UPDATE_COEFEX(0x57, 0x05, 1<<14, 0),
+		UPDATE_COEF(0x63, 3<<14, 3<<14),
+		UPDATE_COEF(0x4a, 3<<4, 2<<4),
+		UPDATE_COEF(0x4a, 3<<10, 3<<10),
+		UPDATE_COEF(0x45, 0x3f<<10, 0x34<<10),
+		UPDATE_COEF(0x4a, 3<<10, 0),
+		{}
+	};
 
 	switch (codec->core.vendor_id) {
 	case 0x10ec0255:
@@ -3682,6 +3689,9 @@
 	case 0x10ec0668:
 		alc_process_coef_fw(codec, coef0668);
 		break;
+	case 0x10ec0225:
+		alc_process_coef_fw(codec, coef0225);
+		break;
 	}
 	codec_dbg(codec, "Headset jack set to unplugged mode.\n");
 }
@@ -3727,6 +3737,13 @@
 		UPDATE_COEF(0xc3, 0, 1<<12),
 		{}
 	};
+	static struct coef_fw coef0225[] = {
+		UPDATE_COEFEX(0x57, 0x05, 1<<14, 1<<14),
+		UPDATE_COEF(0x4a, 3<<4, 2<<4),
+		UPDATE_COEF(0x63, 3<<14, 0),
+		{}
+	};
+
 
 	switch (codec->core.vendor_id) {
 	case 0x10ec0255:
@@ -3772,12 +3789,22 @@
 		alc_process_coef_fw(codec, coef0688);
 		snd_hda_set_pin_ctl_cache(codec, mic_pin, PIN_VREF50);
 		break;
+	case 0x10ec0225:
+		alc_update_coef_idx(codec, 0x45, 0x3f<<10, 0x31<<10);
+		snd_hda_set_pin_ctl_cache(codec, hp_pin, 0);
+		alc_process_coef_fw(codec, coef0225);
+		snd_hda_set_pin_ctl_cache(codec, mic_pin, PIN_VREF50);
+		break;
 	}
 	codec_dbg(codec, "Headset jack set to mic-in mode.\n");
 }
 
 static void alc_headset_mode_default(struct hda_codec *codec)
 {
+	static struct coef_fw coef0225[] = {
+		UPDATE_COEF(0x45, 0x3f<<10, 0x34<<10),
+		{}
+	};
 	static struct coef_fw coef0255[] = {
 		WRITE_COEF(0x45, 0xc089),
 		WRITE_COEF(0x45, 0xc489),
@@ -3819,6 +3846,9 @@
 	};
 
 	switch (codec->core.vendor_id) {
+	case 0x10ec0225:
+		alc_process_coef_fw(codec, coef0225);
+		break;
 	case 0x10ec0255:
 	case 0x10ec0256:
 		alc_process_coef_fw(codec, coef0255);
@@ -3884,6 +3914,13 @@
 		WRITE_COEF(0xc3, 0x0000),
 		{}
 	};
+	static struct coef_fw coef0225[] = {
+		UPDATE_COEF(0x45, 0x3f<<10, 0x35<<10),
+		UPDATE_COEF(0x49, 1<<8, 1<<8),
+		UPDATE_COEF(0x4a, 7<<6, 7<<6),
+		UPDATE_COEF(0x4a, 3<<4, 3<<4),
+		{}
+	};
 
 	switch (codec->core.vendor_id) {
 	case 0x10ec0255:
@@ -3912,6 +3949,9 @@
 	case 0x10ec0668:
 		alc_process_coef_fw(codec, coef0688);
 		break;
+	case 0x10ec0225:
+		alc_process_coef_fw(codec, coef0225);
+		break;
 	}
 	codec_dbg(codec, "Headset jack set to iPhone-style headset mode.\n");
 }
@@ -3955,6 +3995,13 @@
 		WRITE_COEF(0xc3, 0x0000),
 		{}
 	};
+	static struct coef_fw coef0225[] = {
+		UPDATE_COEF(0x45, 0x3f<<10, 0x39<<10),
+		UPDATE_COEF(0x49, 1<<8, 1<<8),
+		UPDATE_COEF(0x4a, 7<<6, 7<<6),
+		UPDATE_COEF(0x4a, 3<<4, 3<<4),
+		{}
+	};
 
 	switch (codec->core.vendor_id) {
 	case 0x10ec0255:
@@ -3983,6 +4030,9 @@
 	case 0x10ec0668:
 		alc_process_coef_fw(codec, coef0688);
 		break;
+	case 0x10ec0225:
+		alc_process_coef_fw(codec, coef0225);
+		break;
 	}
 	codec_dbg(codec, "Headset jack set to Nokia-style headset mode.\n");
 }
@@ -4014,6 +4064,11 @@
 		WRITE_COEF(0xc3, 0x0c00),
 		{}
 	};
+	static struct coef_fw coef0225[] = {
+		UPDATE_COEF(0x45, 0x3f<<10, 0x34<<10),
+		UPDATE_COEF(0x49, 1<<8, 1<<8),
+		{}
+	};
 
 	switch (codec->core.vendor_id) {
 	case 0x10ec0255:
@@ -4058,6 +4113,12 @@
 		val = alc_read_coef_idx(codec, 0xbe);
 		is_ctia = (val & 0x1c02) == 0x1c02;
 		break;
+	case 0x10ec0225:
+		alc_process_coef_fw(codec, coef0225);
+		msleep(800);
+		val = alc_read_coef_idx(codec, 0x46);
+		is_ctia = (val & 0x00f0) == 0x00f0;
+		break;
 	}
 
 	codec_dbg(codec, "Headset jack detected iPhone-style headset: %s\n",
@@ -4695,6 +4756,9 @@
 	ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE,
 	ALC293_FIXUP_LENOVO_SPK_NOISE,
 	ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY,
+	ALC255_FIXUP_DELL_SPK_NOISE,
+	ALC225_FIXUP_DELL1_MIC_NO_PRESENCE,
+	ALC280_FIXUP_HP_HEADSET_MIC,
 };
 
 static const struct hda_fixup alc269_fixups[] = {
@@ -5314,6 +5378,29 @@
 		.type = HDA_FIXUP_FUNC,
 		.v.func = alc233_fixup_lenovo_line2_mic_hotkey,
 	},
+	[ALC255_FIXUP_DELL_SPK_NOISE] = {
+		.type = HDA_FIXUP_FUNC,
+		.v.func = alc_fixup_disable_aamix,
+		.chained = true,
+		.chain_id = ALC255_FIXUP_DELL1_MIC_NO_PRESENCE
+	},
+	[ALC225_FIXUP_DELL1_MIC_NO_PRESENCE] = {
+		.type = HDA_FIXUP_VERBS,
+		.v.verbs = (const struct hda_verb[]) {
+			/* Disable pass-through path for FRONT 14h */
+			{ 0x20, AC_VERB_SET_COEF_INDEX, 0x36 },
+			{ 0x20, AC_VERB_SET_PROC_COEF, 0x57d7 },
+			{}
+		},
+		.chained = true,
+		.chain_id = ALC269_FIXUP_DELL1_MIC_NO_PRESENCE
+	},
+	[ALC280_FIXUP_HP_HEADSET_MIC] = {
+		.type = HDA_FIXUP_FUNC,
+		.v.func = alc_fixup_disable_aamix,
+		.chained = true,
+		.chain_id = ALC269_FIXUP_HEADSET_MIC,
+	},
 };
 
 static const struct snd_pci_quirk alc269_fixup_tbl[] = {
@@ -5325,6 +5412,7 @@
 	SND_PCI_QUIRK(0x1025, 0x080d, "Acer Aspire V5-122P", ALC269_FIXUP_ASPIRE_HEADSET_MIC),
 	SND_PCI_QUIRK(0x1025, 0x0740, "Acer AO725", ALC271_FIXUP_HP_GATE_MIC_JACK),
 	SND_PCI_QUIRK(0x1025, 0x0742, "Acer AO756", ALC271_FIXUP_HP_GATE_MIC_JACK),
+	SND_PCI_QUIRK(0x1025, 0x0762, "Acer Aspire E1-472", ALC271_FIXUP_HP_GATE_MIC_JACK_E1_572),
 	SND_PCI_QUIRK(0x1025, 0x0775, "Acer Aspire E1-572", ALC271_FIXUP_HP_GATE_MIC_JACK_E1_572),
 	SND_PCI_QUIRK(0x1025, 0x079b, "Acer Aspire V5-573G", ALC282_FIXUP_ASPIRE_V5_PINS),
 	SND_PCI_QUIRK(0x1025, 0x106d, "Acer Cloudbook 14", ALC283_FIXUP_CHROME_BOOK),
@@ -5356,6 +5444,7 @@
 	SND_PCI_QUIRK(0x1028, 0x06df, "Dell", ALC293_FIXUP_DISABLE_AAMIX_MULTIJACK),
 	SND_PCI_QUIRK(0x1028, 0x06e0, "Dell", ALC293_FIXUP_DISABLE_AAMIX_MULTIJACK),
 	SND_PCI_QUIRK(0x1028, 0x0704, "Dell XPS 13", ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE),
+	SND_PCI_QUIRK(0x1028, 0x0725, "Dell Inspiron 3162", ALC255_FIXUP_DELL_SPK_NOISE),
 	SND_PCI_QUIRK(0x1028, 0x164a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1028, 0x164b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x103c, 0x1586, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC2),
@@ -5416,6 +5505,7 @@
 	SND_PCI_QUIRK(0x103c, 0x2335, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
 	SND_PCI_QUIRK(0x103c, 0x2336, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
 	SND_PCI_QUIRK(0x103c, 0x2337, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
+	SND_PCI_QUIRK(0x103c, 0x221c, "HP EliteBook 755 G2", ALC280_FIXUP_HP_HEADSET_MIC),
 	SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300),
 	SND_PCI_QUIRK(0x1043, 0x106d, "Asus K53BE", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
 	SND_PCI_QUIRK(0x1043, 0x115d, "Asus 1015E", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
@@ -5560,6 +5650,9 @@
 	{.id = ALC292_FIXUP_TPT440, .name = "tpt440"},
 	{}
 };
+#define ALC225_STANDARD_PINS \
+	{0x12, 0xb7a60130}, \
+	{0x21, 0x04211020}
 
 #define ALC256_STANDARD_PINS \
 	{0x12, 0x90a60140}, \
@@ -5581,6 +5674,12 @@
 	{0x21, 0x03211020}
 
 static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = {
+	SND_HDA_PIN_QUIRK(0x10ec0225, 0x1028, "Dell", ALC225_FIXUP_DELL1_MIC_NO_PRESENCE,
+		ALC225_STANDARD_PINS,
+		{0x14, 0x901701a0}),
+	SND_HDA_PIN_QUIRK(0x10ec0225, 0x1028, "Dell", ALC225_FIXUP_DELL1_MIC_NO_PRESENCE,
+		ALC225_STANDARD_PINS,
+		{0x14, 0x901701b0}),
 	SND_HDA_PIN_QUIRK(0x10ec0255, 0x1028, "Dell", ALC255_FIXUP_DELL2_MIC_NO_PRESENCE,
 		{0x14, 0x90170110},
 		{0x21, 0x02211020}),
@@ -5906,6 +6005,9 @@
 		spec->gen.mixer_nid = 0; /* ALC256 does not have any loopback mixer path */
 		alc_update_coef_idx(codec, 0x36, 1 << 13, 1 << 5); /* Switch pcbeep path to Line in path*/
 		break;
+	case 0x10ec0225:
+		spec->codec_variant = ALC269_TYPE_ALC225;
+		break;
 	}
 
 	if (snd_hda_codec_read(codec, 0x51, 0, AC_VERB_PARAMETERS, 0) == 0x10ec5505) {
@@ -6796,6 +6898,7 @@
  */
 static const struct hda_device_id snd_hda_id_realtek[] = {
 	HDA_CODEC_ENTRY(0x10ec0221, "ALC221", patch_alc269),
+	HDA_CODEC_ENTRY(0x10ec0225, "ALC225", patch_alc269),
 	HDA_CODEC_ENTRY(0x10ec0231, "ALC231", patch_alc269),
 	HDA_CODEC_ENTRY(0x10ec0233, "ALC233", patch_alc269),
 	HDA_CODEC_ENTRY(0x10ec0235, "ALC233", patch_alc269),
diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c
index 2c7c5eb..37b70f8 100644
--- a/sound/pci/hda/patch_sigmatel.c
+++ b/sound/pci/hda/patch_sigmatel.c
@@ -493,9 +493,9 @@
 	if (!spec->num_pwrs)
 		return;
 
-	if (jack && jack->tbl->nid) {
-		stac_toggle_power_map(codec, jack->tbl->nid,
-				      snd_hda_jack_detect(codec, jack->tbl->nid),
+	if (jack && jack->nid) {
+		stac_toggle_power_map(codec, jack->nid,
+				      snd_hda_jack_detect(codec, jack->nid),
 				      true);
 		return;
 	}
diff --git a/sound/pci/rme9652/hdsp.c b/sound/pci/rme9652/hdsp.c
index 2875b4f..7c8941b 100644
--- a/sound/pci/rme9652/hdsp.c
+++ b/sound/pci/rme9652/hdsp.c
@@ -2879,7 +2879,7 @@
 {
 	struct hdsp *hdsp = snd_kcontrol_chip(kcontrol);
 
-	ucontrol->value.enumerated.item[0] = hdsp_dds_offset(hdsp);
+	ucontrol->value.integer.value[0] = hdsp_dds_offset(hdsp);
 	return 0;
 }
 
@@ -2891,7 +2891,7 @@
 
 	if (!snd_hdsp_use_is_exclusive(hdsp))
 		return -EBUSY;
-	val = ucontrol->value.enumerated.item[0];
+	val = ucontrol->value.integer.value[0];
 	spin_lock_irq(&hdsp->lock);
 	if (val != hdsp_dds_offset(hdsp))
 		change = (hdsp_set_dds_offset(hdsp, val) == 0) ? 1 : 0;
diff --git a/sound/pci/rme9652/hdspm.c b/sound/pci/rme9652/hdspm.c
index 8bc8016..a4a999a 100644
--- a/sound/pci/rme9652/hdspm.c
+++ b/sound/pci/rme9652/hdspm.c
@@ -1601,6 +1601,9 @@
 {
 	u64 n;
 
+	if (snd_BUG_ON(rate <= 0))
+		return;
+
 	if (rate >= 112000)
 		rate /= 4;
 	else if (rate >= 56000)
@@ -2215,6 +2218,8 @@
 		} else {
 			/* slave mode, return external sample rate */
 			rate = hdspm_external_sample_rate(hdspm);
+			if (!rate)
+				rate = hdspm->system_sample_rate;
 		}
 	}
 
@@ -2260,8 +2265,11 @@
 					    ucontrol)
 {
 	struct hdspm *hdspm = snd_kcontrol_chip(kcontrol);
+	int rate = ucontrol->value.integer.value[0];
 
-	hdspm_set_dds_value(hdspm, ucontrol->value.enumerated.item[0]);
+	if (rate < 27000 || rate > 207000)
+		return -EINVAL;
+	hdspm_set_dds_value(hdspm, ucontrol->value.integer.value[0]);
 	return 0;
 }
 
@@ -4449,7 +4457,7 @@
 {
 	struct hdspm *hdspm = snd_kcontrol_chip(kcontrol);
 
-	ucontrol->value.enumerated.item[0] = hdspm->tco->term;
+	ucontrol->value.integer.value[0] = hdspm->tco->term;
 
 	return 0;
 }
@@ -4460,8 +4468,8 @@
 {
 	struct hdspm *hdspm = snd_kcontrol_chip(kcontrol);
 
-	if (hdspm->tco->term != ucontrol->value.enumerated.item[0]) {
-		hdspm->tco->term = ucontrol->value.enumerated.item[0];
+	if (hdspm->tco->term != ucontrol->value.integer.value[0]) {
+		hdspm->tco->term = ucontrol->value.integer.value[0];
 
 		hdspm_tco_write(hdspm);
 
diff --git a/sound/soc/amd/acp-pcm-dma.c b/sound/soc/amd/acp-pcm-dma.c
index 3191e0a..d1fb035 100644
--- a/sound/soc/amd/acp-pcm-dma.c
+++ b/sound/soc/amd/acp-pcm-dma.c
@@ -635,6 +635,7 @@
 					    SNDRV_PCM_HW_PARAM_PERIODS);
 	if (ret < 0) {
 		dev_err(prtd->platform->dev, "set integer constraint failed\n");
+		kfree(adata);
 		return ret;
 	}
 
diff --git a/sound/soc/codecs/ab8500-codec.c b/sound/soc/codecs/ab8500-codec.c
index affb192..faae693 100644
--- a/sound/soc/codecs/ab8500-codec.c
+++ b/sound/soc/codecs/ab8500-codec.c
@@ -1130,7 +1130,7 @@
 	struct ab8500_codec_drvdata *drvdata = dev_get_drvdata(codec->dev);
 
 	mutex_lock(&drvdata->ctrl_lock);
-	ucontrol->value.integer.value[0] = drvdata->sid_status;
+	ucontrol->value.enumerated.item[0] = drvdata->sid_status;
 	mutex_unlock(&drvdata->ctrl_lock);
 
 	return 0;
@@ -1147,7 +1147,7 @@
 
 	dev_dbg(codec->dev, "%s: Enter\n", __func__);
 
-	if (ucontrol->value.integer.value[0] != SID_APPLY_FIR) {
+	if (ucontrol->value.enumerated.item[0] != SID_APPLY_FIR) {
 		dev_err(codec->dev,
 			"%s: ERROR: This control supports '%s' only!\n",
 			__func__, enum_sid_state[SID_APPLY_FIR]);
@@ -1199,7 +1199,7 @@
 	struct ab8500_codec_drvdata *drvdata = dev_get_drvdata(codec->dev);
 
 	mutex_lock(&drvdata->ctrl_lock);
-	ucontrol->value.integer.value[0] = drvdata->anc_status;
+	ucontrol->value.enumerated.item[0] = drvdata->anc_status;
 	mutex_unlock(&drvdata->ctrl_lock);
 
 	return 0;
@@ -1220,7 +1220,7 @@
 
 	mutex_lock(&drvdata->ctrl_lock);
 
-	req = ucontrol->value.integer.value[0];
+	req = ucontrol->value.enumerated.item[0];
 	if (req >= ARRAY_SIZE(enum_anc_state)) {
 		status = -EINVAL;
 		goto cleanup;
diff --git a/sound/soc/codecs/adau17x1.h b/sound/soc/codecs/adau17x1.h
index e13583e..5ae87a0 100644
--- a/sound/soc/codecs/adau17x1.h
+++ b/sound/soc/codecs/adau17x1.h
@@ -103,9 +103,9 @@
 #define ADAU17X1_CLOCK_CONTROL_CORECLK_SRC_PLL	BIT(3)
 #define ADAU17X1_CLOCK_CONTROL_SYSCLK_EN	BIT(0)
 
-#define ADAU17X1_SERIAL_PORT1_BCLK32		(0x0 << 5)
-#define ADAU17X1_SERIAL_PORT1_BCLK48		(0x1 << 5)
-#define ADAU17X1_SERIAL_PORT1_BCLK64		(0x2 << 5)
+#define ADAU17X1_SERIAL_PORT1_BCLK64		(0x0 << 5)
+#define ADAU17X1_SERIAL_PORT1_BCLK32		(0x1 << 5)
+#define ADAU17X1_SERIAL_PORT1_BCLK48		(0x2 << 5)
 #define ADAU17X1_SERIAL_PORT1_BCLK128		(0x3 << 5)
 #define ADAU17X1_SERIAL_PORT1_BCLK256		(0x4 << 5)
 #define ADAU17X1_SERIAL_PORT1_BCLK_MASK		(0x7 << 5)
diff --git a/sound/soc/codecs/arizona.c b/sound/soc/codecs/arizona.c
index 33143fe..9178531 100644
--- a/sound/soc/codecs/arizona.c
+++ b/sound/soc/codecs/arizona.c
@@ -1929,6 +1929,25 @@
 	{ 1000000, 13500000, 0,  1 },
 };
 
+static const unsigned int pseudo_fref_max[ARIZONA_FLL_MAX_FRATIO] = {
+	13500000,
+	 6144000,
+	 6144000,
+	 3072000,
+	 3072000,
+	 2822400,
+	 2822400,
+	 1536000,
+	 1536000,
+	 1536000,
+	 1536000,
+	 1536000,
+	 1536000,
+	 1536000,
+	 1536000,
+	  768000,
+};
+
 static struct {
 	unsigned int min;
 	unsigned int max;
@@ -2042,16 +2061,32 @@
 	/* Adjust FRATIO/refdiv to avoid integer mode if possible */
 	refdiv = cfg->refdiv;
 
+	arizona_fll_dbg(fll, "pseudo: initial ratio=%u fref=%u refdiv=%u\n",
+			init_ratio, Fref, refdiv);
+
 	while (div <= ARIZONA_FLL_MAX_REFDIV) {
 		for (ratio = init_ratio; ratio <= ARIZONA_FLL_MAX_FRATIO;
 		     ratio++) {
 			if ((ARIZONA_FLL_VCO_CORNER / 2) /
-			    (fll->vco_mult * ratio) < Fref)
+			    (fll->vco_mult * ratio) < Fref) {
+				arizona_fll_dbg(fll, "pseudo: hit VCO corner\n");
 				break;
+			}
+
+			if (Fref > pseudo_fref_max[ratio - 1]) {
+				arizona_fll_dbg(fll,
+					"pseudo: exceeded max fref(%u) for ratio=%u\n",
+					pseudo_fref_max[ratio - 1],
+					ratio);
+				break;
+			}
 
 			if (target % (ratio * Fref)) {
 				cfg->refdiv = refdiv;
 				cfg->fratio = ratio - 1;
+				arizona_fll_dbg(fll,
+					"pseudo: found fref=%u refdiv=%d(%d) ratio=%d\n",
+					Fref, refdiv, div, ratio);
 				return ratio;
 			}
 		}
@@ -2060,6 +2095,9 @@
 			if (target % (ratio * Fref)) {
 				cfg->refdiv = refdiv;
 				cfg->fratio = ratio - 1;
+				arizona_fll_dbg(fll,
+					"pseudo: found fref=%u refdiv=%d(%d) ratio=%d\n",
+					Fref, refdiv, div, ratio);
 				return ratio;
 			}
 		}
@@ -2068,6 +2106,9 @@
 		Fref /= 2;
 		refdiv++;
 		init_ratio = arizona_find_fratio(Fref, NULL);
+		arizona_fll_dbg(fll,
+				"pseudo: change fref=%u refdiv=%d(%d) ratio=%u\n",
+				Fref, refdiv, div, init_ratio);
 	}
 
 	arizona_fll_warn(fll, "Falling back to integer mode operation\n");
diff --git a/sound/soc/codecs/cs42l51.c b/sound/soc/codecs/cs42l51.c
index b395152..35488f1 100644
--- a/sound/soc/codecs/cs42l51.c
+++ b/sound/soc/codecs/cs42l51.c
@@ -60,15 +60,15 @@
 	switch (value) {
 	default:
 	case 0:
-		ucontrol->value.integer.value[0] = 0;
+		ucontrol->value.enumerated.item[0] = 0;
 		break;
 	/* same value : (L+R)/2 and (R+L)/2 */
 	case 1:
 	case 2:
-		ucontrol->value.integer.value[0] = 1;
+		ucontrol->value.enumerated.item[0] = 1;
 		break;
 	case 3:
-		ucontrol->value.integer.value[0] = 2;
+		ucontrol->value.enumerated.item[0] = 2;
 		break;
 	}
 
@@ -85,7 +85,7 @@
 	struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol);
 	unsigned char val;
 
-	switch (ucontrol->value.integer.value[0]) {
+	switch (ucontrol->value.enumerated.item[0]) {
 	default:
 	case 0:
 		val = CHAN_MIX_NORMAL;
diff --git a/sound/soc/codecs/da732x.c b/sound/soc/codecs/da732x.c
index 1d5a89c..461506a 100644
--- a/sound/soc/codecs/da732x.c
+++ b/sound/soc/codecs/da732x.c
@@ -334,7 +334,7 @@
 	struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol);
 	struct soc_enum *enum_ctrl = (struct soc_enum *)kcontrol->private_value;
 	unsigned int reg = enum_ctrl->reg;
-	unsigned int sel = ucontrol->value.integer.value[0];
+	unsigned int sel = ucontrol->value.enumerated.item[0];
 	unsigned int bits;
 
 	switch (sel) {
@@ -368,13 +368,13 @@
 
 	switch (val) {
 	case DA732X_HPF_VOICE_EN:
-		ucontrol->value.integer.value[0] = DA732X_HPF_VOICE;
+		ucontrol->value.enumerated.item[0] = DA732X_HPF_VOICE;
 		break;
 	case DA732X_HPF_MUSIC_EN:
-		ucontrol->value.integer.value[0] = DA732X_HPF_MUSIC;
+		ucontrol->value.enumerated.item[0] = DA732X_HPF_MUSIC;
 		break;
 	default:
-		ucontrol->value.integer.value[0] = DA732X_HPF_DISABLED;
+		ucontrol->value.enumerated.item[0] = DA732X_HPF_DISABLED;
 		break;
 	}
 
diff --git a/sound/soc/codecs/max98088.c b/sound/soc/codecs/max98088.c
index 20dcc49..fc22804 100644
--- a/sound/soc/codecs/max98088.c
+++ b/sound/soc/codecs/max98088.c
@@ -1496,7 +1496,7 @@
        struct max98088_pdata *pdata = max98088->pdata;
        int channel = max98088_get_channel(codec, kcontrol->id.name);
        struct max98088_cdata *cdata;
-       int sel = ucontrol->value.integer.value[0];
+	int sel = ucontrol->value.enumerated.item[0];
 
        if (channel < 0)
 	       return channel;
diff --git a/sound/soc/codecs/max98095.c b/sound/soc/codecs/max98095.c
index 1fedac5..3577003 100644
--- a/sound/soc/codecs/max98095.c
+++ b/sound/soc/codecs/max98095.c
@@ -1499,7 +1499,7 @@
 	struct max98095_pdata *pdata = max98095->pdata;
 	int channel = max98095_get_eq_channel(kcontrol->id.name);
 	struct max98095_cdata *cdata;
-	unsigned int sel = ucontrol->value.integer.value[0];
+	unsigned int sel = ucontrol->value.enumerated.item[0];
 	struct max98095_eq_cfg *coef_set;
 	int fs, best, best_val, i;
 	int regmask, regsave;
@@ -1653,7 +1653,7 @@
 	struct max98095_pdata *pdata = max98095->pdata;
 	int channel = max98095_get_bq_channel(codec, kcontrol->id.name);
 	struct max98095_cdata *cdata;
-	unsigned int sel = ucontrol->value.integer.value[0];
+	unsigned int sel = ucontrol->value.enumerated.item[0];
 	struct max98095_biquad_cfg *coef_set;
 	int fs, best, best_val, i;
 	int regmask, regsave;
diff --git a/sound/soc/codecs/rt286.c b/sound/soc/codecs/rt286.c
index bc08f0c..1bd3164 100644
--- a/sound/soc/codecs/rt286.c
+++ b/sound/soc/codecs/rt286.c
@@ -266,6 +266,8 @@
 		} else {
 			*mic = false;
 			regmap_write(rt286->regmap, RT286_SET_MIC1, 0x20);
+			regmap_update_bits(rt286->regmap,
+				RT286_CBJ_CTRL1, 0x0400, 0x0000);
 		}
 	} else {
 		regmap_read(rt286->regmap, RT286_GET_HP_SENSE, &buf);
@@ -470,24 +472,6 @@
 	return 0;
 }
 
-static int rt286_vref_event(struct snd_soc_dapm_widget *w,
-			     struct snd_kcontrol *kcontrol, int event)
-{
-	struct snd_soc_codec *codec = snd_soc_dapm_to_codec(w->dapm);
-
-	switch (event) {
-	case SND_SOC_DAPM_PRE_PMU:
-		snd_soc_update_bits(codec,
-			RT286_CBJ_CTRL1, 0x0400, 0x0000);
-		mdelay(50);
-		break;
-	default:
-		return 0;
-	}
-
-	return 0;
-}
-
 static int rt286_ldo2_event(struct snd_soc_dapm_widget *w,
 			     struct snd_kcontrol *kcontrol, int event)
 {
@@ -536,7 +520,7 @@
 	SND_SOC_DAPM_SUPPLY_S("HV", 1, RT286_POWER_CTRL1,
 		12, 1, NULL, 0),
 	SND_SOC_DAPM_SUPPLY("VREF", RT286_POWER_CTRL1,
-		0, 1, rt286_vref_event, SND_SOC_DAPM_PRE_PMU),
+		0, 1, NULL, 0),
 	SND_SOC_DAPM_SUPPLY_S("LDO1", 1, RT286_POWER_CTRL2,
 		2, 0, NULL, 0),
 	SND_SOC_DAPM_SUPPLY_S("LDO2", 2, RT286_POWER_CTRL1,
@@ -911,8 +895,6 @@
 	case SND_SOC_BIAS_ON:
 		mdelay(10);
 		snd_soc_update_bits(codec,
-			RT286_CBJ_CTRL1, 0x0400, 0x0400);
-		snd_soc_update_bits(codec,
 			RT286_DC_GAIN, 0x200, 0x0);
 
 		break;
@@ -920,8 +902,6 @@
 	case SND_SOC_BIAS_STANDBY:
 		snd_soc_write(codec,
 			RT286_SET_AUDIO_POWER, AC_PWRST_D3);
-		snd_soc_update_bits(codec,
-			RT286_CBJ_CTRL1, 0x0400, 0x0000);
 		break;
 
 	default:
diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c
index c61d38b..93e8c90 100644
--- a/sound/soc/codecs/rt5645.c
+++ b/sound/soc/codecs/rt5645.c
@@ -776,7 +776,7 @@
 
 	/* IN1/IN2 Control */
 	SOC_SINGLE_TLV("IN1 Boost", RT5645_IN1_CTRL1,
-		RT5645_BST_SFT1, 8, 0, bst_tlv),
+		RT5645_BST_SFT1, 12, 0, bst_tlv),
 	SOC_SINGLE_TLV("IN2 Boost", RT5645_IN2_CTRL,
 		RT5645_BST_SFT2, 8, 0, bst_tlv),
 
diff --git a/sound/soc/codecs/rt5659.c b/sound/soc/codecs/rt5659.c
index 820d8fa..fb8ea05 100644
--- a/sound/soc/codecs/rt5659.c
+++ b/sound/soc/codecs/rt5659.c
@@ -3985,7 +3985,6 @@
 	if (rt5659 == NULL)
 		return -ENOMEM;
 
-	rt5659->i2c = i2c;
 	i2c_set_clientdata(i2c, rt5659);
 
 	if (pdata)
@@ -4157,24 +4156,17 @@
 
 	INIT_DELAYED_WORK(&rt5659->jack_detect_work, rt5659_jack_detect_work);
 
-	if (rt5659->i2c->irq) {
-		ret = request_threaded_irq(rt5659->i2c->irq, NULL, rt5659_irq,
-			IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING
+	if (i2c->irq) {
+		ret = devm_request_threaded_irq(&i2c->dev, i2c->irq, NULL,
+			rt5659_irq, IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING
 			| IRQF_ONESHOT, "rt5659", rt5659);
 		if (ret)
 			dev_err(&i2c->dev, "Failed to reguest IRQ: %d\n", ret);
 
 	}
 
-	ret = snd_soc_register_codec(&i2c->dev, &soc_codec_dev_rt5659,
+	return snd_soc_register_codec(&i2c->dev, &soc_codec_dev_rt5659,
 			rt5659_dai, ARRAY_SIZE(rt5659_dai));
-
-	if (ret) {
-		if (rt5659->i2c->irq)
-			free_irq(rt5659->i2c->irq, rt5659);
-	}
-
-	return 0;
 }
 
 static int rt5659_i2c_remove(struct i2c_client *i2c)
@@ -4191,24 +4183,29 @@
 	regmap_write(rt5659->regmap, RT5659_RESET, 0);
 }
 
+#ifdef CONFIG_OF
 static const struct of_device_id rt5659_of_match[] = {
 	{ .compatible = "realtek,rt5658", },
 	{ .compatible = "realtek,rt5659", },
-	{},
+	{ },
 };
+MODULE_DEVICE_TABLE(of, rt5659_of_match);
+#endif
 
+#ifdef CONFIG_ACPI
 static struct acpi_device_id rt5659_acpi_match[] = {
-		{ "10EC5658", 0},
-		{ "10EC5659", 0},
-		{ },
+	{ "10EC5658", 0, },
+	{ "10EC5659", 0, },
+	{ },
 };
 MODULE_DEVICE_TABLE(acpi, rt5659_acpi_match);
+#endif
 
 struct i2c_driver rt5659_i2c_driver = {
 	.driver = {
 		.name = "rt5659",
 		.owner = THIS_MODULE,
-		.of_match_table = rt5659_of_match,
+		.of_match_table = of_match_ptr(rt5659_of_match),
 		.acpi_match_table = ACPI_PTR(rt5659_acpi_match),
 	},
 	.probe = rt5659_i2c_probe,
diff --git a/sound/soc/codecs/rt5659.h b/sound/soc/codecs/rt5659.h
index 8f07ee9..d31c9e5 100644
--- a/sound/soc/codecs/rt5659.h
+++ b/sound/soc/codecs/rt5659.h
@@ -1792,7 +1792,6 @@
 	struct snd_soc_codec *codec;
 	struct rt5659_platform_data pdata;
 	struct regmap *regmap;
-	struct i2c_client *i2c;
 	struct gpio_desc *gpiod_ldo1_en;
 	struct gpio_desc *gpiod_reset;
 	struct snd_soc_jack *hs_jack;
diff --git a/sound/soc/codecs/sigmadsp-i2c.c b/sound/soc/codecs/sigmadsp-i2c.c
index 21ca3a5..d374c18 100644
--- a/sound/soc/codecs/sigmadsp-i2c.c
+++ b/sound/soc/codecs/sigmadsp-i2c.c
@@ -31,7 +31,10 @@
 
 	kfree(buf);
 
-	return ret;
+	if (ret < 0)
+		return ret;
+
+	return 0;
 }
 
 static int sigmadsp_read_i2c(void *control_data,
diff --git a/sound/soc/codecs/tlv320dac33.c b/sound/soc/codecs/tlv320dac33.c
index 781398fb..f7a6ce7 100644
--- a/sound/soc/codecs/tlv320dac33.c
+++ b/sound/soc/codecs/tlv320dac33.c
@@ -446,7 +446,7 @@
 	struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol);
 	struct tlv320dac33_priv *dac33 = snd_soc_codec_get_drvdata(codec);
 
-	ucontrol->value.integer.value[0] = dac33->fifo_mode;
+	ucontrol->value.enumerated.item[0] = dac33->fifo_mode;
 
 	return 0;
 }
@@ -458,17 +458,16 @@
 	struct tlv320dac33_priv *dac33 = snd_soc_codec_get_drvdata(codec);
 	int ret = 0;
 
-	if (dac33->fifo_mode == ucontrol->value.integer.value[0])
+	if (dac33->fifo_mode == ucontrol->value.enumerated.item[0])
 		return 0;
 	/* Do not allow changes while stream is running*/
 	if (snd_soc_codec_is_active(codec))
 		return -EPERM;
 
-	if (ucontrol->value.integer.value[0] < 0 ||
-	    ucontrol->value.integer.value[0] >= DAC33_FIFO_LAST_MODE)
+	if (ucontrol->value.enumerated.item[0] >= DAC33_FIFO_LAST_MODE)
 		ret = -EINVAL;
 	else
-		dac33->fifo_mode = ucontrol->value.integer.value[0];
+		dac33->fifo_mode = ucontrol->value.enumerated.item[0];
 
 	return ret;
 }
diff --git a/sound/soc/codecs/wl1273.c b/sound/soc/codecs/wl1273.c
index 7693c11..1b79778 100644
--- a/sound/soc/codecs/wl1273.c
+++ b/sound/soc/codecs/wl1273.c
@@ -175,7 +175,7 @@
 	struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol);
 	struct wl1273_priv *wl1273 = snd_soc_codec_get_drvdata(codec);
 
-	ucontrol->value.integer.value[0] = wl1273->mode;
+	ucontrol->value.enumerated.item[0] = wl1273->mode;
 
 	return 0;
 }
@@ -193,18 +193,17 @@
 	struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol);
 	struct wl1273_priv *wl1273 = snd_soc_codec_get_drvdata(codec);
 
-	if (wl1273->mode == ucontrol->value.integer.value[0])
+	if (wl1273->mode == ucontrol->value.enumerated.item[0])
 		return 0;
 
 	/* Do not allow changes while stream is running */
 	if (snd_soc_codec_is_active(codec))
 		return -EPERM;
 
-	if (ucontrol->value.integer.value[0] < 0 ||
-	    ucontrol->value.integer.value[0] >=  ARRAY_SIZE(wl1273_audio_route))
+	if (ucontrol->value.enumerated.item[0] >=  ARRAY_SIZE(wl1273_audio_route))
 		return -EINVAL;
 
-	wl1273->mode = ucontrol->value.integer.value[0];
+	wl1273->mode = ucontrol->value.enumerated.item[0];
 
 	return 1;
 }
@@ -219,7 +218,7 @@
 
 	dev_dbg(codec->dev, "%s: enter.\n", __func__);
 
-	ucontrol->value.integer.value[0] = wl1273->core->audio_mode;
+	ucontrol->value.enumerated.item[0] = wl1273->core->audio_mode;
 
 	return 0;
 }
@@ -233,7 +232,7 @@
 
 	dev_dbg(codec->dev, "%s: enter.\n", __func__);
 
-	val = ucontrol->value.integer.value[0];
+	val = ucontrol->value.enumerated.item[0];
 	if (wl1273->core->audio_mode == val)
 		return 0;
 
diff --git a/sound/soc/codecs/wm5110.c b/sound/soc/codecs/wm5110.c
index 6088d30..97c0f1e 100644
--- a/sound/soc/codecs/wm5110.c
+++ b/sound/soc/codecs/wm5110.c
@@ -2382,6 +2382,7 @@
 
 static int wm5110_remove(struct platform_device *pdev)
 {
+	snd_soc_unregister_platform(&pdev->dev);
 	snd_soc_unregister_codec(&pdev->dev);
 	pm_runtime_disable(&pdev->dev);
 
diff --git a/sound/soc/codecs/wm8753.c b/sound/soc/codecs/wm8753.c
index 61299ca..6f1024f 100644
--- a/sound/soc/codecs/wm8753.c
+++ b/sound/soc/codecs/wm8753.c
@@ -233,7 +233,7 @@
 	struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol);
 	struct wm8753_priv *wm8753 = snd_soc_codec_get_drvdata(codec);
 
-	ucontrol->value.integer.value[0] = wm8753->dai_func;
+	ucontrol->value.enumerated.item[0] = wm8753->dai_func;
 	return 0;
 }
 
@@ -244,7 +244,7 @@
 	struct wm8753_priv *wm8753 = snd_soc_codec_get_drvdata(codec);
 	u16 ioctl;
 
-	if (wm8753->dai_func == ucontrol->value.integer.value[0])
+	if (wm8753->dai_func == ucontrol->value.enumerated.item[0])
 		return 0;
 
 	if (snd_soc_codec_is_active(codec))
@@ -252,7 +252,7 @@
 
 	ioctl = snd_soc_read(codec, WM8753_IOCTL);
 
-	wm8753->dai_func = ucontrol->value.integer.value[0];
+	wm8753->dai_func = ucontrol->value.enumerated.item[0];
 
 	if (((ioctl >> 2) & 0x3) == wm8753->dai_func)
 		return 1;
diff --git a/sound/soc/codecs/wm8904.c b/sound/soc/codecs/wm8904.c
index 8172e49..edd7a77 100644
--- a/sound/soc/codecs/wm8904.c
+++ b/sound/soc/codecs/wm8904.c
@@ -396,7 +396,7 @@
 	struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol);
 	struct wm8904_priv *wm8904 = snd_soc_codec_get_drvdata(codec);
 	struct wm8904_pdata *pdata = wm8904->pdata;
-	int value = ucontrol->value.integer.value[0];
+	int value = ucontrol->value.enumerated.item[0];
 
 	if (value >= pdata->num_drc_cfgs)
 		return -EINVAL;
@@ -467,7 +467,7 @@
 	struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol);
 	struct wm8904_priv *wm8904 = snd_soc_codec_get_drvdata(codec);
 	struct wm8904_pdata *pdata = wm8904->pdata;
-	int value = ucontrol->value.integer.value[0];
+	int value = ucontrol->value.enumerated.item[0];
 
 	if (value >= pdata->num_retune_mobile_cfgs)
 		return -EINVAL;
diff --git a/sound/soc/codecs/wm8958-dsp2.c b/sound/soc/codecs/wm8958-dsp2.c
index c799cca..6b864c0 100644
--- a/sound/soc/codecs/wm8958-dsp2.c
+++ b/sound/soc/codecs/wm8958-dsp2.c
@@ -459,7 +459,7 @@
 	struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol);
 	struct wm8994_priv *wm8994 = snd_soc_codec_get_drvdata(codec);
 	struct wm8994 *control = wm8994->wm8994;
-	int value = ucontrol->value.integer.value[0];
+	int value = ucontrol->value.enumerated.item[0];
 	int reg;
 
 	/* Don't allow on the fly reconfiguration */
@@ -549,7 +549,7 @@
 	struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol);
 	struct wm8994_priv *wm8994 = snd_soc_codec_get_drvdata(codec);
 	struct wm8994 *control = wm8994->wm8994;
-	int value = ucontrol->value.integer.value[0];
+	int value = ucontrol->value.enumerated.item[0];
 	int reg;
 
 	/* Don't allow on the fly reconfiguration */
@@ -582,7 +582,7 @@
 	struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol);
 	struct wm8994_priv *wm8994 = snd_soc_codec_get_drvdata(codec);
 	struct wm8994 *control = wm8994->wm8994;
-	int value = ucontrol->value.integer.value[0];
+	int value = ucontrol->value.enumerated.item[0];
 	int reg;
 
 	/* Don't allow on the fly reconfiguration */
@@ -749,7 +749,7 @@
 	struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol);
 	struct wm8994_priv *wm8994 = snd_soc_codec_get_drvdata(codec);
 	struct wm8994 *control = wm8994->wm8994;
-	int value = ucontrol->value.integer.value[0];
+	int value = ucontrol->value.enumerated.item[0];
 	int reg;
 
 	/* Don't allow on the fly reconfiguration */
diff --git a/sound/soc/codecs/wm8960.c b/sound/soc/codecs/wm8960.c
index ff23772..d7f444f 100644
--- a/sound/soc/codecs/wm8960.c
+++ b/sound/soc/codecs/wm8960.c
@@ -240,13 +240,13 @@
 SOC_DOUBLE_R("Capture Switch", WM8960_LINVOL, WM8960_RINVOL,
 	7, 1, 1),
 
-SOC_SINGLE_TLV("Right Input Boost Mixer RINPUT3 Volume",
-	       WM8960_INBMIX1, 4, 7, 0, lineinboost_tlv),
-SOC_SINGLE_TLV("Right Input Boost Mixer RINPUT2 Volume",
-	       WM8960_INBMIX1, 1, 7, 0, lineinboost_tlv),
 SOC_SINGLE_TLV("Left Input Boost Mixer LINPUT3 Volume",
-	       WM8960_INBMIX2, 4, 7, 0, lineinboost_tlv),
+	       WM8960_INBMIX1, 4, 7, 0, lineinboost_tlv),
 SOC_SINGLE_TLV("Left Input Boost Mixer LINPUT2 Volume",
+	       WM8960_INBMIX1, 1, 7, 0, lineinboost_tlv),
+SOC_SINGLE_TLV("Right Input Boost Mixer RINPUT3 Volume",
+	       WM8960_INBMIX2, 4, 7, 0, lineinboost_tlv),
+SOC_SINGLE_TLV("Right Input Boost Mixer RINPUT2 Volume",
 	       WM8960_INBMIX2, 1, 7, 0, lineinboost_tlv),
 SOC_SINGLE_TLV("Right Input Boost Mixer RINPUT1 Volume",
 		WM8960_RINPATH, 4, 3, 0, micboost_tlv),
@@ -643,29 +643,31 @@
 		return -EINVAL;
 	}
 
-	/* check if the sysclk frequency is available. */
-	for (i = 0; i < ARRAY_SIZE(sysclk_divs); ++i) {
-		if (sysclk_divs[i] == -1)
-			continue;
-		sysclk = freq_out / sysclk_divs[i];
-		for (j = 0; j < ARRAY_SIZE(dac_divs); ++j) {
-			if (sysclk == dac_divs[j] * lrclk) {
+	if (wm8960->clk_id != WM8960_SYSCLK_PLL) {
+		/* check if the sysclk frequency is available. */
+		for (i = 0; i < ARRAY_SIZE(sysclk_divs); ++i) {
+			if (sysclk_divs[i] == -1)
+				continue;
+			sysclk = freq_out / sysclk_divs[i];
+			for (j = 0; j < ARRAY_SIZE(dac_divs); ++j) {
+				if (sysclk != dac_divs[j] * lrclk)
+					continue;
 				for (k = 0; k < ARRAY_SIZE(bclk_divs); ++k)
 					if (sysclk == bclk * bclk_divs[k] / 10)
 						break;
 				if (k != ARRAY_SIZE(bclk_divs))
 					break;
 			}
+			if (j != ARRAY_SIZE(dac_divs))
+				break;
 		}
-		if (j != ARRAY_SIZE(dac_divs))
-			break;
-	}
 
-	if (i != ARRAY_SIZE(sysclk_divs)) {
-		goto configure_clock;
-	} else if (wm8960->clk_id != WM8960_SYSCLK_AUTO) {
-		dev_err(codec->dev, "failed to configure clock\n");
-		return -EINVAL;
+		if (i != ARRAY_SIZE(sysclk_divs)) {
+			goto configure_clock;
+		} else if (wm8960->clk_id != WM8960_SYSCLK_AUTO) {
+			dev_err(codec->dev, "failed to configure clock\n");
+			return -EINVAL;
+		}
 	}
 	/* get a available pll out frequency and set pll */
 	for (i = 0; i < ARRAY_SIZE(sysclk_divs); ++i) {
diff --git a/sound/soc/codecs/wm8983.c b/sound/soc/codecs/wm8983.c
index 7350ff6..0c002a5 100644
--- a/sound/soc/codecs/wm8983.c
+++ b/sound/soc/codecs/wm8983.c
@@ -497,9 +497,9 @@
 
 	reg = snd_soc_read(codec, WM8983_EQ1_LOW_SHELF);
 	if (reg & WM8983_EQ3DMODE)
-		ucontrol->value.integer.value[0] = 1;
+		ucontrol->value.enumerated.item[0] = 1;
 	else
-		ucontrol->value.integer.value[0] = 0;
+		ucontrol->value.enumerated.item[0] = 0;
 
 	return 0;
 }
@@ -511,18 +511,18 @@
 	unsigned int regpwr2, regpwr3;
 	unsigned int reg_eq;
 
-	if (ucontrol->value.integer.value[0] != 0
-	    && ucontrol->value.integer.value[0] != 1)
+	if (ucontrol->value.enumerated.item[0] != 0
+	    && ucontrol->value.enumerated.item[0] != 1)
 		return -EINVAL;
 
 	reg_eq = snd_soc_read(codec, WM8983_EQ1_LOW_SHELF);
 	switch ((reg_eq & WM8983_EQ3DMODE) >> WM8983_EQ3DMODE_SHIFT) {
 	case 0:
-		if (!ucontrol->value.integer.value[0])
+		if (!ucontrol->value.enumerated.item[0])
 			return 0;
 		break;
 	case 1:
-		if (ucontrol->value.integer.value[0])
+		if (ucontrol->value.enumerated.item[0])
 			return 0;
 		break;
 	}
@@ -537,7 +537,7 @@
 	/* set the desired eqmode */
 	snd_soc_update_bits(codec, WM8983_EQ1_LOW_SHELF,
 			    WM8983_EQ3DMODE_MASK,
-			    ucontrol->value.integer.value[0]
+			    ucontrol->value.enumerated.item[0]
 			    << WM8983_EQ3DMODE_SHIFT);
 	/* restore DAC/ADC configuration */
 	snd_soc_write(codec, WM8983_POWER_MANAGEMENT_2, regpwr2);
diff --git a/sound/soc/codecs/wm8985.c b/sound/soc/codecs/wm8985.c
index 9918152..6ac76fe 100644
--- a/sound/soc/codecs/wm8985.c
+++ b/sound/soc/codecs/wm8985.c
@@ -531,9 +531,9 @@
 
 	reg = snd_soc_read(codec, WM8985_EQ1_LOW_SHELF);
 	if (reg & WM8985_EQ3DMODE)
-		ucontrol->value.integer.value[0] = 1;
+		ucontrol->value.enumerated.item[0] = 1;
 	else
-		ucontrol->value.integer.value[0] = 0;
+		ucontrol->value.enumerated.item[0] = 0;
 
 	return 0;
 }
@@ -545,18 +545,18 @@
 	unsigned int regpwr2, regpwr3;
 	unsigned int reg_eq;
 
-	if (ucontrol->value.integer.value[0] != 0
-			&& ucontrol->value.integer.value[0] != 1)
+	if (ucontrol->value.enumerated.item[0] != 0
+			&& ucontrol->value.enumerated.item[0] != 1)
 		return -EINVAL;
 
 	reg_eq = snd_soc_read(codec, WM8985_EQ1_LOW_SHELF);
 	switch ((reg_eq & WM8985_EQ3DMODE) >> WM8985_EQ3DMODE_SHIFT) {
 	case 0:
-		if (!ucontrol->value.integer.value[0])
+		if (!ucontrol->value.enumerated.item[0])
 			return 0;
 		break;
 	case 1:
-		if (ucontrol->value.integer.value[0])
+		if (ucontrol->value.enumerated.item[0])
 			return 0;
 		break;
 	}
@@ -573,7 +573,7 @@
 	/* set the desired eqmode */
 	snd_soc_update_bits(codec, WM8985_EQ1_LOW_SHELF,
 			    WM8985_EQ3DMODE_MASK,
-			    ucontrol->value.integer.value[0]
+			    ucontrol->value.enumerated.item[0]
 			    << WM8985_EQ3DMODE_SHIFT);
 	/* restore DAC/ADC configuration */
 	snd_soc_write(codec, WM8985_POWER_MANAGEMENT_2, regpwr2);
diff --git a/sound/soc/codecs/wm8994.c b/sound/soc/codecs/wm8994.c
index 2ccbb32..a18aecb 100644
--- a/sound/soc/codecs/wm8994.c
+++ b/sound/soc/codecs/wm8994.c
@@ -362,7 +362,7 @@
 	struct wm8994 *control = wm8994->wm8994;
 	struct wm8994_pdata *pdata = &control->pdata;
 	int drc = wm8994_get_drc(kcontrol->id.name);
-	int value = ucontrol->value.integer.value[0];
+	int value = ucontrol->value.enumerated.item[0];
 
 	if (drc < 0)
 		return drc;
@@ -469,7 +469,7 @@
 	struct wm8994 *control = wm8994->wm8994;
 	struct wm8994_pdata *pdata = &control->pdata;
 	int block = wm8994_get_retune_mobile_block(kcontrol->id.name);
-	int value = ucontrol->value.integer.value[0];
+	int value = ucontrol->value.enumerated.item[0];
 
 	if (block < 0)
 		return block;
diff --git a/sound/soc/codecs/wm8996.c b/sound/soc/codecs/wm8996.c
index 8d7d6c0..f99b34f 100644
--- a/sound/soc/codecs/wm8996.c
+++ b/sound/soc/codecs/wm8996.c
@@ -416,7 +416,7 @@
 	struct wm8996_priv *wm8996 = snd_soc_codec_get_drvdata(codec);
 	struct wm8996_pdata *pdata = &wm8996->pdata;
 	int block = wm8996_get_retune_mobile_block(kcontrol->id.name);
-	int value = ucontrol->value.integer.value[0];
+	int value = ucontrol->value.enumerated.item[0];
 
 	if (block < 0)
 		return block;
diff --git a/sound/soc/codecs/wm9081.c b/sound/soc/codecs/wm9081.c
index ccb3b15..363b3b6 100644
--- a/sound/soc/codecs/wm9081.c
+++ b/sound/soc/codecs/wm9081.c
@@ -344,9 +344,9 @@
 
 	reg = snd_soc_read(codec, WM9081_ANALOGUE_SPEAKER_2);
 	if (reg & WM9081_SPK_MODE)
-		ucontrol->value.integer.value[0] = 1;
+		ucontrol->value.enumerated.item[0] = 1;
 	else
-		ucontrol->value.integer.value[0] = 0;
+		ucontrol->value.enumerated.item[0] = 0;
 
 	return 0;
 }
@@ -365,7 +365,7 @@
 	unsigned int reg2 = snd_soc_read(codec, WM9081_ANALOGUE_SPEAKER_2);
 
 	/* Are we changing anything? */
-	if (ucontrol->value.integer.value[0] ==
+	if (ucontrol->value.enumerated.item[0] ==
 	    ((reg2 & WM9081_SPK_MODE) != 0))
 		return 0;
 
@@ -373,7 +373,7 @@
 	if (reg_pwr & WM9081_SPK_ENA)
 		return -EINVAL;
 
-	if (ucontrol->value.integer.value[0]) {
+	if (ucontrol->value.enumerated.item[0]) {
 		/* Class AB */
 		reg2 &= ~(WM9081_SPK_INV_MUTE | WM9081_OUT_SPK_CTRL);
 		reg2 |= WM9081_SPK_MODE;
diff --git a/sound/soc/codecs/wm9713.c b/sound/soc/codecs/wm9713.c
index 79e1436..9849643 100644
--- a/sound/soc/codecs/wm9713.c
+++ b/sound/soc/codecs/wm9713.c
@@ -1212,7 +1212,7 @@
 	if (IS_ERR(wm9713->ac97))
 		return PTR_ERR(wm9713->ac97);
 
-	regmap = devm_regmap_init_ac97(wm9713->ac97, &wm9713_regmap_config);
+	regmap = regmap_init_ac97(wm9713->ac97, &wm9713_regmap_config);
 	if (IS_ERR(regmap)) {
 		snd_soc_free_ac97_codec(wm9713->ac97);
 		return PTR_ERR(regmap);
diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c
index 33806d4..b9195b9 100644
--- a/sound/soc/codecs/wm_adsp.c
+++ b/sound/soc/codecs/wm_adsp.c
@@ -586,7 +586,7 @@
 	struct soc_enum *e = (struct soc_enum *)kcontrol->private_value;
 	struct wm_adsp *dsp = snd_soc_codec_get_drvdata(codec);
 
-	ucontrol->value.integer.value[0] = dsp[e->shift_l].fw;
+	ucontrol->value.enumerated.item[0] = dsp[e->shift_l].fw;
 
 	return 0;
 }
@@ -599,10 +599,10 @@
 	struct wm_adsp *dsp = snd_soc_codec_get_drvdata(codec);
 	int ret = 0;
 
-	if (ucontrol->value.integer.value[0] == dsp[e->shift_l].fw)
+	if (ucontrol->value.enumerated.item[0] == dsp[e->shift_l].fw)
 		return 0;
 
-	if (ucontrol->value.integer.value[0] >= WM_ADSP_NUM_FW)
+	if (ucontrol->value.enumerated.item[0] >= WM_ADSP_NUM_FW)
 		return -EINVAL;
 
 	mutex_lock(&dsp[e->shift_l].pwr_lock);
@@ -610,7 +610,7 @@
 	if (dsp[e->shift_l].running || dsp[e->shift_l].compr)
 		ret = -EBUSY;
 	else
-		dsp[e->shift_l].fw = ucontrol->value.integer.value[0];
+		dsp[e->shift_l].fw = ucontrol->value.enumerated.item[0];
 
 	mutex_unlock(&dsp[e->shift_l].pwr_lock);
 
diff --git a/sound/soc/dwc/designware_i2s.c b/sound/soc/dwc/designware_i2s.c
index ce664c2..bff258d 100644
--- a/sound/soc/dwc/designware_i2s.c
+++ b/sound/soc/dwc/designware_i2s.c
@@ -645,6 +645,8 @@
 
 	dev->dev = &pdev->dev;
 
+	dev->i2s_reg_comp1 = I2S_COMP_PARAM_1;
+	dev->i2s_reg_comp2 = I2S_COMP_PARAM_2;
 	if (pdata) {
 		dev->capability = pdata->cap;
 		clk_id = NULL;
@@ -652,9 +654,6 @@
 		if (dev->quirks & DW_I2S_QUIRK_COMP_REG_OFFSET) {
 			dev->i2s_reg_comp1 = pdata->i2s_reg_comp1;
 			dev->i2s_reg_comp2 = pdata->i2s_reg_comp2;
-		} else {
-			dev->i2s_reg_comp1 = I2S_COMP_PARAM_1;
-			dev->i2s_reg_comp2 = I2S_COMP_PARAM_2;
 		}
 		ret = dw_configure_dai_by_pd(dev, dw_i2s_dai, res, pdata);
 	} else {
diff --git a/sound/soc/fsl/imx-pcm-fiq.c b/sound/soc/fsl/imx-pcm-fiq.c
index 49d7513..e63cd5e 100644
--- a/sound/soc/fsl/imx-pcm-fiq.c
+++ b/sound/soc/fsl/imx-pcm-fiq.c
@@ -217,8 +217,8 @@
 	struct snd_pcm_runtime *runtime = substream->runtime;
 	int ret;
 
-	ret = dma_mmap_writecombine(substream->pcm->card->dev, vma,
-		runtime->dma_area, runtime->dma_addr, runtime->dma_bytes);
+	ret = dma_mmap_wc(substream->pcm->card->dev, vma, runtime->dma_area,
+			  runtime->dma_addr, runtime->dma_bytes);
 
 	pr_debug("%s: ret: %d %p %pad 0x%08x\n", __func__, ret,
 			runtime->dma_area,
@@ -247,8 +247,7 @@
 	buf->dev.type = SNDRV_DMA_TYPE_DEV;
 	buf->dev.dev = pcm->card->dev;
 	buf->private_data = NULL;
-	buf->area = dma_alloc_writecombine(pcm->card->dev, size,
-					   &buf->addr, GFP_KERNEL);
+	buf->area = dma_alloc_wc(pcm->card->dev, size, &buf->addr, GFP_KERNEL);
 	if (!buf->area)
 		return -ENOMEM;
 	buf->bytes = size;
@@ -330,8 +329,7 @@
 		if (!buf->area)
 			continue;
 
-		dma_free_writecombine(pcm->card->dev, buf->bytes,
-				      buf->area, buf->addr);
+		dma_free_wc(pcm->card->dev, buf->bytes, buf->area, buf->addr);
 		buf->area = NULL;
 	}
 }
diff --git a/sound/soc/fsl/imx-spdif.c b/sound/soc/fsl/imx-spdif.c
index a407e83..fb896b2 100644
--- a/sound/soc/fsl/imx-spdif.c
+++ b/sound/soc/fsl/imx-spdif.c
@@ -72,8 +72,6 @@
 		goto end;
 	}
 
-	platform_set_drvdata(pdev, data);
-
 end:
 	of_node_put(spdif_np);
 
diff --git a/sound/soc/generic/simple-card.c b/sound/soc/generic/simple-card.c
index 1ded881..2389ab4 100644
--- a/sound/soc/generic/simple-card.c
+++ b/sound/soc/generic/simple-card.c
@@ -99,7 +99,7 @@
 		if (ret && ret != -ENOTSUPP)
 			goto err;
 	}
-
+	return 0;
 err:
 	return ret;
 }
diff --git a/sound/soc/intel/Kconfig b/sound/soc/intel/Kconfig
index 803f95e..7d7c872 100644
--- a/sound/soc/intel/Kconfig
+++ b/sound/soc/intel/Kconfig
@@ -30,11 +30,15 @@
 config SND_SOC_INTEL_SST
 	tristate
 	select SND_SOC_INTEL_SST_ACPI if ACPI
+	select SND_SOC_INTEL_SST_MATCH if ACPI
 	depends on (X86 || COMPILE_TEST)
 
 config SND_SOC_INTEL_SST_ACPI
 	tristate
 
+config SND_SOC_INTEL_SST_MATCH
+	tristate
+
 config SND_SOC_INTEL_HASWELL
 	tristate
 
@@ -57,7 +61,7 @@
 config SND_SOC_INTEL_BYT_RT5640_MACH
 	tristate "ASoC Audio driver for Intel Baytrail with RT5640 codec"
 	depends on X86_INTEL_LPSS && I2C
-	depends on DW_DMAC_CORE=y && (SND_SOC_INTEL_BYTCR_RT5640_MACH = n)
+	depends on DW_DMAC_CORE=y && (SND_SST_IPC_ACPI = n)
 	select SND_SOC_INTEL_SST
 	select SND_SOC_INTEL_BAYTRAIL
 	select SND_SOC_RT5640
@@ -69,7 +73,7 @@
 config SND_SOC_INTEL_BYT_MAX98090_MACH
 	tristate "ASoC Audio driver for Intel Baytrail with MAX98090 codec"
 	depends on X86_INTEL_LPSS && I2C
-	depends on DW_DMAC_CORE=y
+	depends on DW_DMAC_CORE=y && (SND_SST_IPC_ACPI = n)
 	select SND_SOC_INTEL_SST
 	select SND_SOC_INTEL_BAYTRAIL
 	select SND_SOC_MAX98090
@@ -97,6 +101,7 @@
 	select SND_SOC_RT5640
 	select SND_SST_MFLD_PLATFORM
 	select SND_SST_IPC_ACPI
+	select SND_SOC_INTEL_SST_MATCH if ACPI
 	help
           This adds support for ASoC machine driver for Intel(R) Baytrail and Baytrail-CR
           platforms with RT5640 audio codec.
@@ -109,6 +114,7 @@
 	select SND_SOC_RT5651
 	select SND_SST_MFLD_PLATFORM
 	select SND_SST_IPC_ACPI
+	select SND_SOC_INTEL_SST_MATCH if ACPI
 	help
           This adds support for ASoC machine driver for Intel(R) Baytrail and Baytrail-CR
           platforms with RT5651 audio codec.
@@ -121,6 +127,7 @@
         select SND_SOC_RT5670
         select SND_SST_MFLD_PLATFORM
         select SND_SST_IPC_ACPI
+	select SND_SOC_INTEL_SST_MATCH if ACPI
         help
           This adds support for ASoC machine driver for Intel(R) Cherrytrail & Braswell
           platforms with RT5672 audio codec.
@@ -133,6 +140,7 @@
 	select SND_SOC_RT5645
 	select SND_SST_MFLD_PLATFORM
 	select SND_SST_IPC_ACPI
+	select SND_SOC_INTEL_SST_MATCH if ACPI
 	help
 	  This adds support for ASoC machine driver for Intel(R) Cherrytrail & Braswell
 	  platforms with RT5645/5650 audio codec.
@@ -145,6 +153,7 @@
 	select SND_SOC_TS3A227E
 	select SND_SST_MFLD_PLATFORM
 	select SND_SST_IPC_ACPI
+	select SND_SOC_INTEL_SST_MATCH if ACPI
 	help
       This adds support for ASoC machine driver for Intel(R) Cherrytrail & Braswell
       platforms with MAX98090 audio codec it also can support TI jack chip as aux device.
diff --git a/sound/soc/intel/atom/sst-mfld-platform-pcm.c b/sound/soc/intel/atom/sst-mfld-platform-pcm.c
index 55c33dc..52ed434 100644
--- a/sound/soc/intel/atom/sst-mfld-platform-pcm.c
+++ b/sound/soc/intel/atom/sst-mfld-platform-pcm.c
@@ -528,6 +528,7 @@
 	.ops = &sst_compr_dai_ops,
 	.playback = {
 		.stream_name = "Compress Playback",
+		.channels_min = 1,
 	},
 },
 /* BE CPU  Dais */
diff --git a/sound/soc/intel/boards/cht_bsw_rt5645.c b/sound/soc/intel/boards/cht_bsw_rt5645.c
index 2d3afdd..a7b96a9 100644
--- a/sound/soc/intel/boards/cht_bsw_rt5645.c
+++ b/sound/soc/intel/boards/cht_bsw_rt5645.c
@@ -367,8 +367,12 @@
 	}
 	card->dev = &pdev->dev;
 	sprintf(codec_name, "i2c-%s:00", drv->acpi_card->codec_id);
+
 	/* set correct codec name */
-	strcpy((char *)card->dai_link[2].codec_name, codec_name);
+	for (i = 0; i < ARRAY_SIZE(cht_dailink); i++)
+		if (!strcmp(card->dai_link[i].codec_name, "i2c-10EC5645:00"))
+			card->dai_link[i].codec_name = kstrdup(codec_name, GFP_KERNEL);
+
 	snd_soc_card_set_drvdata(card, drv);
 	ret_val = devm_snd_soc_register_card(&pdev->dev, card);
 	if (ret_val) {
diff --git a/sound/soc/intel/boards/mfld_machine.c b/sound/soc/intel/boards/mfld_machine.c
index 49c09a0..34f46c7 100644
--- a/sound/soc/intel/boards/mfld_machine.c
+++ b/sound/soc/intel/boards/mfld_machine.c
@@ -94,7 +94,7 @@
 static int headset_get_switch(struct snd_kcontrol *kcontrol,
 	struct snd_ctl_elem_value *ucontrol)
 {
-	ucontrol->value.integer.value[0] = hs_switch;
+	ucontrol->value.enumerated.item[0] = hs_switch;
 	return 0;
 }
 
@@ -104,12 +104,12 @@
 	struct snd_soc_card *card = snd_kcontrol_chip(kcontrol);
 	struct snd_soc_dapm_context *dapm = &card->dapm;
 
-	if (ucontrol->value.integer.value[0] == hs_switch)
+	if (ucontrol->value.enumerated.item[0] == hs_switch)
 		return 0;
 
 	snd_soc_dapm_mutex_lock(dapm);
 
-	if (ucontrol->value.integer.value[0]) {
+	if (ucontrol->value.enumerated.item[0]) {
 		pr_debug("hs_set HS path\n");
 		snd_soc_dapm_enable_pin_unlocked(dapm, "Headphones");
 		snd_soc_dapm_disable_pin_unlocked(dapm, "EPOUT");
@@ -123,7 +123,7 @@
 
 	snd_soc_dapm_mutex_unlock(dapm);
 
-	hs_switch = ucontrol->value.integer.value[0];
+	hs_switch = ucontrol->value.enumerated.item[0];
 
 	return 0;
 }
@@ -148,7 +148,7 @@
 static int lo_get_switch(struct snd_kcontrol *kcontrol,
 	struct snd_ctl_elem_value *ucontrol)
 {
-	ucontrol->value.integer.value[0] = lo_dac;
+	ucontrol->value.enumerated.item[0] = lo_dac;
 	return 0;
 }
 
@@ -158,7 +158,7 @@
 	struct snd_soc_card *card = snd_kcontrol_chip(kcontrol);
 	struct snd_soc_dapm_context *dapm = &card->dapm;
 
-	if (ucontrol->value.integer.value[0] == lo_dac)
+	if (ucontrol->value.enumerated.item[0] == lo_dac)
 		return 0;
 
 	snd_soc_dapm_mutex_lock(dapm);
@@ -168,7 +168,7 @@
 	 */
 	lo_enable_out_pins(dapm);
 
-	switch (ucontrol->value.integer.value[0]) {
+	switch (ucontrol->value.enumerated.item[0]) {
 	case 0:
 		pr_debug("set vibra path\n");
 		snd_soc_dapm_disable_pin_unlocked(dapm, "VIB1OUT");
@@ -202,7 +202,7 @@
 
 	snd_soc_dapm_mutex_unlock(dapm);
 
-	lo_dac = ucontrol->value.integer.value[0];
+	lo_dac = ucontrol->value.enumerated.item[0];
 	return 0;
 }
 
diff --git a/sound/soc/intel/boards/skl_rt286.c b/sound/soc/intel/boards/skl_rt286.c
index 7396ddb..2cbcbe4 100644
--- a/sound/soc/intel/boards/skl_rt286.c
+++ b/sound/soc/intel/boards/skl_rt286.c
@@ -212,7 +212,10 @@
 {
 	struct snd_interval *channels = hw_param_interval(params,
 						SNDRV_PCM_HW_PARAM_CHANNELS);
-	channels->min = channels->max = 4;
+	if (params_channels(params) == 2)
+		channels->min = channels->max = 2;
+	else
+		channels->min = channels->max = 4;
 
 	return 0;
 }
diff --git a/sound/soc/intel/common/Makefile b/sound/soc/intel/common/Makefile
index 668fdee..fbbb25c 100644
--- a/sound/soc/intel/common/Makefile
+++ b/sound/soc/intel/common/Makefile
@@ -1,13 +1,10 @@
 snd-soc-sst-dsp-objs := sst-dsp.o
-ifneq ($(CONFIG_SND_SST_IPC_ACPI),)
-snd-soc-sst-acpi-objs := sst-match-acpi.o
-else
-snd-soc-sst-acpi-objs := sst-acpi.o sst-match-acpi.o
-endif
-
+snd-soc-sst-acpi-objs := sst-acpi.o
+snd-soc-sst-match-objs := sst-match-acpi.o
 snd-soc-sst-ipc-objs := sst-ipc.o
 
 snd-soc-sst-dsp-$(CONFIG_DW_DMAC_CORE) += sst-firmware.o
 
 obj-$(CONFIG_SND_SOC_INTEL_SST) += snd-soc-sst-dsp.o snd-soc-sst-ipc.o
 obj-$(CONFIG_SND_SOC_INTEL_SST_ACPI) += snd-soc-sst-acpi.o
+obj-$(CONFIG_SND_SOC_INTEL_SST_MATCH) += snd-soc-sst-match.o
diff --git a/sound/soc/intel/common/sst-acpi.c b/sound/soc/intel/common/sst-acpi.c
index 7a85c57..2c5eda1 100644
--- a/sound/soc/intel/common/sst-acpi.c
+++ b/sound/soc/intel/common/sst-acpi.c
@@ -215,6 +215,7 @@
 	.dma_size = SST_LPT_DSP_DMA_SIZE,
 };
 
+#if !IS_ENABLED(CONFIG_SND_SST_IPC_ACPI)
 static struct sst_acpi_mach baytrail_machines[] = {
 	{ "10EC5640", "byt-rt5640", "intel/fw_sst_0f28.bin-48kHz_i2s_master", NULL, NULL, NULL },
 	{ "193C9890", "byt-max98090", "intel/fw_sst_0f28.bin-48kHz_i2s_master", NULL, NULL, NULL },
@@ -231,11 +232,14 @@
 	.sst_id = SST_DEV_ID_BYT,
 	.resindex_dma_base = -1,
 };
+#endif
 
 static const struct acpi_device_id sst_acpi_match[] = {
 	{ "INT33C8", (unsigned long)&sst_acpi_haswell_desc },
 	{ "INT3438", (unsigned long)&sst_acpi_broadwell_desc },
+#if !IS_ENABLED(CONFIG_SND_SST_IPC_ACPI)
 	{ "80860F28", (unsigned long)&sst_acpi_baytrail_desc },
+#endif
 	{ }
 };
 MODULE_DEVICE_TABLE(acpi, sst_acpi_match);
diff --git a/sound/soc/intel/common/sst-match-acpi.c b/sound/soc/intel/common/sst-match-acpi.c
index dd077e1..3b4539d 100644
--- a/sound/soc/intel/common/sst-match-acpi.c
+++ b/sound/soc/intel/common/sst-match-acpi.c
@@ -41,3 +41,6 @@
 	return NULL;
 }
 EXPORT_SYMBOL_GPL(sst_acpi_find_machine);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Intel Common ACPI Match module");
diff --git a/sound/soc/intel/skylake/skl-messages.c b/sound/soc/intel/skylake/skl-messages.c
index de6dac4..4629372 100644
--- a/sound/soc/intel/skylake/skl-messages.c
+++ b/sound/soc/intel/skylake/skl-messages.c
@@ -688,14 +688,14 @@
 	/* get src queue index */
 	src_index = skl_get_queue_index(src_mcfg->m_out_pin, dst_id, out_max);
 	if (src_index < 0)
-		return -EINVAL;
+		return 0;
 
 	msg.src_queue = src_index;
 
 	/* get dst queue index */
 	dst_index  = skl_get_queue_index(dst_mcfg->m_in_pin, src_id, in_max);
 	if (dst_index < 0)
-		return -EINVAL;
+		return 0;
 
 	msg.dst_queue = dst_index;
 
@@ -747,7 +747,7 @@
 
 	skl_dump_bind_info(ctx, src_mcfg, dst_mcfg);
 
-	if (src_mcfg->m_state < SKL_MODULE_INIT_DONE &&
+	if (src_mcfg->m_state < SKL_MODULE_INIT_DONE ||
 		dst_mcfg->m_state < SKL_MODULE_INIT_DONE)
 		return 0;
 
diff --git a/sound/soc/intel/skylake/skl-pcm.c b/sound/soc/intel/skylake/skl-pcm.c
index f355325..b6e6b61 100644
--- a/sound/soc/intel/skylake/skl-pcm.c
+++ b/sound/soc/intel/skylake/skl-pcm.c
@@ -863,6 +863,7 @@
 		else
 			delay += hstream->bufsize;
 	}
+	delay = (hstream->bufsize == delay) ? 0 : delay;
 
 	if (delay >= hstream->period_bytes) {
 		dev_info(bus->dev,
diff --git a/sound/soc/intel/skylake/skl-topology.c b/sound/soc/intel/skylake/skl-topology.c
index 4624556..5a4837d 100644
--- a/sound/soc/intel/skylake/skl-topology.c
+++ b/sound/soc/intel/skylake/skl-topology.c
@@ -54,12 +54,9 @@
 
 /*
  * Each pipelines needs memory to be allocated. Check if we have free memory
- * from available pool. Then only add this to pool
- * This is freed when pipe is deleted
- * Note: DSP does actual memory management we only keep track for complete
- * pool
+ * from available pool.
  */
-static bool skl_tplg_alloc_pipe_mem(struct skl *skl,
+static bool skl_is_pipe_mem_avail(struct skl *skl,
 				struct skl_module_cfg *mconfig)
 {
 	struct skl_sst *ctx = skl->skl_sst;
@@ -74,10 +71,20 @@
 				"exceeds ppl memory available %d mem %d\n",
 				skl->resource.max_mem, skl->resource.mem);
 		return false;
+	} else {
+		return true;
 	}
+}
 
+/*
+ * Add the mem to the mem pool. This is freed when pipe is deleted.
+ * Note: DSP does actual memory management we only keep track for complete
+ * pool
+ */
+static void skl_tplg_alloc_pipe_mem(struct skl *skl,
+				struct skl_module_cfg *mconfig)
+{
 	skl->resource.mem += mconfig->pipe->memory_pages;
-	return true;
 }
 
 /*
@@ -85,10 +92,10 @@
  * quantified in MCPS (Million Clocks Per Second) required for module/pipe
  *
  * Each pipelines needs mcps to be allocated. Check if we have mcps for this
- * pipe. This adds the mcps to driver counter
- * This is removed on pipeline delete
+ * pipe.
  */
-static bool skl_tplg_alloc_pipe_mcps(struct skl *skl,
+
+static bool skl_is_pipe_mcps_avail(struct skl *skl,
 				struct skl_module_cfg *mconfig)
 {
 	struct skl_sst *ctx = skl->skl_sst;
@@ -98,13 +105,18 @@
 			"%s: module_id %d instance %d\n", __func__,
 			mconfig->id.module_id, mconfig->id.instance_id);
 		dev_err(ctx->dev,
-			"exceeds ppl memory available %d > mem %d\n",
+			"exceeds ppl mcps available %d > mem %d\n",
 			skl->resource.max_mcps, skl->resource.mcps);
 		return false;
+	} else {
+		return true;
 	}
+}
 
+static void skl_tplg_alloc_pipe_mcps(struct skl *skl,
+				struct skl_module_cfg *mconfig)
+{
 	skl->resource.mcps += mconfig->mcps;
-	return true;
 }
 
 /*
@@ -411,7 +423,7 @@
 		mconfig = w->priv;
 
 		/* check resource available */
-		if (!skl_tplg_alloc_pipe_mcps(skl, mconfig))
+		if (!skl_is_pipe_mcps_avail(skl, mconfig))
 			return -ENOMEM;
 
 		if (mconfig->is_loadable && ctx->dsp->fw_ops.load_mod) {
@@ -435,6 +447,7 @@
 		ret = skl_tplg_set_module_params(w, ctx);
 		if (ret < 0)
 			return ret;
+		skl_tplg_alloc_pipe_mcps(skl, mconfig);
 	}
 
 	return 0;
@@ -477,10 +490,10 @@
 	struct skl_sst *ctx = skl->skl_sst;
 
 	/* check resource available */
-	if (!skl_tplg_alloc_pipe_mcps(skl, mconfig))
+	if (!skl_is_pipe_mcps_avail(skl, mconfig))
 		return -EBUSY;
 
-	if (!skl_tplg_alloc_pipe_mem(skl, mconfig))
+	if (!skl_is_pipe_mem_avail(skl, mconfig))
 		return -ENOMEM;
 
 	/*
@@ -526,11 +539,15 @@
 		src_module = dst_module;
 	}
 
+	skl_tplg_alloc_pipe_mem(skl, mconfig);
+	skl_tplg_alloc_pipe_mcps(skl, mconfig);
+
 	return 0;
 }
 
 static int skl_tplg_bind_sinks(struct snd_soc_dapm_widget *w,
 				struct skl *skl,
+				struct snd_soc_dapm_widget *src_w,
 				struct skl_module_cfg *src_mconfig)
 {
 	struct snd_soc_dapm_path *p;
@@ -547,6 +564,10 @@
 		dev_dbg(ctx->dev, "%s: sink widget=%s\n", __func__, p->sink->name);
 
 		next_sink = p->sink;
+
+		if (!is_skl_dsp_widget_type(p->sink))
+			return skl_tplg_bind_sinks(p->sink, skl, src_w, src_mconfig);
+
 		/*
 		 * here we will check widgets in sink pipelines, so that
 		 * can be any widgets type and we are only interested if
@@ -576,7 +597,7 @@
 	}
 
 	if (!sink)
-		return skl_tplg_bind_sinks(next_sink, skl, src_mconfig);
+		return skl_tplg_bind_sinks(next_sink, skl, src_w, src_mconfig);
 
 	return 0;
 }
@@ -605,7 +626,7 @@
 	 * if sink is not started, start sink pipe first, then start
 	 * this pipe
 	 */
-	ret = skl_tplg_bind_sinks(w, skl, src_mconfig);
+	ret = skl_tplg_bind_sinks(w, skl, w, src_mconfig);
 	if (ret)
 		return ret;
 
@@ -773,10 +794,7 @@
 			continue;
 		}
 
-		ret = skl_unbind_modules(ctx, src_module, dst_module);
-		if (ret < 0)
-			return ret;
-
+		skl_unbind_modules(ctx, src_module, dst_module);
 		src_module = dst_module;
 	}
 
@@ -814,9 +832,6 @@
 			 * This is a connecter and if path is found that means
 			 * unbind between source and sink has not happened yet
 			 */
-			ret = skl_stop_pipe(ctx, sink_mconfig->pipe);
-			if (ret < 0)
-				return ret;
 			ret = skl_unbind_modules(ctx, src_mconfig,
 							sink_mconfig);
 		}
@@ -842,6 +857,12 @@
 	case SND_SOC_DAPM_PRE_PMU:
 		return skl_tplg_mixer_dapm_pre_pmu_event(w, skl);
 
+	case SND_SOC_DAPM_POST_PMU:
+		return skl_tplg_mixer_dapm_post_pmu_event(w, skl);
+
+	case SND_SOC_DAPM_PRE_PMD:
+		return skl_tplg_mixer_dapm_pre_pmd_event(w, skl);
+
 	case SND_SOC_DAPM_POST_PMD:
 		return skl_tplg_mixer_dapm_post_pmd_event(w, skl);
 	}
@@ -916,6 +937,13 @@
 		skl_get_module_params(skl->skl_sst, (u32 *)bc->params,
 				      bc->max, bc->param_id, mconfig);
 
+	/* decrement size for TLV header */
+	size -= 2 * sizeof(u32);
+
+	/* check size as we don't want to send kernel data */
+	if (size > bc->max)
+		size = bc->max;
+
 	if (bc->params) {
 		if (copy_to_user(data, &bc->param_id, sizeof(u32)))
 			return -EFAULT;
@@ -950,7 +978,7 @@
 				return -EFAULT;
 		} else {
 			if (copy_from_user(ac->params,
-					   data + 2 * sizeof(u32), size))
+					   data + 2, size))
 				return -EFAULT;
 		}
 
@@ -1510,6 +1538,7 @@
 					&skl_tplg_ops, fw, 0);
 	if (ret < 0) {
 		dev_err(bus->dev, "tplg component load failed%d\n", ret);
+		release_firmware(fw);
 		return -EINVAL;
 	}
 
diff --git a/sound/soc/intel/skylake/skl.c b/sound/soc/intel/skylake/skl.c
index 443a15d..092705e 100644
--- a/sound/soc/intel/skylake/skl.c
+++ b/sound/soc/intel/skylake/skl.c
@@ -614,8 +614,6 @@
 		goto out_unregister;
 
 	/*configure PM */
-	pm_runtime_set_autosuspend_delay(bus->dev, SKL_SUSPEND_DELAY);
-	pm_runtime_use_autosuspend(bus->dev);
 	pm_runtime_put_noidle(bus->dev);
 	pm_runtime_allow(bus->dev);
 
diff --git a/sound/soc/mediatek/Kconfig b/sound/soc/mediatek/Kconfig
index 15c04e2..9769676 100644
--- a/sound/soc/mediatek/Kconfig
+++ b/sound/soc/mediatek/Kconfig
@@ -9,7 +9,7 @@
 
 config SND_SOC_MT8173_MAX98090
 	tristate "ASoC Audio driver for MT8173 with MAX98090 codec"
-	depends on SND_SOC_MEDIATEK
+	depends on SND_SOC_MEDIATEK && I2C
 	select SND_SOC_MAX98090
 	help
 	  This adds ASoC driver for Mediatek MT8173 boards
@@ -19,7 +19,7 @@
 
 config SND_SOC_MT8173_RT5650_RT5676
 	tristate "ASoC Audio driver for MT8173 with RT5650 RT5676 codecs"
-	depends on SND_SOC_MEDIATEK
+	depends on SND_SOC_MEDIATEK && I2C
 	select SND_SOC_RT5645
 	select SND_SOC_RT5677
 	help
diff --git a/sound/soc/mxs/mxs-saif.c b/sound/soc/mxs/mxs-saif.c
index c866ade..a6c7b8d 100644
--- a/sound/soc/mxs/mxs-saif.c
+++ b/sound/soc/mxs/mxs-saif.c
@@ -381,9 +381,19 @@
 	__raw_writel(BM_SAIF_CTRL_CLKGATE,
 		saif->base + SAIF_CTRL + MXS_CLR_ADDR);
 
+	clk_prepare(saif->clk);
+
 	return 0;
 }
 
+static void mxs_saif_shutdown(struct snd_pcm_substream *substream,
+			      struct snd_soc_dai *cpu_dai)
+{
+	struct mxs_saif *saif = snd_soc_dai_get_drvdata(cpu_dai);
+
+	clk_unprepare(saif->clk);
+}
+
 /*
  * Should only be called when port is inactive.
  * although can be called multiple times by upper layers.
@@ -424,8 +434,6 @@
 		return ret;
 	}
 
-	/* prepare clk in hw_param, enable in trigger */
-	clk_prepare(saif->clk);
 	if (saif != master_saif) {
 		/*
 		* Set an initial clock rate for the saif internal logic to work
@@ -611,6 +619,7 @@
 
 static const struct snd_soc_dai_ops mxs_saif_dai_ops = {
 	.startup = mxs_saif_startup,
+	.shutdown = mxs_saif_shutdown,
 	.trigger = mxs_saif_trigger,
 	.prepare = mxs_saif_prepare,
 	.hw_params = mxs_saif_hw_params,
diff --git a/sound/soc/nuc900/nuc900-pcm.c b/sound/soc/nuc900/nuc900-pcm.c
index e093261..2cca055 100644
--- a/sound/soc/nuc900/nuc900-pcm.c
+++ b/sound/soc/nuc900/nuc900-pcm.c
@@ -267,10 +267,8 @@
 {
 	struct snd_pcm_runtime *runtime = substream->runtime;
 
-	return dma_mmap_writecombine(substream->pcm->card->dev, vma,
-					runtime->dma_area,
-					runtime->dma_addr,
-					runtime->dma_bytes);
+	return dma_mmap_wc(substream->pcm->card->dev, vma, runtime->dma_area,
+			   runtime->dma_addr, runtime->dma_bytes);
 }
 
 static struct snd_pcm_ops nuc900_dma_ops = {
diff --git a/sound/soc/omap/n810.c b/sound/soc/omap/n810.c
index 190f868..fdecb70 100644
--- a/sound/soc/omap/n810.c
+++ b/sound/soc/omap/n810.c
@@ -133,7 +133,7 @@
 static int n810_get_spk(struct snd_kcontrol *kcontrol,
 			struct snd_ctl_elem_value *ucontrol)
 {
-	ucontrol->value.integer.value[0] = n810_spk_func;
+	ucontrol->value.enumerated.item[0] = n810_spk_func;
 
 	return 0;
 }
@@ -143,10 +143,10 @@
 {
 	struct snd_soc_card *card =  snd_kcontrol_chip(kcontrol);
 
-	if (n810_spk_func == ucontrol->value.integer.value[0])
+	if (n810_spk_func == ucontrol->value.enumerated.item[0])
 		return 0;
 
-	n810_spk_func = ucontrol->value.integer.value[0];
+	n810_spk_func = ucontrol->value.enumerated.item[0];
 	n810_ext_control(&card->dapm);
 
 	return 1;
@@ -155,7 +155,7 @@
 static int n810_get_jack(struct snd_kcontrol *kcontrol,
 			 struct snd_ctl_elem_value *ucontrol)
 {
-	ucontrol->value.integer.value[0] = n810_jack_func;
+	ucontrol->value.enumerated.item[0] = n810_jack_func;
 
 	return 0;
 }
@@ -165,10 +165,10 @@
 {
 	struct snd_soc_card *card =  snd_kcontrol_chip(kcontrol);
 
-	if (n810_jack_func == ucontrol->value.integer.value[0])
+	if (n810_jack_func == ucontrol->value.enumerated.item[0])
 		return 0;
 
-	n810_jack_func = ucontrol->value.integer.value[0];
+	n810_jack_func = ucontrol->value.enumerated.item[0];
 	n810_ext_control(&card->dapm);
 
 	return 1;
@@ -177,7 +177,7 @@
 static int n810_get_input(struct snd_kcontrol *kcontrol,
 			  struct snd_ctl_elem_value *ucontrol)
 {
-	ucontrol->value.integer.value[0] = n810_dmic_func;
+	ucontrol->value.enumerated.item[0] = n810_dmic_func;
 
 	return 0;
 }
@@ -187,10 +187,10 @@
 {
 	struct snd_soc_card *card =  snd_kcontrol_chip(kcontrol);
 
-	if (n810_dmic_func == ucontrol->value.integer.value[0])
+	if (n810_dmic_func == ucontrol->value.enumerated.item[0])
 		return 0;
 
-	n810_dmic_func = ucontrol->value.integer.value[0];
+	n810_dmic_func = ucontrol->value.enumerated.item[0];
 	n810_ext_control(&card->dapm);
 
 	return 1;
diff --git a/sound/soc/omap/omap-pcm.c b/sound/soc/omap/omap-pcm.c
index 6bb623a..99381a2 100644
--- a/sound/soc/omap/omap-pcm.c
+++ b/sound/soc/omap/omap-pcm.c
@@ -156,10 +156,8 @@
 {
 	struct snd_pcm_runtime *runtime = substream->runtime;
 
-	return dma_mmap_writecombine(substream->pcm->card->dev, vma,
-				     runtime->dma_area,
-				     runtime->dma_addr,
-				     runtime->dma_bytes);
+	return dma_mmap_wc(substream->pcm->card->dev, vma, runtime->dma_area,
+			   runtime->dma_addr, runtime->dma_bytes);
 }
 
 static struct snd_pcm_ops omap_pcm_ops = {
@@ -183,8 +181,7 @@
 	buf->dev.type = SNDRV_DMA_TYPE_DEV;
 	buf->dev.dev = pcm->card->dev;
 	buf->private_data = NULL;
-	buf->area = dma_alloc_writecombine(pcm->card->dev, size,
-					   &buf->addr, GFP_KERNEL);
+	buf->area = dma_alloc_wc(pcm->card->dev, size, &buf->addr, GFP_KERNEL);
 	if (!buf->area)
 		return -ENOMEM;
 
@@ -207,8 +204,7 @@
 		if (!buf->area)
 			continue;
 
-		dma_free_writecombine(pcm->card->dev, buf->bytes,
-				      buf->area, buf->addr);
+		dma_free_wc(pcm->card->dev, buf->bytes, buf->area, buf->addr);
 		buf->area = NULL;
 	}
 }
diff --git a/sound/soc/omap/rx51.c b/sound/soc/omap/rx51.c
index 5e21f08..5494924 100644
--- a/sound/soc/omap/rx51.c
+++ b/sound/soc/omap/rx51.c
@@ -132,7 +132,7 @@
 static int rx51_get_spk(struct snd_kcontrol *kcontrol,
 			struct snd_ctl_elem_value *ucontrol)
 {
-	ucontrol->value.integer.value[0] = rx51_spk_func;
+	ucontrol->value.enumerated.item[0] = rx51_spk_func;
 
 	return 0;
 }
@@ -142,10 +142,10 @@
 {
 	struct snd_soc_card *card = snd_kcontrol_chip(kcontrol);
 
-	if (rx51_spk_func == ucontrol->value.integer.value[0])
+	if (rx51_spk_func == ucontrol->value.enumerated.item[0])
 		return 0;
 
-	rx51_spk_func = ucontrol->value.integer.value[0];
+	rx51_spk_func = ucontrol->value.enumerated.item[0];
 	rx51_ext_control(&card->dapm);
 
 	return 1;
@@ -180,7 +180,7 @@
 static int rx51_get_input(struct snd_kcontrol *kcontrol,
 			  struct snd_ctl_elem_value *ucontrol)
 {
-	ucontrol->value.integer.value[0] = rx51_dmic_func;
+	ucontrol->value.enumerated.item[0] = rx51_dmic_func;
 
 	return 0;
 }
@@ -190,10 +190,10 @@
 {
 	struct snd_soc_card *card = snd_kcontrol_chip(kcontrol);
 
-	if (rx51_dmic_func == ucontrol->value.integer.value[0])
+	if (rx51_dmic_func == ucontrol->value.enumerated.item[0])
 		return 0;
 
-	rx51_dmic_func = ucontrol->value.integer.value[0];
+	rx51_dmic_func = ucontrol->value.enumerated.item[0];
 	rx51_ext_control(&card->dapm);
 
 	return 1;
@@ -202,7 +202,7 @@
 static int rx51_get_jack(struct snd_kcontrol *kcontrol,
 			 struct snd_ctl_elem_value *ucontrol)
 {
-	ucontrol->value.integer.value[0] = rx51_jack_func;
+	ucontrol->value.enumerated.item[0] = rx51_jack_func;
 
 	return 0;
 }
@@ -212,10 +212,10 @@
 {
 	struct snd_soc_card *card = snd_kcontrol_chip(kcontrol);
 
-	if (rx51_jack_func == ucontrol->value.integer.value[0])
+	if (rx51_jack_func == ucontrol->value.enumerated.item[0])
 		return 0;
 
-	rx51_jack_func = ucontrol->value.integer.value[0];
+	rx51_jack_func = ucontrol->value.enumerated.item[0];
 	rx51_ext_control(&card->dapm);
 
 	return 1;
diff --git a/sound/soc/pxa/corgi.c b/sound/soc/pxa/corgi.c
index c97dc13..dcbb7aa 100644
--- a/sound/soc/pxa/corgi.c
+++ b/sound/soc/pxa/corgi.c
@@ -163,7 +163,7 @@
 static int corgi_get_jack(struct snd_kcontrol *kcontrol,
 	struct snd_ctl_elem_value *ucontrol)
 {
-	ucontrol->value.integer.value[0] = corgi_jack_func;
+	ucontrol->value.enumerated.item[0] = corgi_jack_func;
 	return 0;
 }
 
@@ -172,10 +172,10 @@
 {
 	struct snd_soc_card *card = snd_kcontrol_chip(kcontrol);
 
-	if (corgi_jack_func == ucontrol->value.integer.value[0])
+	if (corgi_jack_func == ucontrol->value.enumerated.item[0])
 		return 0;
 
-	corgi_jack_func = ucontrol->value.integer.value[0];
+	corgi_jack_func = ucontrol->value.enumerated.item[0];
 	corgi_ext_control(&card->dapm);
 	return 1;
 }
@@ -183,7 +183,7 @@
 static int corgi_get_spk(struct snd_kcontrol *kcontrol,
 	struct snd_ctl_elem_value *ucontrol)
 {
-	ucontrol->value.integer.value[0] = corgi_spk_func;
+	ucontrol->value.enumerated.item[0] = corgi_spk_func;
 	return 0;
 }
 
@@ -192,10 +192,10 @@
 {
 	struct snd_soc_card *card =  snd_kcontrol_chip(kcontrol);
 
-	if (corgi_spk_func == ucontrol->value.integer.value[0])
+	if (corgi_spk_func == ucontrol->value.enumerated.item[0])
 		return 0;
 
-	corgi_spk_func = ucontrol->value.integer.value[0];
+	corgi_spk_func = ucontrol->value.enumerated.item[0];
 	corgi_ext_control(&card->dapm);
 	return 1;
 }
diff --git a/sound/soc/pxa/magician.c b/sound/soc/pxa/magician.c
index 241d0be..62b8377 100644
--- a/sound/soc/pxa/magician.c
+++ b/sound/soc/pxa/magician.c
@@ -308,17 +308,17 @@
 static int magician_get_input(struct snd_kcontrol *kcontrol,
 			      struct snd_ctl_elem_value *ucontrol)
 {
-	ucontrol->value.integer.value[0] = magician_in_sel;
+	ucontrol->value.enumerated.item[0] = magician_in_sel;
 	return 0;
 }
 
 static int magician_set_input(struct snd_kcontrol *kcontrol,
 			      struct snd_ctl_elem_value *ucontrol)
 {
-	if (magician_in_sel == ucontrol->value.integer.value[0])
+	if (magician_in_sel == ucontrol->value.enumerated.item[0])
 		return 0;
 
-	magician_in_sel = ucontrol->value.integer.value[0];
+	magician_in_sel = ucontrol->value.enumerated.item[0];
 
 	switch (magician_in_sel) {
 	case MAGICIAN_MIC:
diff --git a/sound/soc/pxa/poodle.c b/sound/soc/pxa/poodle.c
index 84d0e2e..4b3b714 100644
--- a/sound/soc/pxa/poodle.c
+++ b/sound/soc/pxa/poodle.c
@@ -138,7 +138,7 @@
 static int poodle_get_jack(struct snd_kcontrol *kcontrol,
 	struct snd_ctl_elem_value *ucontrol)
 {
-	ucontrol->value.integer.value[0] = poodle_jack_func;
+	ucontrol->value.enumerated.item[0] = poodle_jack_func;
 	return 0;
 }
 
@@ -147,10 +147,10 @@
 {
 	struct snd_soc_card *card =  snd_kcontrol_chip(kcontrol);
 
-	if (poodle_jack_func == ucontrol->value.integer.value[0])
+	if (poodle_jack_func == ucontrol->value.enumerated.item[0])
 		return 0;
 
-	poodle_jack_func = ucontrol->value.integer.value[0];
+	poodle_jack_func = ucontrol->value.enumerated.item[0];
 	poodle_ext_control(&card->dapm);
 	return 1;
 }
@@ -158,7 +158,7 @@
 static int poodle_get_spk(struct snd_kcontrol *kcontrol,
 	struct snd_ctl_elem_value *ucontrol)
 {
-	ucontrol->value.integer.value[0] = poodle_spk_func;
+	ucontrol->value.enumerated.item[0] = poodle_spk_func;
 	return 0;
 }
 
@@ -167,10 +167,10 @@
 {
 	struct snd_soc_card *card =  snd_kcontrol_chip(kcontrol);
 
-	if (poodle_spk_func == ucontrol->value.integer.value[0])
+	if (poodle_spk_func == ucontrol->value.enumerated.item[0])
 		return 0;
 
-	poodle_spk_func = ucontrol->value.integer.value[0];
+	poodle_spk_func = ucontrol->value.enumerated.item[0];
 	poodle_ext_control(&card->dapm);
 	return 1;
 }
diff --git a/sound/soc/pxa/spitz.c b/sound/soc/pxa/spitz.c
index b002226..0e02634 100644
--- a/sound/soc/pxa/spitz.c
+++ b/sound/soc/pxa/spitz.c
@@ -164,7 +164,7 @@
 static int spitz_get_jack(struct snd_kcontrol *kcontrol,
 	struct snd_ctl_elem_value *ucontrol)
 {
-	ucontrol->value.integer.value[0] = spitz_jack_func;
+	ucontrol->value.enumerated.item[0] = spitz_jack_func;
 	return 0;
 }
 
@@ -173,10 +173,10 @@
 {
 	struct snd_soc_card *card = snd_kcontrol_chip(kcontrol);
 
-	if (spitz_jack_func == ucontrol->value.integer.value[0])
+	if (spitz_jack_func == ucontrol->value.enumerated.item[0])
 		return 0;
 
-	spitz_jack_func = ucontrol->value.integer.value[0];
+	spitz_jack_func = ucontrol->value.enumerated.item[0];
 	spitz_ext_control(&card->dapm);
 	return 1;
 }
@@ -184,7 +184,7 @@
 static int spitz_get_spk(struct snd_kcontrol *kcontrol,
 	struct snd_ctl_elem_value *ucontrol)
 {
-	ucontrol->value.integer.value[0] = spitz_spk_func;
+	ucontrol->value.enumerated.item[0] = spitz_spk_func;
 	return 0;
 }
 
@@ -193,10 +193,10 @@
 {
 	struct snd_soc_card *card = snd_kcontrol_chip(kcontrol);
 
-	if (spitz_spk_func == ucontrol->value.integer.value[0])
+	if (spitz_spk_func == ucontrol->value.enumerated.item[0])
 		return 0;
 
-	spitz_spk_func = ucontrol->value.integer.value[0];
+	spitz_spk_func = ucontrol->value.enumerated.item[0];
 	spitz_ext_control(&card->dapm);
 	return 1;
 }
diff --git a/sound/soc/pxa/tosa.c b/sound/soc/pxa/tosa.c
index 49518dd..c508f02 100644
--- a/sound/soc/pxa/tosa.c
+++ b/sound/soc/pxa/tosa.c
@@ -95,7 +95,7 @@
 static int tosa_get_jack(struct snd_kcontrol *kcontrol,
 	struct snd_ctl_elem_value *ucontrol)
 {
-	ucontrol->value.integer.value[0] = tosa_jack_func;
+	ucontrol->value.enumerated.item[0] = tosa_jack_func;
 	return 0;
 }
 
@@ -104,10 +104,10 @@
 {
 	struct snd_soc_card *card = snd_kcontrol_chip(kcontrol);
 
-	if (tosa_jack_func == ucontrol->value.integer.value[0])
+	if (tosa_jack_func == ucontrol->value.enumerated.item[0])
 		return 0;
 
-	tosa_jack_func = ucontrol->value.integer.value[0];
+	tosa_jack_func = ucontrol->value.enumerated.item[0];
 	tosa_ext_control(&card->dapm);
 	return 1;
 }
@@ -115,7 +115,7 @@
 static int tosa_get_spk(struct snd_kcontrol *kcontrol,
 	struct snd_ctl_elem_value *ucontrol)
 {
-	ucontrol->value.integer.value[0] = tosa_spk_func;
+	ucontrol->value.enumerated.item[0] = tosa_spk_func;
 	return 0;
 }
 
@@ -124,10 +124,10 @@
 {
 	struct snd_soc_card *card = snd_kcontrol_chip(kcontrol);
 
-	if (tosa_spk_func == ucontrol->value.integer.value[0])
+	if (tosa_spk_func == ucontrol->value.enumerated.item[0])
 		return 0;
 
-	tosa_spk_func = ucontrol->value.integer.value[0];
+	tosa_spk_func = ucontrol->value.enumerated.item[0];
 	tosa_ext_control(&card->dapm);
 	return 1;
 }
diff --git a/sound/soc/qcom/lpass-cpu.c b/sound/soc/qcom/lpass-cpu.c
index 00b6c9d..e5101e0 100644
--- a/sound/soc/qcom/lpass-cpu.c
+++ b/sound/soc/qcom/lpass-cpu.c
@@ -355,7 +355,6 @@
 	.readable_reg = lpass_cpu_regmap_readable,
 	.volatile_reg = lpass_cpu_regmap_volatile,
 	.cache_type = REGCACHE_FLAT,
-	.val_format_endian = REGMAP_ENDIAN_LITTLE,
 };
 
 int asoc_qcom_lpass_cpu_platform_probe(struct platform_device *pdev)
diff --git a/sound/soc/qcom/lpass-platform.c b/sound/soc/qcom/lpass-platform.c
index 79688aa..4aeb8e1 100644
--- a/sound/soc/qcom/lpass-platform.c
+++ b/sound/soc/qcom/lpass-platform.c
@@ -440,18 +440,18 @@
 }
 
 static int lpass_platform_alloc_buffer(struct snd_pcm_substream *substream,
-		struct snd_soc_pcm_runtime *soc_runtime)
+		struct snd_soc_pcm_runtime *rt)
 {
 	struct snd_dma_buffer *buf = &substream->dma_buffer;
 	size_t size = lpass_platform_pcm_hardware.buffer_bytes_max;
 
 	buf->dev.type = SNDRV_DMA_TYPE_DEV;
-	buf->dev.dev = soc_runtime->dev;
+	buf->dev.dev = rt->platform->dev;
 	buf->private_data = NULL;
-	buf->area = dma_alloc_coherent(soc_runtime->dev, size, &buf->addr,
+	buf->area = dma_alloc_coherent(rt->platform->dev, size, &buf->addr,
 			GFP_KERNEL);
 	if (!buf->area) {
-		dev_err(soc_runtime->dev, "%s: Could not allocate DMA buffer\n",
+		dev_err(rt->platform->dev, "%s: Could not allocate DMA buffer\n",
 				__func__);
 		return -ENOMEM;
 	}
@@ -461,12 +461,12 @@
 }
 
 static void lpass_platform_free_buffer(struct snd_pcm_substream *substream,
-		struct snd_soc_pcm_runtime *soc_runtime)
+		struct snd_soc_pcm_runtime *rt)
 {
 	struct snd_dma_buffer *buf = &substream->dma_buffer;
 
 	if (buf->area) {
-		dma_free_coherent(soc_runtime->dev, buf->bytes, buf->area,
+		dma_free_coherent(rt->dev, buf->bytes, buf->area,
 				buf->addr);
 	}
 	buf->area = NULL;
@@ -499,9 +499,6 @@
 
 	snd_soc_pcm_set_drvdata(soc_runtime, data);
 
-	soc_runtime->dev->coherent_dma_mask = DMA_BIT_MASK(32);
-	soc_runtime->dev->dma_mask = &soc_runtime->dev->coherent_dma_mask;
-
 	ret = lpass_platform_alloc_buffer(substream, soc_runtime);
 	if (ret)
 		return ret;
diff --git a/sound/soc/samsung/i2s.c b/sound/soc/samsung/i2s.c
index 84d9e77..70a2559 100644
--- a/sound/soc/samsung/i2s.c
+++ b/sound/soc/samsung/i2s.c
@@ -481,10 +481,11 @@
 	unsigned int cdcon_mask = 1 << i2s_regs->cdclkcon_off;
 	unsigned int rsrc_mask = 1 << i2s_regs->rclksrc_off;
 	u32 mod, mask, val = 0;
+	unsigned long flags;
 
-	spin_lock(i2s->lock);
+	spin_lock_irqsave(i2s->lock, flags);
 	mod = readl(i2s->addr + I2SMOD);
-	spin_unlock(i2s->lock);
+	spin_unlock_irqrestore(i2s->lock, flags);
 
 	switch (clk_id) {
 	case SAMSUNG_I2S_OPCLK:
@@ -575,11 +576,11 @@
 		return -EINVAL;
 	}
 
-	spin_lock(i2s->lock);
+	spin_lock_irqsave(i2s->lock, flags);
 	mod = readl(i2s->addr + I2SMOD);
 	mod = (mod & ~mask) | val;
 	writel(mod, i2s->addr + I2SMOD);
-	spin_unlock(i2s->lock);
+	spin_unlock_irqrestore(i2s->lock, flags);
 
 	return 0;
 }
@@ -590,6 +591,7 @@
 	struct i2s_dai *i2s = to_info(dai);
 	int lrp_shift, sdf_shift, sdf_mask, lrp_rlow, mod_slave;
 	u32 mod, tmp = 0;
+	unsigned long flags;
 
 	lrp_shift = i2s->variant_regs->lrp_off;
 	sdf_shift = i2s->variant_regs->sdf_off;
@@ -649,7 +651,7 @@
 		return -EINVAL;
 	}
 
-	spin_lock(i2s->lock);
+	spin_lock_irqsave(i2s->lock, flags);
 	mod = readl(i2s->addr + I2SMOD);
 	/*
 	 * Don't change the I2S mode if any controller is active on this
@@ -657,7 +659,7 @@
 	 */
 	if (any_active(i2s) &&
 		((mod & (sdf_mask | lrp_rlow | mod_slave)) != tmp)) {
-		spin_unlock(i2s->lock);
+		spin_unlock_irqrestore(i2s->lock, flags);
 		dev_err(&i2s->pdev->dev,
 				"%s:%d Other DAI busy\n", __func__, __LINE__);
 		return -EAGAIN;
@@ -666,7 +668,7 @@
 	mod &= ~(sdf_mask | lrp_rlow | mod_slave);
 	mod |= tmp;
 	writel(mod, i2s->addr + I2SMOD);
-	spin_unlock(i2s->lock);
+	spin_unlock_irqrestore(i2s->lock, flags);
 
 	return 0;
 }
@@ -676,6 +678,7 @@
 {
 	struct i2s_dai *i2s = to_info(dai);
 	u32 mod, mask = 0, val = 0;
+	unsigned long flags;
 
 	if (!is_secondary(i2s))
 		mask |= (MOD_DC2_EN | MOD_DC1_EN);
@@ -744,11 +747,11 @@
 		return -EINVAL;
 	}
 
-	spin_lock(i2s->lock);
+	spin_lock_irqsave(i2s->lock, flags);
 	mod = readl(i2s->addr + I2SMOD);
 	mod = (mod & ~mask) | val;
 	writel(mod, i2s->addr + I2SMOD);
-	spin_unlock(i2s->lock);
+	spin_unlock_irqrestore(i2s->lock, flags);
 
 	samsung_asoc_init_dma_data(dai, &i2s->dma_playback, &i2s->dma_capture);
 
diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c
index 5a2812f..581175a 100644
--- a/sound/soc/soc-dapm.c
+++ b/sound/soc/soc-dapm.c
@@ -310,7 +310,7 @@
 };
 
 static int dapm_kcontrol_data_alloc(struct snd_soc_dapm_widget *widget,
-	struct snd_kcontrol *kcontrol)
+	struct snd_kcontrol *kcontrol, const char *ctrl_name)
 {
 	struct dapm_kcontrol_data *data;
 	struct soc_mixer_control *mc;
@@ -333,7 +333,7 @@
 		if (mc->autodisable) {
 			struct snd_soc_dapm_widget template;
 
-			name = kasprintf(GFP_KERNEL, "%s %s", kcontrol->id.name,
+			name = kasprintf(GFP_KERNEL, "%s %s", ctrl_name,
 					 "Autodisable");
 			if (!name) {
 				ret = -ENOMEM;
@@ -371,7 +371,7 @@
 		if (e->autodisable) {
 			struct snd_soc_dapm_widget template;
 
-			name = kasprintf(GFP_KERNEL, "%s %s", kcontrol->id.name,
+			name = kasprintf(GFP_KERNEL, "%s %s", ctrl_name,
 					 "Autodisable");
 			if (!name) {
 				ret = -ENOMEM;
@@ -871,7 +871,7 @@
 
 		kcontrol->private_free = dapm_kcontrol_free;
 
-		ret = dapm_kcontrol_data_alloc(w, kcontrol);
+		ret = dapm_kcontrol_data_alloc(w, kcontrol, name);
 		if (ret) {
 			snd_ctl_free_one(kcontrol);
 			goto exit_free;
@@ -3573,7 +3573,7 @@
 {
 	struct snd_soc_dapm_widget *w = snd_kcontrol_chip(kcontrol);
 
-	ucontrol->value.integer.value[0] = w->params_select;
+	ucontrol->value.enumerated.item[0] = w->params_select;
 
 	return 0;
 }
@@ -3587,13 +3587,13 @@
 	if (w->power)
 		return -EBUSY;
 
-	if (ucontrol->value.integer.value[0] == w->params_select)
+	if (ucontrol->value.enumerated.item[0] == w->params_select)
 		return 0;
 
-	if (ucontrol->value.integer.value[0] >= w->num_params)
+	if (ucontrol->value.enumerated.item[0] >= w->num_params)
 		return -EINVAL;
 
-	w->params_select = ucontrol->value.integer.value[0];
+	w->params_select = ucontrol->value.enumerated.item[0];
 
 	return 0;
 }
diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c
index e898b42..1af4f23 100644
--- a/sound/soc/soc-pcm.c
+++ b/sound/soc/soc-pcm.c
@@ -1810,7 +1810,8 @@
 		    (be->dpcm[stream].state != SND_SOC_DPCM_STATE_PREPARE) &&
 		    (be->dpcm[stream].state != SND_SOC_DPCM_STATE_HW_FREE) &&
 		    (be->dpcm[stream].state != SND_SOC_DPCM_STATE_PAUSED) &&
-		    (be->dpcm[stream].state != SND_SOC_DPCM_STATE_STOP))
+		    (be->dpcm[stream].state != SND_SOC_DPCM_STATE_STOP) &&
+		    (be->dpcm[stream].state != SND_SOC_DPCM_STATE_SUSPEND))
 			continue;
 
 		dev_dbg(be->dev, "ASoC: hw_free BE %s\n",
diff --git a/sound/sparc/Kconfig b/sound/sparc/Kconfig
index d75deba..dfcd386 100644
--- a/sound/sparc/Kconfig
+++ b/sound/sparc/Kconfig
@@ -22,6 +22,7 @@
 config SND_SUN_CS4231
 	tristate "Sun CS4231"
 	select SND_PCM
+	select SND_TIMER
 	help
 	  Say Y here to include support for CS4231 sound device on Sun.
 
diff --git a/sound/usb/midi.c b/sound/usb/midi.c
index cc39f63..007cf58 100644
--- a/sound/usb/midi.c
+++ b/sound/usb/midi.c
@@ -2455,7 +2455,6 @@
 	else
 		err = snd_usbmidi_create_endpoints(umidi, endpoints);
 	if (err < 0) {
-		snd_usbmidi_free(umidi);
 		return err;
 	}
 
diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c
index 23ea6d8..c458d60 100644
--- a/sound/usb/quirks.c
+++ b/sound/usb/quirks.c
@@ -1121,8 +1121,10 @@
 	switch (chip->usb_id) {
 	case USB_ID(0x045E, 0x075D): /* MS Lifecam Cinema  */
 	case USB_ID(0x045E, 0x076D): /* MS Lifecam HD-5000 */
+	case USB_ID(0x045E, 0x076F): /* MS Lifecam HD-6000 */
 	case USB_ID(0x045E, 0x0772): /* MS Lifecam Studio */
 	case USB_ID(0x045E, 0x0779): /* MS Lifecam HD-3000 */
+	case USB_ID(0x047F, 0xAA05): /* Plantronics DA45 */
 	case USB_ID(0x04D8, 0xFEEA): /* Benchmark DAC1 Pre */
 	case USB_ID(0x074D, 0x3553): /* Outlaw RR2150 (Micronas UAC3553B) */
 	case USB_ID(0x21B4, 0x0081): /* AudioQuest DragonFly */
@@ -1205,8 +1207,12 @@
 	 * "Playback Design" products need a 50ms delay after setting the
 	 * USB interface.
 	 */
-	if (le16_to_cpu(dev->descriptor.idVendor) == 0x23ba)
+	switch (le16_to_cpu(dev->descriptor.idVendor)) {
+	case 0x23ba: /* Playback Design */
+	case 0x0644: /* TEAC Corp. */
 		mdelay(50);
+		break;
+	}
 }
 
 void snd_usb_ctl_msg_quirk(struct usb_device *dev, unsigned int pipe,
@@ -1221,6 +1227,14 @@
 	    (requesttype & USB_TYPE_MASK) == USB_TYPE_CLASS)
 		mdelay(20);
 
+	/*
+	 * "TEAC Corp." products need a 20ms delay after each
+	 * class compliant request
+	 */
+	if ((le16_to_cpu(dev->descriptor.idVendor) == 0x0644) &&
+	    (requesttype & USB_TYPE_MASK) == USB_TYPE_CLASS)
+		mdelay(20);
+
 	/* Marantz/Denon devices with USB DAC functionality need a delay
 	 * after each class compliant request
 	 */
@@ -1269,7 +1283,7 @@
 	case USB_ID(0x20b1, 0x3008): /* iFi Audio micro/nano iDSD */
 	case USB_ID(0x20b1, 0x2008): /* Matrix Audio X-Sabre */
 	case USB_ID(0x20b1, 0x300a): /* Matrix Audio Mini-i Pro */
-	case USB_ID(0x22d8, 0x0416): /* OPPO HA-1*/
+	case USB_ID(0x22d9, 0x0416): /* OPPO HA-1 */
 		if (fp->altsetting == 2)
 			return SNDRV_PCM_FMTBIT_DSD_U32_BE;
 		break;
@@ -1278,6 +1292,7 @@
 	case USB_ID(0x20b1, 0x2009): /* DIYINHK DSD DXD 384kHz USB to I2S/DSD */
 	case USB_ID(0x20b1, 0x2023): /* JLsounds I2SoverUSB */
 	case USB_ID(0x20b1, 0x3023): /* Aune X1S 32BIT/384 DSD DAC */
+	case USB_ID(0x2616, 0x0106): /* PS Audio NuWave DAC */
 		if (fp->altsetting == 3)
 			return SNDRV_PCM_FMTBIT_DSD_U32_BE;
 		break;
diff --git a/tools/build/Makefile.build b/tools/build/Makefile.build
index 4a96473..ee566e8 100644
--- a/tools/build/Makefile.build
+++ b/tools/build/Makefile.build
@@ -85,7 +85,7 @@
 	$(call rule_mkdir)
 	$(call if_changed_dep,cc_i_c)
 
-$(OUTPUT)%.i: %.S FORCE
+$(OUTPUT)%.s: %.S FORCE
 	$(call rule_mkdir)
 	$(call if_changed_dep,cc_i_c)
 
diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature
index 02db3cd..6b77072 100644
--- a/tools/build/Makefile.feature
+++ b/tools/build/Makefile.feature
@@ -27,7 +27,7 @@
 #   the rule that uses them - an example for that is the 'bionic'
 #   feature check. ]
 #
-FEATURE_TESTS ?=			\
+FEATURE_TESTS_BASIC :=			\
 	backtrace			\
 	dwarf				\
 	fortify-source			\
@@ -46,6 +46,7 @@
 	libpython			\
 	libpython-version		\
 	libslang			\
+	libcrypto			\
 	libunwind			\
 	pthread-attr-setaffinity-np	\
 	stackprotector-all		\
@@ -56,6 +57,25 @@
 	get_cpuid			\
 	bpf
 
+# FEATURE_TESTS_BASIC + FEATURE_TESTS_EXTRA is the complete list
+# of all feature tests
+FEATURE_TESTS_EXTRA :=			\
+	bionic				\
+	compile-32			\
+	compile-x32			\
+	cplus-demangle			\
+	hello				\
+	libbabeltrace			\
+	liberty				\
+	liberty-z			\
+	libunwind-debug-frame
+
+FEATURE_TESTS ?= $(FEATURE_TESTS_BASIC)
+
+ifeq ($(FEATURE_TESTS),all)
+  FEATURE_TESTS := $(FEATURE_TESTS_BASIC) $(FEATURE_TESTS_EXTRA)
+endif
+
 FEATURE_DISPLAY ?=			\
 	dwarf				\
 	glibc				\
@@ -68,6 +88,7 @@
 	libperl				\
 	libpython			\
 	libslang			\
+	libcrypto			\
 	libunwind			\
 	libdw-dwarf-unwind		\
 	zlib				\
@@ -100,6 +121,14 @@
   # test-all.c passed - just set all the core feature flags to 1:
   #
   $(foreach feat,$(FEATURE_TESTS),$(call feature_set,$(feat)))
+  #
+  # test-all.c does not comprise these tests, so we need to
+  # for this case to get features proper values
+  #
+  $(call feature_check,compile-32)
+  $(call feature_check,compile-x32)
+  $(call feature_check,bionic)
+  $(call feature_check,libbabeltrace)
 else
   $(foreach feat,$(FEATURE_TESTS),$(call feature_check,$(feat)))
 endif
diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile
index bf8f035..c5f4c41 100644
--- a/tools/build/feature/Makefile
+++ b/tools/build/feature/Makefile
@@ -23,6 +23,7 @@
 	test-libpython.bin		\
 	test-libpython-version.bin	\
 	test-libslang.bin		\
+	test-libcrypto.bin		\
 	test-libunwind.bin		\
 	test-libunwind-debug-frame.bin	\
 	test-pthread-attr-setaffinity-np.bin	\
@@ -105,6 +106,9 @@
 $(OUTPUT)test-libslang.bin:
 	$(BUILD) -I/usr/include/slang -lslang
 
+$(OUTPUT)test-libcrypto.bin:
+	$(BUILD) -lcrypto
+
 $(OUTPUT)test-gtk2.bin:
 	$(BUILD) $(shell $(PKG_CONFIG) --libs --cflags gtk+-2.0 2>/dev/null)
 
diff --git a/tools/build/feature/test-all.c b/tools/build/feature/test-all.c
index 81025ca..e499a36 100644
--- a/tools/build/feature/test-all.c
+++ b/tools/build/feature/test-all.c
@@ -129,6 +129,10 @@
 # include "test-bpf.c"
 #undef main
 
+#define main main_test_libcrypto
+# include "test-libcrypto.c"
+#undef main
+
 int main(int argc, char *argv[])
 {
 	main_test_libpython();
@@ -158,6 +162,7 @@
 	main_test_lzma();
 	main_test_get_cpuid();
 	main_test_bpf();
+	main_test_libcrypto();
 
 	return 0;
 }
diff --git a/tools/build/feature/test-compile.c b/tools/build/feature/test-compile.c
index 31dbf45..c54e655 100644
--- a/tools/build/feature/test-compile.c
+++ b/tools/build/feature/test-compile.c
@@ -1,4 +1,6 @@
+#include <stdio.h>
 int main(void)
 {
+	printf("Hello World!\n");
 	return 0;
 }
diff --git a/tools/build/feature/test-libcrypto.c b/tools/build/feature/test-libcrypto.c
new file mode 100644
index 0000000..bd79dc7
--- /dev/null
+++ b/tools/build/feature/test-libcrypto.c
@@ -0,0 +1,17 @@
+#include <openssl/sha.h>
+#include <openssl/md5.h>
+
+int main(void)
+{
+	MD5_CTX context;
+	unsigned char md[MD5_DIGEST_LENGTH + SHA_DIGEST_LENGTH];
+	unsigned char dat[] = "12345";
+
+	MD5_Init(&context);
+	MD5_Update(&context, &dat[0], sizeof(dat));
+	MD5_Final(&md[0], &context);
+
+	SHA1(&dat[0], sizeof(dat), &md[0]);
+
+	return 0;
+}
diff --git a/tools/lib/api/Build b/tools/lib/api/Build
index e8b8a23..954c644 100644
--- a/tools/lib/api/Build
+++ b/tools/lib/api/Build
@@ -1,3 +1,4 @@
 libapi-y += fd/
 libapi-y += fs/
 libapi-y += cpu.o
+libapi-y += debug.o
diff --git a/tools/lib/api/Makefile b/tools/lib/api/Makefile
index d85904d..bbc82c6 100644
--- a/tools/lib/api/Makefile
+++ b/tools/lib/api/Makefile
@@ -18,6 +18,7 @@
 CFLAGS := $(EXTRA_WARNINGS) $(EXTRA_CFLAGS)
 CFLAGS += -ggdb3 -Wall -Wextra -std=gnu99 -Werror -O6 -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2 -fPIC
 CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64
+CFLAGS += -I$(srctree)/tools/lib/api
 
 RM = rm -f
 
diff --git a/tools/lib/api/debug-internal.h b/tools/lib/api/debug-internal.h
new file mode 100644
index 0000000..188f7880
--- /dev/null
+++ b/tools/lib/api/debug-internal.h
@@ -0,0 +1,20 @@
+#ifndef __API_DEBUG_INTERNAL_H__
+#define __API_DEBUG_INTERNAL_H__
+
+#include "debug.h"
+
+#define __pr(func, fmt, ...)	\
+do {				\
+	if ((func))		\
+		(func)("libapi: " fmt, ##__VA_ARGS__); \
+} while (0)
+
+extern libapi_print_fn_t __pr_warning;
+extern libapi_print_fn_t __pr_info;
+extern libapi_print_fn_t __pr_debug;
+
+#define pr_warning(fmt, ...)	__pr(__pr_warning, fmt, ##__VA_ARGS__)
+#define pr_info(fmt, ...)	__pr(__pr_info, fmt, ##__VA_ARGS__)
+#define pr_debug(fmt, ...)	__pr(__pr_debug, fmt, ##__VA_ARGS__)
+
+#endif /* __API_DEBUG_INTERNAL_H__ */
diff --git a/tools/lib/api/debug.c b/tools/lib/api/debug.c
new file mode 100644
index 0000000..5fa5cf5
--- /dev/null
+++ b/tools/lib/api/debug.c
@@ -0,0 +1,28 @@
+#include <stdio.h>
+#include <stdarg.h>
+#include "debug.h"
+#include "debug-internal.h"
+
+static int __base_pr(const char *format, ...)
+{
+	va_list args;
+	int err;
+
+	va_start(args, format);
+	err = vfprintf(stderr, format, args);
+	va_end(args);
+	return err;
+}
+
+libapi_print_fn_t __pr_warning = __base_pr;
+libapi_print_fn_t __pr_info    = __base_pr;
+libapi_print_fn_t __pr_debug;
+
+void libapi_set_print(libapi_print_fn_t warn,
+		      libapi_print_fn_t info,
+		      libapi_print_fn_t debug)
+{
+	__pr_warning = warn;
+	__pr_info    = info;
+	__pr_debug   = debug;
+}
diff --git a/tools/lib/api/debug.h b/tools/lib/api/debug.h
new file mode 100644
index 0000000..a0872f6
--- /dev/null
+++ b/tools/lib/api/debug.h
@@ -0,0 +1,10 @@
+#ifndef __API_DEBUG_H__
+#define __API_DEBUG_H__
+
+typedef int (*libapi_print_fn_t)(const char *, ...);
+
+void libapi_set_print(libapi_print_fn_t warn,
+		      libapi_print_fn_t info,
+		      libapi_print_fn_t debug);
+
+#endif /* __API_DEBUG_H__ */
diff --git a/tools/lib/api/fs/fs.c b/tools/lib/api/fs/fs.c
index 459599d..ef78c22 100644
--- a/tools/lib/api/fs/fs.c
+++ b/tools/lib/api/fs/fs.c
@@ -13,6 +13,7 @@
 #include <sys/mount.h>
 
 #include "fs.h"
+#include "debug-internal.h"
 
 #define _STR(x) #x
 #define STR(x) _STR(x)
@@ -300,6 +301,56 @@
 	return err;
 }
 
+#define STRERR_BUFSIZE  128     /* For the buffer size of strerror_r */
+
+int filename__read_str(const char *filename, char **buf, size_t *sizep)
+{
+	size_t size = 0, alloc_size = 0;
+	void *bf = NULL, *nbf;
+	int fd, n, err = 0;
+	char sbuf[STRERR_BUFSIZE];
+
+	fd = open(filename, O_RDONLY);
+	if (fd < 0)
+		return -errno;
+
+	do {
+		if (size == alloc_size) {
+			alloc_size += BUFSIZ;
+			nbf = realloc(bf, alloc_size);
+			if (!nbf) {
+				err = -ENOMEM;
+				break;
+			}
+
+			bf = nbf;
+		}
+
+		n = read(fd, bf + size, alloc_size - size);
+		if (n < 0) {
+			if (size) {
+				pr_warning("read failed %d: %s\n", errno,
+					 strerror_r(errno, sbuf, sizeof(sbuf)));
+				err = 0;
+			} else
+				err = -errno;
+
+			break;
+		}
+
+		size += n;
+	} while (n > 0);
+
+	if (!err) {
+		*sizep = size;
+		*buf   = bf;
+	} else
+		free(bf);
+
+	close(fd);
+	return err;
+}
+
 int sysfs__read_ull(const char *entry, unsigned long long *value)
 {
 	char path[PATH_MAX];
@@ -326,6 +377,19 @@
 	return filename__read_int(path, value);
 }
 
+int sysfs__read_str(const char *entry, char **buf, size_t *sizep)
+{
+	char path[PATH_MAX];
+	const char *sysfs = sysfs__mountpoint();
+
+	if (!sysfs)
+		return -1;
+
+	snprintf(path, sizeof(path), "%s/%s", sysfs, entry);
+
+	return filename__read_str(path, buf, sizep);
+}
+
 int sysctl__read_int(const char *sysctl, int *value)
 {
 	char path[PATH_MAX];
diff --git a/tools/lib/api/fs/fs.h b/tools/lib/api/fs/fs.h
index d024a7f..9f65980 100644
--- a/tools/lib/api/fs/fs.h
+++ b/tools/lib/api/fs/fs.h
@@ -2,6 +2,7 @@
 #define __API_FS__
 
 #include <stdbool.h>
+#include <unistd.h>
 
 /*
  * On most systems <limits.h> would have given us this, but  not on some systems
@@ -26,8 +27,10 @@
 
 int filename__read_int(const char *filename, int *value);
 int filename__read_ull(const char *filename, unsigned long long *value);
+int filename__read_str(const char *filename, char **buf, size_t *sizep);
 
 int sysctl__read_int(const char *sysctl, int *value);
 int sysfs__read_int(const char *entry, int *value);
 int sysfs__read_ull(const char *entry, unsigned long long *value);
+int sysfs__read_str(const char *entry, char **buf, size_t *sizep);
 #endif /* __API_FS__ */
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 8334a5a..7e543c3 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -201,6 +201,7 @@
 			Elf_Data *data;
 		} *reloc;
 		int nr_reloc;
+		int maps_shndx;
 	} efile;
 	/*
 	 * All loaded bpf_object is linked in a list, which is
@@ -350,6 +351,7 @@
 	 */
 	obj->efile.obj_buf = obj_buf;
 	obj->efile.obj_buf_sz = obj_buf_sz;
+	obj->efile.maps_shndx = -1;
 
 	obj->loaded = false;
 
@@ -529,12 +531,12 @@
 }
 
 static int
-bpf_object__init_maps_name(struct bpf_object *obj, int maps_shndx)
+bpf_object__init_maps_name(struct bpf_object *obj)
 {
 	int i;
 	Elf_Data *symbols = obj->efile.symbols;
 
-	if (!symbols || maps_shndx < 0)
+	if (!symbols || obj->efile.maps_shndx < 0)
 		return -EINVAL;
 
 	for (i = 0; i < symbols->d_size / sizeof(GElf_Sym); i++) {
@@ -544,7 +546,7 @@
 
 		if (!gelf_getsym(symbols, i, &sym))
 			continue;
-		if (sym.st_shndx != maps_shndx)
+		if (sym.st_shndx != obj->efile.maps_shndx)
 			continue;
 
 		map_name = elf_strptr(obj->efile.elf,
@@ -572,7 +574,7 @@
 	Elf *elf = obj->efile.elf;
 	GElf_Ehdr *ep = &obj->efile.ehdr;
 	Elf_Scn *scn = NULL;
-	int idx = 0, err = 0, maps_shndx = -1;
+	int idx = 0, err = 0;
 
 	/* Elf is corrupted/truncated, avoid calling elf_strptr. */
 	if (!elf_rawdata(elf_getscn(elf, ep->e_shstrndx), NULL)) {
@@ -625,7 +627,7 @@
 		else if (strcmp(name, "maps") == 0) {
 			err = bpf_object__init_maps(obj, data->d_buf,
 						    data->d_size);
-			maps_shndx = idx;
+			obj->efile.maps_shndx = idx;
 		} else if (sh.sh_type == SHT_SYMTAB) {
 			if (obj->efile.symbols) {
 				pr_warning("bpf: multiple SYMTAB in %s\n",
@@ -674,8 +676,8 @@
 		pr_warning("Corrupted ELF file: index of strtab invalid\n");
 		return LIBBPF_ERRNO__FORMAT;
 	}
-	if (maps_shndx >= 0)
-		err = bpf_object__init_maps_name(obj, maps_shndx);
+	if (obj->efile.maps_shndx >= 0)
+		err = bpf_object__init_maps_name(obj);
 out:
 	return err;
 }
@@ -697,7 +699,8 @@
 static int
 bpf_program__collect_reloc(struct bpf_program *prog,
 			   size_t nr_maps, GElf_Shdr *shdr,
-			   Elf_Data *data, Elf_Data *symbols)
+			   Elf_Data *data, Elf_Data *symbols,
+			   int maps_shndx)
 {
 	int i, nrels;
 
@@ -724,9 +727,6 @@
 			return -LIBBPF_ERRNO__FORMAT;
 		}
 
-		insn_idx = rel.r_offset / sizeof(struct bpf_insn);
-		pr_debug("relocation: insn_idx=%u\n", insn_idx);
-
 		if (!gelf_getsym(symbols,
 				 GELF_R_SYM(rel.r_info),
 				 &sym)) {
@@ -735,6 +735,15 @@
 			return -LIBBPF_ERRNO__FORMAT;
 		}
 
+		if (sym.st_shndx != maps_shndx) {
+			pr_warning("Program '%s' contains non-map related relo data pointing to section %u\n",
+				   prog->section_name, sym.st_shndx);
+			return -LIBBPF_ERRNO__RELOC;
+		}
+
+		insn_idx = rel.r_offset / sizeof(struct bpf_insn);
+		pr_debug("relocation: insn_idx=%u\n", insn_idx);
+
 		if (insns[insn_idx].code != (BPF_LD | BPF_IMM | BPF_DW)) {
 			pr_warning("bpf: relocation: invalid relo for insns[%d].code 0x%x\n",
 				   insn_idx, insns[insn_idx].code);
@@ -863,7 +872,8 @@
 
 		err = bpf_program__collect_reloc(prog, nr_maps,
 						 shdr, data,
-						 obj->efile.symbols);
+						 obj->efile.symbols,
+						 obj->efile.maps_shndx);
 		if (err)
 			return err;
 	}
diff --git a/tools/lib/lockdep/Makefile b/tools/lib/lockdep/Makefile
index 90d2bae..1d57af5 100644
--- a/tools/lib/lockdep/Makefile
+++ b/tools/lib/lockdep/Makefile
@@ -100,7 +100,7 @@
 
 do_compile_shared_library =			\
 	($(print_shared_lib_compile)		\
-	$(CC) --shared $^ -o $@ -lpthread -ldl -Wl,-soname='"$@"';$(shell ln -s $@ liblockdep.so))
+	$(CC) --shared $^ -o $@ -lpthread -ldl -Wl,-soname='"$@"';$(shell ln -sf $@ liblockdep.so))
 
 do_build_static_lib =				\
 	($(print_static_lib_build)		\
diff --git a/tools/lib/lockdep/common.c b/tools/lib/lockdep/common.c
index 9be6633..d1c89cc 100644
--- a/tools/lib/lockdep/common.c
+++ b/tools/lib/lockdep/common.c
@@ -11,11 +11,6 @@
 bool debug_locks = true;
 bool debug_locks_silent;
 
-__attribute__((constructor)) static void liblockdep_init(void)
-{
-	lockdep_init();
-}
-
 __attribute__((destructor)) static void liblockdep_exit(void)
 {
 	debug_check_no_locks_held();
diff --git a/tools/lib/lockdep/include/liblockdep/common.h b/tools/lib/lockdep/include/liblockdep/common.h
index a60c14b..6e66277 100644
--- a/tools/lib/lockdep/include/liblockdep/common.h
+++ b/tools/lib/lockdep/include/liblockdep/common.h
@@ -44,7 +44,6 @@
 void lock_release(struct lockdep_map *lock, int nested,
 			unsigned long ip);
 extern void debug_check_no_locks_freed(const void *from, unsigned long len);
-extern void lockdep_init(void);
 
 #define STATIC_LOCKDEP_MAP_INIT(_name, _key) \
 	{ .name = (_name), .key = (void *)(_key), }
diff --git a/tools/lib/lockdep/lockdep.c b/tools/lib/lockdep/lockdep.c
index f42b7e9..a0a2e3a 100644
--- a/tools/lib/lockdep/lockdep.c
+++ b/tools/lib/lockdep/lockdep.c
@@ -1,2 +1,8 @@
 #include <linux/lockdep.h>
+
+/* Trivial API wrappers, we don't (yet) have RCU in user-space: */
+#define hlist_for_each_entry_rcu	hlist_for_each_entry
+#define hlist_add_head_rcu		hlist_add_head
+#define hlist_del_rcu			hlist_del
+
 #include "../../../kernel/locking/lockdep.c"
diff --git a/tools/lib/lockdep/preload.c b/tools/lib/lockdep/preload.c
index 21cdf86..5284484 100644
--- a/tools/lib/lockdep/preload.c
+++ b/tools/lib/lockdep/preload.c
@@ -439,7 +439,5 @@
 	ll_pthread_rwlock_unlock = dlsym(RTLD_NEXT, "pthread_rwlock_unlock");
 #endif
 
-	lockdep_init();
-
 	__init_state = done;
 }
diff --git a/tools/lib/lockdep/tests/AA.c b/tools/lib/lockdep/tests/AA.c
index 0f782ff..18211a5 100644
--- a/tools/lib/lockdep/tests/AA.c
+++ b/tools/lib/lockdep/tests/AA.c
@@ -1,13 +1,13 @@
 #include <liblockdep/mutex.h>
 
-void main(void)
+int main(void)
 {
-	pthread_mutex_t a, b;
+	pthread_mutex_t a;
 
 	pthread_mutex_init(&a, NULL);
-	pthread_mutex_init(&b, NULL);
 
 	pthread_mutex_lock(&a);
-	pthread_mutex_lock(&b);
 	pthread_mutex_lock(&a);
+
+	return 0;
 }
diff --git a/tools/lib/lockdep/tests/ABA.c b/tools/lib/lockdep/tests/ABA.c
new file mode 100644
index 0000000..0f782ff
--- /dev/null
+++ b/tools/lib/lockdep/tests/ABA.c
@@ -0,0 +1,13 @@
+#include <liblockdep/mutex.h>
+
+void main(void)
+{
+	pthread_mutex_t a, b;
+
+	pthread_mutex_init(&a, NULL);
+	pthread_mutex_init(&b, NULL);
+
+	pthread_mutex_lock(&a);
+	pthread_mutex_lock(&b);
+	pthread_mutex_lock(&a);
+}
diff --git a/tools/lib/lockdep/tests/ABBA_2threads.c b/tools/lib/lockdep/tests/ABBA_2threads.c
new file mode 100644
index 0000000..cd807d7
--- /dev/null
+++ b/tools/lib/lockdep/tests/ABBA_2threads.c
@@ -0,0 +1,46 @@
+#include <stdio.h>
+#include <pthread.h>
+
+pthread_mutex_t a = PTHREAD_MUTEX_INITIALIZER;
+pthread_mutex_t b = PTHREAD_MUTEX_INITIALIZER;
+pthread_barrier_t bar;
+
+void *ba_lock(void *arg)
+{
+	int ret, i;
+
+	pthread_mutex_lock(&b);
+
+	if (pthread_barrier_wait(&bar) == PTHREAD_BARRIER_SERIAL_THREAD)
+		pthread_barrier_destroy(&bar);
+
+	pthread_mutex_lock(&a);
+
+	pthread_mutex_unlock(&a);
+	pthread_mutex_unlock(&b);
+}
+
+int main(void)
+{
+	pthread_t t;
+
+	pthread_barrier_init(&bar, NULL, 2);
+
+	if (pthread_create(&t, NULL, ba_lock, NULL)) {
+		fprintf(stderr, "pthread_create() failed\n");
+		return 1;
+	}
+	pthread_mutex_lock(&a);
+
+	if (pthread_barrier_wait(&bar) == PTHREAD_BARRIER_SERIAL_THREAD)
+		pthread_barrier_destroy(&bar);
+
+	pthread_mutex_lock(&b);
+
+	pthread_mutex_unlock(&b);
+	pthread_mutex_unlock(&a);
+
+	pthread_join(t, NULL);
+
+	return 0;
+}
diff --git a/tools/lib/lockdep/uinclude/linux/compiler.h b/tools/lib/lockdep/uinclude/linux/compiler.h
index 6386dc3..fd3e56a 100644
--- a/tools/lib/lockdep/uinclude/linux/compiler.h
+++ b/tools/lib/lockdep/uinclude/linux/compiler.h
@@ -3,6 +3,7 @@
 
 #define __used		__attribute__((__unused__))
 #define unlikely
+#define READ_ONCE(x) (x)
 #define WRITE_ONCE(x, val) x=(val)
 #define RCU_INIT_POINTER(p, v) p=(v)
 
diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c
index c3bd294..190cc88 100644
--- a/tools/lib/traceevent/event-parse.c
+++ b/tools/lib/traceevent/event-parse.c
@@ -1951,6 +1951,7 @@
 		   strcmp(token, "*") == 0 ||
 		   strcmp(token, "^") == 0 ||
 		   strcmp(token, "/") == 0 ||
+		   strcmp(token, "%") == 0 ||
 		   strcmp(token, "<") == 0 ||
 		   strcmp(token, ">") == 0 ||
 		   strcmp(token, "<=") == 0 ||
@@ -2397,6 +2398,12 @@
 				break;
 			*val = left + right;
 			break;
+		case '~':
+			ret = arg_num_eval(arg->op.right, &right);
+			if (!ret)
+				break;
+			*val = ~right;
+			break;
 		default:
 			do_warning("unknown op '%s'", arg->op.op);
 			ret = 0;
@@ -2634,6 +2641,7 @@
 
 free_field:
 	free_arg(arg->hex.field);
+	arg->hex.field = NULL;
 out:
 	*tok = NULL;
 	return EVENT_ERROR;
@@ -2658,8 +2666,10 @@
 
 free_size:
 	free_arg(arg->int_array.count);
+	arg->int_array.count = NULL;
 free_field:
 	free_arg(arg->int_array.field);
+	arg->int_array.field = NULL;
 out:
 	*tok = NULL;
 	return EVENT_ERROR;
@@ -3689,6 +3699,9 @@
 		case '/':
 			val = left / right;
 			break;
+		case '%':
+			val = left % right;
+			break;
 		case '*':
 			val = left * right;
 			break;
@@ -4971,7 +4984,7 @@
 						break;
 					}
 				}
-				if (pevent->long_size == 8 && ls &&
+				if (pevent->long_size == 8 && ls == 1 &&
 				    sizeof(long) != 8) {
 					char *p;
 
@@ -5335,19 +5348,74 @@
 	return false;
 }
 
-void pevent_print_event(struct pevent *pevent, struct trace_seq *s,
-			struct pevent_record *record, bool use_trace_clock)
+/**
+ * pevent_find_event_by_record - return the event from a given record
+ * @pevent: a handle to the pevent
+ * @record: The record to get the event from
+ *
+ * Returns the associated event for a given record, or NULL if non is
+ * is found.
+ */
+struct event_format *
+pevent_find_event_by_record(struct pevent *pevent, struct pevent_record *record)
 {
-	static const char *spaces = "                    "; /* 20 spaces */
-	struct event_format *event;
+	int type;
+
+	if (record->size < 0) {
+		do_warning("ug! negative record size %d", record->size);
+		return NULL;
+	}
+
+	type = trace_parse_common_type(pevent, record->data);
+
+	return pevent_find_event(pevent, type);
+}
+
+/**
+ * pevent_print_event_task - Write the event task comm, pid and CPU
+ * @pevent: a handle to the pevent
+ * @s: the trace_seq to write to
+ * @event: the handle to the record's event
+ * @record: The record to get the event from
+ *
+ * Writes the tasks comm, pid and CPU to @s.
+ */
+void pevent_print_event_task(struct pevent *pevent, struct trace_seq *s,
+			     struct event_format *event,
+			     struct pevent_record *record)
+{
+	void *data = record->data;
+	const char *comm;
+	int pid;
+
+	pid = parse_common_pid(pevent, data);
+	comm = find_cmdline(pevent, pid);
+
+	if (pevent->latency_format) {
+		trace_seq_printf(s, "%8.8s-%-5d %3d",
+		       comm, pid, record->cpu);
+	} else
+		trace_seq_printf(s, "%16s-%-5d [%03d]", comm, pid, record->cpu);
+}
+
+/**
+ * pevent_print_event_time - Write the event timestamp
+ * @pevent: a handle to the pevent
+ * @s: the trace_seq to write to
+ * @event: the handle to the record's event
+ * @record: The record to get the event from
+ * @use_trace_clock: Set to parse according to the @pevent->trace_clock
+ *
+ * Writes the timestamp of the record into @s.
+ */
+void pevent_print_event_time(struct pevent *pevent, struct trace_seq *s,
+			     struct event_format *event,
+			     struct pevent_record *record,
+			     bool use_trace_clock)
+{
 	unsigned long secs;
 	unsigned long usecs;
 	unsigned long nsecs;
-	const char *comm;
-	void *data = record->data;
-	int type;
-	int pid;
-	int len;
 	int p;
 	bool use_usec_format;
 
@@ -5358,28 +5426,11 @@
 		nsecs = record->ts - secs * NSECS_PER_SEC;
 	}
 
-	if (record->size < 0) {
-		do_warning("ug! negative record size %d", record->size);
-		return;
-	}
-
-	type = trace_parse_common_type(pevent, data);
-
-	event = pevent_find_event(pevent, type);
-	if (!event) {
-		do_warning("ug! no event found for type %d", type);
-		return;
-	}
-
-	pid = parse_common_pid(pevent, data);
-	comm = find_cmdline(pevent, pid);
-
 	if (pevent->latency_format) {
-		trace_seq_printf(s, "%8.8s-%-5d %3d",
-		       comm, pid, record->cpu);
+		trace_seq_printf(s, " %3d", record->cpu);
 		pevent_data_lat_fmt(pevent, s, record);
 	} else
-		trace_seq_printf(s, "%16s-%-5d [%03d]", comm, pid, record->cpu);
+		trace_seq_printf(s, " [%03d]", record->cpu);
 
 	if (use_usec_format) {
 		if (pevent->flags & PEVENT_NSEC_OUTPUT) {
@@ -5387,14 +5438,36 @@
 			p = 9;
 		} else {
 			usecs = (nsecs + 500) / NSECS_PER_USEC;
+			/* To avoid usecs larger than 1 sec */
+			if (usecs >= 1000000) {
+				usecs -= 1000000;
+				secs++;
+			}
 			p = 6;
 		}
 
-		trace_seq_printf(s, " %5lu.%0*lu: %s: ",
-					secs, p, usecs, event->name);
+		trace_seq_printf(s, " %5lu.%0*lu:", secs, p, usecs);
 	} else
-		trace_seq_printf(s, " %12llu: %s: ",
-					record->ts, event->name);
+		trace_seq_printf(s, " %12llu:", record->ts);
+}
+
+/**
+ * pevent_print_event_data - Write the event data section
+ * @pevent: a handle to the pevent
+ * @s: the trace_seq to write to
+ * @event: the handle to the record's event
+ * @record: The record to get the event from
+ *
+ * Writes the parsing of the record's data to @s.
+ */
+void pevent_print_event_data(struct pevent *pevent, struct trace_seq *s,
+			     struct event_format *event,
+			     struct pevent_record *record)
+{
+	static const char *spaces = "                    "; /* 20 spaces */
+	int len;
+
+	trace_seq_printf(s, " %s: ", event->name);
 
 	/* Space out the event names evenly. */
 	len = strlen(event->name);
@@ -5404,6 +5477,23 @@
 	pevent_event_info(s, event, record);
 }
 
+void pevent_print_event(struct pevent *pevent, struct trace_seq *s,
+			struct pevent_record *record, bool use_trace_clock)
+{
+	struct event_format *event;
+
+	event = pevent_find_event_by_record(pevent, record);
+	if (!event) {
+		do_warning("ug! no event found for type %d",
+			   trace_parse_common_type(pevent, record->data));
+		return;
+	}
+
+	pevent_print_event_task(pevent, s, event, record);
+	pevent_print_event_time(pevent, s, event, record, use_trace_clock);
+	pevent_print_event_data(pevent, s, event, record);
+}
+
 static int events_id_cmp(const void *a, const void *b)
 {
 	struct event_format * const * ea = a;
diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h
index 706d9bc..9ffde37 100644
--- a/tools/lib/traceevent/event-parse.h
+++ b/tools/lib/traceevent/event-parse.h
@@ -628,6 +628,16 @@
 				 unsigned long long addr);
 int pevent_pid_is_registered(struct pevent *pevent, int pid);
 
+void pevent_print_event_task(struct pevent *pevent, struct trace_seq *s,
+			     struct event_format *event,
+			     struct pevent_record *record);
+void pevent_print_event_time(struct pevent *pevent, struct trace_seq *s,
+			     struct event_format *event,
+			     struct pevent_record *record,
+			     bool use_trace_clock);
+void pevent_print_event_data(struct pevent *pevent, struct trace_seq *s,
+			     struct event_format *event,
+			     struct pevent_record *record);
 void pevent_print_event(struct pevent *pevent, struct trace_seq *s,
 			struct pevent_record *record, bool use_trace_clock);
 
@@ -694,6 +704,9 @@
 struct event_format *
 pevent_find_event_by_name(struct pevent *pevent, const char *sys, const char *name);
 
+struct event_format *
+pevent_find_event_by_record(struct pevent *pevent, struct pevent_record *record);
+
 void pevent_data_lat_fmt(struct pevent *pevent,
 			 struct trace_seq *s, struct pevent_record *record);
 int pevent_data_type(struct pevent *pevent, struct pevent_record *rec);
diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt
index b9ca1e3..15949e2 100644
--- a/tools/perf/Documentation/perf-config.txt
+++ b/tools/perf/Documentation/perf-config.txt
@@ -8,7 +8,7 @@
 SYNOPSIS
 --------
 [verse]
-'perf config' -l | --list
+'perf config' [<file-option>] -l | --list
 
 DESCRIPTION
 -----------
@@ -21,6 +21,14 @@
 --list::
 	Show current config variables, name and value, for all sections.
 
+--user::
+	For writing and reading options: write to user
+	'$HOME/.perfconfig' file or read it.
+
+--system::
+	For writing and reading options: write to system-wide
+	'$(sysconfdir)/perfconfig' or read it.
+
 CONFIGURATION FILE
 ------------------
 
@@ -30,6 +38,10 @@
 The file '$(sysconfdir)/perfconfig' can be used to
 store a system-wide default configuration.
 
+When reading or writing, the values are read from the system and user
+configuration files by default, and options '--system' and '--user'
+can be used to tell the command to read from or write to only that location.
+
 Syntax
 ~~~~~~
 
@@ -62,7 +74,7 @@
 		medium = green, default
 		normal = lightgray, default
 		selected = white, lightgray
-		code = blue, default
+		jump_arrows = blue, default
 		addr = magenta, default
 		root = white, blue
 
@@ -98,6 +110,347 @@
 		order = caller
 		sort-key = function
 
+Variables
+~~~~~~~~~
+
+colors.*::
+	The variables for customizing the colors used in the output for the
+	'report', 'top' and 'annotate' in the TUI. They should specify the
+	foreground and background colors, separated by a comma, for example:
+
+		medium = green, lightgray
+
+	If you want to use the color configured for you terminal, just leave it
+	as 'default', for example:
+
+		medium = default, lightgray
+
+	Available colors:
+	red, yellow, green, cyan, gray, black, blue,
+	white, default, magenta, lightgray
+
+	colors.top::
+		'top' means a overhead percentage which is more than 5%.
+		And values of this variable specify percentage colors.
+		Basic key values are foreground-color 'red' and
+		background-color 'default'.
+	colors.medium::
+		'medium' means a overhead percentage which has more than 0.5%.
+		Default values are 'green' and 'default'.
+	colors.normal::
+		'normal' means the rest of overhead percentages
+		except 'top', 'medium', 'selected'.
+		Default values are 'lightgray' and 'default'.
+	colors.selected::
+		This selects the colors for the current entry in a list of entries
+		from sub-commands (top, report, annotate).
+		Default values are 'black' and 'lightgray'.
+	colors.jump_arrows::
+		Colors for jump arrows on assembly code listings
+		such as 'jns', 'jmp', 'jane', etc.
+		Default values are 'blue', 'default'.
+	colors.addr::
+		This selects colors for addresses from 'annotate'.
+		Default values are 'magenta', 'default'.
+	colors.root::
+		Colors for headers in the output of a sub-commands (top, report).
+		Default values are 'white', 'blue'.
+
+tui.*, gtk.*::
+	Subcommands that can be configured here are 'top', 'report' and 'annotate'.
+	These values are booleans, for example:
+
+	[tui]
+		top = true
+
+	will make the TUI be the default for the 'top' subcommand. Those will be
+	available if the required libs were detected at tool build time.
+
+buildid.*::
+	buildid.dir::
+		Each executable and shared library in modern distributions comes with a
+		content based identifier that, if available, will be inserted in a
+		'perf.data' file header to, at analysis time find what is needed to do
+		symbol resolution, code annotation, etc.
+
+		The recording tools also stores a hard link or copy in a per-user
+		directory, $HOME/.debug/, of binaries, shared libraries, /proc/kallsyms
+		and /proc/kcore files to be used at analysis time.
+
+		The buildid.dir variable can be used to either change this directory
+		cache location, or to disable it altogether. If you want to disable it,
+		set buildid.dir to /dev/null. The default is $HOME/.debug
+
+annotate.*::
+	These options work only for TUI.
+	These are in control of addresses, jump function, source code
+	in lines of assembly code from a specific program.
+
+	annotate.hide_src_code::
+		If a program which is analyzed has source code,
+		this option lets 'annotate' print a list of assembly code with the source code.
+		For example, let's see a part of a program. There're four lines.
+		If this option is 'true', they can be printed
+		without source code from a program as below.
+
+		│        push   %rbp
+		│        mov    %rsp,%rbp
+		│        sub    $0x10,%rsp
+		│        mov    (%rdi),%rdx
+
+		But if this option is 'false', source code of the part
+		can be also printed as below. Default is 'false'.
+
+		│      struct rb_node *rb_next(const struct rb_node *node)
+		│      {
+		│        push   %rbp
+		│        mov    %rsp,%rbp
+		│        sub    $0x10,%rsp
+		│              struct rb_node *parent;
+		│
+		│              if (RB_EMPTY_NODE(node))
+		│        mov    (%rdi),%rdx
+		│              return n;
+
+        annotate.use_offset::
+		Basing on a first address of a loaded function, offset can be used.
+		Instead of using original addresses of assembly code,
+		addresses subtracted from a base address can be printed.
+		Let's illustrate an example.
+		If a base address is 0XFFFFFFFF81624d50 as below,
+
+		ffffffff81624d50 <load0>
+
+		an address on assembly code has a specific absolute address as below
+
+		ffffffff816250b8:│  mov    0x8(%r14),%rdi
+
+		but if use_offset is 'true', an address subtracted from a base address is printed.
+		Default is true. This option is only applied to TUI.
+
+		             368:│  mov    0x8(%r14),%rdi
+
+	annotate.jump_arrows::
+		There can be jump instruction among assembly code.
+		Depending on a boolean value of jump_arrows,
+		arrows can be printed or not which represent
+		where do the instruction jump into as below.
+
+		│     ┌──jmp    1333
+		│     │  xchg   %ax,%ax
+		│1330:│  mov    %r15,%r10
+		│1333:└─→cmp    %r15,%r14
+
+		If jump_arrow is 'false', the arrows isn't printed as below.
+		Default is 'false'.
+
+		│      ↓ jmp    1333
+		│        xchg   %ax,%ax
+		│1330:   mov    %r15,%r10
+		│1333:   cmp    %r15,%r14
+
+        annotate.show_linenr::
+		When showing source code if this option is 'true',
+		line numbers are printed as below.
+
+		│1628         if (type & PERF_SAMPLE_IDENTIFIER) {
+		│     ↓ jne    508
+		│1628                 data->id = *array;
+		│1629                 array++;
+		│1630         }
+
+		However if this option is 'false', they aren't printed as below.
+		Default is 'false'.
+
+		│             if (type & PERF_SAMPLE_IDENTIFIER) {
+		│     ↓ jne    508
+		│                     data->id = *array;
+		│                     array++;
+		│             }
+
+        annotate.show_nr_jumps::
+		Let's see a part of assembly code.
+
+		│1382:   movb   $0x1,-0x270(%rbp)
+
+		If use this, the number of branches jumping to that address can be printed as below.
+		Default is 'false'.
+
+		│1 1382:   movb   $0x1,-0x270(%rbp)
+
+        annotate.show_total_period::
+		To compare two records on an instruction base, with this option
+		provided, display total number of samples that belong to a line
+		in assembly code. If this option is 'true', total periods are printed
+		instead of percent values as below.
+
+		  302 │      mov    %eax,%eax
+
+		But if this option is 'false', percent values for overhead are printed i.e.
+		Default is 'false'.
+
+		99.93 │      mov    %eax,%eax
+
+hist.*::
+	hist.percentage::
+		This option control the way to calculate overhead of filtered entries -
+		that means the value of this option is effective only if there's a
+		filter (by comm, dso or symbol name). Suppose a following example:
+
+		       Overhead  Symbols
+		       ........  .......
+		        33.33%     foo
+		        33.33%     bar
+		        33.33%     baz
+
+	       This is an original overhead and we'll filter out the first 'foo'
+	       entry. The value of 'relative' would increase the overhead of 'bar'
+	       and 'baz' to 50.00% for each, while 'absolute' would show their
+	       current overhead (33.33%).
+
+ui.*::
+	ui.show-headers::
+		This option controls display of column headers (like 'Overhead' and 'Symbol')
+		in 'report' and 'top'. If this option is false, they are hidden.
+		This option is only applied to TUI.
+
+call-graph.*::
+	When sub-commands 'top' and 'report' work with -g/—-children
+	there're options in control of call-graph.
+
+	call-graph.record-mode::
+		The record-mode can be 'fp' (frame pointer), 'dwarf' and 'lbr'.
+		The value of 'dwarf' is effective only if perf detect needed library
+		(libunwind or a recent version of libdw).
+		'lbr' only work for cpus that support it.
+
+	call-graph.dump-size::
+		The size of stack to dump in order to do post-unwinding. Default is 8192 (byte).
+		When using dwarf into record-mode, the default size will be used if omitted.
+
+	call-graph.print-type::
+		The print-types can be graph (graph absolute), fractal (graph relative),
+		flat and folded. This option controls a way to show overhead for each callchain
+		entry. Suppose a following example.
+
+                Overhead  Symbols
+                ........  .......
+                  40.00%  foo
+                          |
+                          ---foo
+                             |
+                             |--50.00%--bar
+                             |          main
+                             |
+                              --50.00%--baz
+                                        main
+
+		This output is a 'fractal' format. The 'foo' came from 'bar' and 'baz' exactly
+		half and half so 'fractal' shows 50.00% for each
+		(meaning that it assumes 100% total overhead of 'foo').
+
+		The 'graph' uses absolute overhead value of 'foo' as total so each of
+		'bar' and 'baz' callchain will have 20.00% of overhead.
+		If 'flat' is used, single column and linear exposure of call chains.
+		'folded' mean call chains are displayed in a line, separated by semicolons.
+
+	call-graph.order::
+		This option controls print order of callchains. The default is
+		'callee' which means callee is printed at top and then followed by its
+		caller and so on. The 'caller' prints it in reverse order.
+
+		If this option is not set and report.children or top.children is
+		set to true (or the equivalent command line option is given),
+		the default value of this option is changed to 'caller' for the
+		execution of 'perf report' or 'perf top'. Other commands will
+		still default to 'callee'.
+
+	call-graph.sort-key::
+		The callchains are merged if they contain same information.
+		The sort-key option determines a way to compare the callchains.
+		A value of 'sort-key' can be 'function' or 'address'.
+		The default is 'function'.
+
+	call-graph.threshold::
+		When there're many callchains it'd print tons of lines. So perf omits
+		small callchains under a certain overhead (threshold) and this option
+		control the threshold. Default is 0.5 (%). The overhead is calculated
+		by value depends on call-graph.print-type.
+
+	call-graph.print-limit::
+		This is a maximum number of lines of callchain printed for a single
+		histogram entry. Default is 0 which means no limitation.
+
+report.*::
+	report.percent-limit::
+		This one is mostly the same as call-graph.threshold but works for
+		histogram entries. Entries having an overhead lower than this
+		percentage will not be printed. Default is '0'. If percent-limit
+		is '10', only entries which have more than 10% of overhead will be
+		printed.
+
+	report.queue-size::
+		This option sets up the maximum allocation size of the internal
+		event queue for ordering events. Default is 0, meaning no limit.
+
+	report.children::
+		'Children' means functions called from another function.
+		If this option is true, 'perf report' cumulates callchains of children
+		and show (accumulated) total overhead as well as 'Self' overhead.
+		Please refer to the 'perf report' manual. The default is 'true'.
+
+	report.group::
+		This option is to show event group information together.
+		Example output with this turned on, notice that there is one column
+		per event in the group, ref-cycles and cycles:
+
+		# group: {ref-cycles,cycles}
+		# ========
+		#
+		# Samples: 7K of event 'anon group { ref-cycles, cycles }'
+		# Event count (approx.): 6876107743
+		#
+		#         Overhead  Command      Shared Object               Symbol
+		# ................  .......  .................  ...................
+		#
+		    99.84%  99.76%  noploop  noploop            [.] main
+		     0.07%   0.00%  noploop  ld-2.15.so         [.] strcmp
+		     0.03%   0.00%  noploop  [kernel.kallsyms]  [k] timerqueue_del
+
+top.*::
+	top.children::
+		Same as 'report.children'. So if it is enabled, the output of 'top'
+		command will have 'Children' overhead column as well as 'Self' overhead
+		column by default.
+		The default is 'true'.
+
+man.*::
+	man.viewer::
+		This option can assign a tool to view manual pages when 'help'
+		subcommand was invoked. Supported tools are 'man', 'woman'
+		(with emacs client) and 'konqueror'. Default is 'man'.
+
+		New man viewer tool can be also added using 'man.<tool>.cmd'
+		or use different path using 'man.<tool>.path' config option.
+
+pager.*::
+	pager.<subcommand>::
+		When the subcommand is run on stdio, determine whether it uses
+		pager or not based on this value. Default is 'unspecified'.
+
+kmem.*::
+	kmem.default::
+		This option decides which allocator is to be analyzed if neither
+		'--slab' nor '--page' option is used. Default is 'slab'.
+
+record.*::
+	record.build-id::
+		This option can be 'cache', 'no-cache' or 'skip'.
+		'cache' is to post-process data and save/update the binaries into
+		the build-id cache (in ~/.debug). This is the default.
+		But if this option is 'no-cache', it will not update the build-id cache.
+		'skip' skips post-processing and does not update the cache.
+
 SEE ALSO
 --------
 linkperf:perf[1]
diff --git a/tools/perf/Documentation/perf-inject.txt b/tools/perf/Documentation/perf-inject.txt
index 0b1cede..87b2588 100644
--- a/tools/perf/Documentation/perf-inject.txt
+++ b/tools/perf/Documentation/perf-inject.txt
@@ -53,6 +53,13 @@
 --strip::
 	Use with --itrace to strip out non-synthesized events.
 
+-j::
+--jit::
+	Process jitdump files by injecting the mmap records corresponding to jitted
+	functions. This option also generates the ELF images for each jitted function
+	found in the jitdumps files captured in the input perf.data file. Use this option
+	if you are monitoring environment using JIT runtimes, such as Java, DART or V8.
+
 SEE ALSO
 --------
 linkperf:perf-record[1], linkperf:perf-report[1], linkperf:perf-archive[1]
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index fbceb63..19aa175 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -341,6 +341,12 @@
 --buildid-all::
 Record build-id of all DSOs regardless whether it's actually hit or not.
 
+--all-kernel::
+Configure all used events to run in kernel space.
+
+--all-user::
+Configure all used events to run in user space.
+
 SEE ALSO
 --------
 linkperf:perf-stat[1], linkperf:perf-list[1]
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index 8a301f6..1211399 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -117,6 +117,22 @@
 	And default sort keys are changed to comm, dso_from, symbol_from, dso_to
 	and symbol_to, see '--branch-stack'.
 
+	If the --mem-mode option is used, the following sort keys are also available
+	(incompatible with --branch-stack):
+	symbol_daddr, dso_daddr, locked, tlb, mem, snoop, dcacheline.
+
+	- symbol_daddr: name of data symbol being executed on at the time of sample
+	- dso_daddr: name of library or module containing the data being executed
+	on at the time of the sample
+	- locked: whether the bus was locked at the time of the sample
+	- tlb: type of tlb access for the data at the time of the sample
+	- mem: type of memory access for the data at the time of the sample
+	- snoop: type of snoop (if any) for the data at the time of the sample
+	- dcacheline: the cacheline the data address is on at the time of the sample
+
+	And the default sort keys are changed to local_weight, mem, sym, dso,
+	symbol_daddr, dso_daddr, snoop, tlb, locked, see '--mem-mode'.
+
 	If the data file has tracepoint event(s), following (dynamic) sort keys
 	are also available:
 	trace, trace_fields, [<event>.]<field>[/raw]
@@ -151,22 +167,6 @@
 	By default, every sort keys not specified in -F will be appended
 	automatically.
 
-	If --mem-mode option is used, following sort keys are also available
-	(incompatible with --branch-stack):
-	symbol_daddr, dso_daddr, locked, tlb, mem, snoop, dcacheline.
-
-	- symbol_daddr: name of data symbol being executed on at the time of sample
-	- dso_daddr: name of library or module containing the data being executed
-	on at the time of sample
-	- locked: whether the bus was locked at the time of sample
-	- tlb: type of tlb access for the data at the time of sample
-	- mem: type of memory access for the data at the time of sample
-	- snoop: type of snoop (if any) for the data at the time of sample
-	- dcacheline: the cacheline the data address is on at the time of sample
-
-	And default sort keys are changed to local_weight, mem, sym, dso,
-	symbol_daddr, dso_daddr, snoop, tlb, locked, see '--mem-mode'.
-
 -p::
 --parent=<regex>::
         A regex filter to identify parent. The parent is a caller of this
@@ -351,7 +351,10 @@
 
 --percent-limit::
 	Do not show entries which have an overhead under that percent.
-	(Default: 0).
+	(Default: 0).  Note that this option also sets the percent limit (threshold)
+	of callchains.  However the default value of callchain threshold is
+	different than the default value of hist entries.  Please see the
+	--call-graph option for details.
 
 --percentage::
 	Determine how to display the overhead percentage of filtered entries.
@@ -398,6 +401,9 @@
 --raw-trace::
 	When displaying traceevent output, do not use print fmt or plugins.
 
+--hierarchy::
+	Enable hierarchical output.
+
 include::callchain-overhead-calculation.txt[]
 
 SEE ALSO
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 52ef7a9..04f23b4 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -69,6 +69,14 @@
 --scale::
 	scale/normalize counter values
 
+-d::
+--detailed::
+	print more detailed statistics, can be specified up to 3 times
+
+	   -d:          detailed events, L1 and LLC data cache
+        -d -d:     more detailed events, dTLB and iTLB events
+     -d -d -d:     very detailed events, adding prefetch events
+
 -r::
 --repeat=<n>::
 	repeat command and print average + stddev (max: 100). 0 means forever.
@@ -139,6 +147,10 @@
 The overhead percentage could be high in some cases, for instance with small, sub 100ms intervals.  Use with caution.
 	example: 'perf stat -I 1000 -e cycles -a sleep 5'
 
+--metric-only::
+Only print computed metrics. Print them in a single line.
+Don't show any raw values. Not supported with --per-thread.
+
 --per-socket::
 Aggregate counts per processor socket for system-wide mode measurements.  This
 is a useful mode to detect imbalance between sockets.  To enable this mode,
@@ -211,6 +223,29 @@
 
  Wall-clock time elapsed:   719.554352 msecs
 
+CSV FORMAT
+----------
+
+With -x, perf stat is able to output a not-quite-CSV format output
+Commas in the output are not put into "". To make it easy to parse
+it is recommended to use a different character like -x \;
+
+The fields are in this order:
+
+	- optional usec time stamp in fractions of second (with -I xxx)
+	- optional CPU, core, or socket identifier
+	- optional number of logical CPUs aggregated
+	- counter value
+	- unit of the counter value or empty
+	- event name
+	- run time of counter
+	- percentage of measurement time the counter was running
+	- optional variance if multiple values are collected with -r
+	- optional metric value
+	- optional unit of metric
+
+Additional metrics may be printed with all earlier fields being empty.
+
 SEE ALSO
 --------
 linkperf:perf-top[1], linkperf:perf-list[1]
diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt
index b0e60e1..19f046f 100644
--- a/tools/perf/Documentation/perf-top.txt
+++ b/tools/perf/Documentation/perf-top.txt
@@ -233,6 +233,9 @@
 --raw-trace::
 	When displaying traceevent output, do not use print fmt or plugins.
 
+--hierarchy::
+	Enable hierarchy output.
+
 INTERACTIVE PROMPTING KEYS
 --------------------------
 
diff --git a/tools/perf/Documentation/perfconfig.example b/tools/perf/Documentation/perfconfig.example
index 767ea24..1d8d5bc 100644
--- a/tools/perf/Documentation/perfconfig.example
+++ b/tools/perf/Documentation/perfconfig.example
@@ -5,7 +5,7 @@
 	medium = green, lightgray
 	normal = black, lightgray
 	selected = lightgray, magenta
-	code = blue, lightgray
+	jump_arrows = blue, lightgray
 	addr = magenta, lightgray
 
 [tui]
diff --git a/tools/perf/Documentation/tips.txt b/tools/perf/Documentation/tips.txt
index e0ce957..5950b5a 100644
--- a/tools/perf/Documentation/tips.txt
+++ b/tools/perf/Documentation/tips.txt
@@ -27,3 +27,4 @@
 To change sampling frequency to 100 Hz: perf record -F 100
 See assembly instructions with percentage: perf annotate <symbol>
 If you prefer Intel style assembly, try: perf annotate -M intel
+For hierarchical output, try: perf report --hierarchy
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index dcd9a70..32a64e6 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -68,6 +68,20 @@
 	$(print_msg)
 	$(make)
 
+ifdef MAKECMDGOALS
+has_clean := 0
+ifneq ($(filter clean,$(MAKECMDGOALS)),)
+  has_clean := 1
+endif # clean
+
+ifeq ($(has_clean),1)
+  rest := $(filter-out clean,$(MAKECMDGOALS))
+  ifneq ($(rest),)
+$(rest): clean
+  endif # rest
+endif # has_clean
+endif # MAKECMDGOALS
+
 #
 # The clean target is not really parallel, don't print the jobs info:
 #
@@ -75,10 +89,17 @@
 	$(make)
 
 #
-# The build-test target is not really parallel, don't print the jobs info:
+# The build-test target is not really parallel, don't print the jobs info,
+# it also uses only the tests/make targets that don't pollute the source
+# repository, i.e. that uses O= or builds the tarpkg outside the source
+# repo directories.
+#
+# For a full test, use:
+#
+# make -C tools/perf -f tests/make
 #
 build-test:
-	@$(MAKE) SHUF=1 -f tests/make --no-print-directory
+	@$(MAKE) SHUF=1 -f tests/make REUSE_FEATURES_DUMP=1 MK=Makefile SET_PARALLEL=1 --no-print-directory tarpkg out
 
 #
 # All other targets get passed through:
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 0a22407..4a4fad4 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -58,6 +58,9 @@
 #
 # Define NO_LIBBIONIC if you do not want bionic support
 #
+# Define NO_LIBCRYPTO if you do not want libcrypto (openssl) support
+# used for generating build-ids for ELFs generated by jitdump.
+#
 # Define NO_LIBDW_DWARF_UNWIND if you do not want libdw support
 # for dwarf backtrace post unwind.
 #
@@ -77,6 +80,9 @@
 # Define NO_AUXTRACE if you do not want AUX area tracing support
 #
 # Define NO_LIBBPF if you do not want BPF support
+#
+# Define FEATURES_DUMP to provide features detection dump file
+# and bypass the feature detection
 
 # As per kernel Makefile, avoid funny character set dependencies
 unexport LC_ALL
@@ -133,6 +139,8 @@
 $(call allow-override,AR,$(CROSS_COMPILE)ar)
 $(call allow-override,LD,$(CROSS_COMPILE)ld)
 
+LD += $(EXTRA_LDFLAGS)
+
 PKG_CONFIG = $(CROSS_COMPILE)pkg-config
 
 RM      = rm -f
@@ -162,10 +170,28 @@
 endif
 endif
 
+# Set FEATURE_TESTS to 'all' so all possible feature checkers are executed.
+# Without this setting the output feature dump file misses some features, for
+# example, liberty. Select all checkers so we won't get an incomplete feature
+# dump file.
 ifeq ($(config),1)
+ifdef MAKECMDGOALS
+ifeq ($(filter feature-dump,$(MAKECMDGOALS)),feature-dump)
+FEATURE_TESTS := all
+endif
+endif
 include config/Makefile
 endif
 
+# The FEATURE_DUMP_EXPORT holds location of the actual
+# FEATURE_DUMP file to be used to bypass feature detection
+# (for bpf or any other subproject)
+ifeq ($(FEATURES_DUMP),)
+FEATURE_DUMP_EXPORT := $(realpath $(OUTPUT)FEATURE-DUMP)
+else
+FEATURE_DUMP_EXPORT := $(FEATURES_DUMP)
+endif
+
 export prefix bindir sharedir sysconfdir DESTDIR
 
 # sparse is architecture-neutral, which means that we need to tell it
@@ -436,7 +462,7 @@
 	$(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) clean >/dev/null
 
 $(LIBBPF): fixdep FORCE
-	$(Q)$(MAKE) -C $(BPF_DIR) O=$(OUTPUT) $(OUTPUT)libbpf.a FEATURES_DUMP=$(realpath $(OUTPUT)FEATURE-DUMP)
+	$(Q)$(MAKE) -C $(BPF_DIR) O=$(OUTPUT) $(OUTPUT)libbpf.a FEATURES_DUMP=$(FEATURE_DUMP_EXPORT)
 
 $(LIBBPF)-clean:
 	$(call QUIET_CLEAN, libbpf)
@@ -606,11 +632,22 @@
 	$(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf perf-read-vdso32 perf-read-vdsox32
 	$(call QUIET_CLEAN, core-gen)   $(RM)  *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)FEATURE-DUMP $(OUTPUT)util/*-bison* $(OUTPUT)util/*-flex* \
 		$(OUTPUT)util/intel-pt-decoder/inat-tables.c $(OUTPUT)fixdep \
-		$(OUTPUT)tests/llvm-src-{base,kbuild,prologue}.c
+		$(OUTPUT)tests/llvm-src-{base,kbuild,prologue,relocation}.c
 	$(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean
 	$(python-clean)
 
 #
+# To provide FEATURE-DUMP into $(FEATURE_DUMP_COPY)
+# file if defined, with no further action.
+feature-dump:
+ifdef FEATURE_DUMP_COPY
+	@cp $(OUTPUT)FEATURE-DUMP $(FEATURE_DUMP_COPY)
+	@echo "FEATURE-DUMP file copied into $(FEATURE_DUMP_COPY)"
+else
+	@echo "FEATURE-DUMP file available in $(OUTPUT)FEATURE-DUMP"
+endif
+
+#
 # Trick: if ../../.git does not exist - we are building out of tree for example,
 # then force version regeneration:
 #
diff --git a/tools/perf/arch/arm/Makefile b/tools/perf/arch/arm/Makefile
index 7fbca17..18b1351 100644
--- a/tools/perf/arch/arm/Makefile
+++ b/tools/perf/arch/arm/Makefile
@@ -1,3 +1,4 @@
 ifndef NO_DWARF
 PERF_HAVE_DWARF_REGS := 1
 endif
+PERF_HAVE_JITDUMP := 1
diff --git a/tools/perf/arch/arm64/Makefile b/tools/perf/arch/arm64/Makefile
index 7fbca17..18b1351 100644
--- a/tools/perf/arch/arm64/Makefile
+++ b/tools/perf/arch/arm64/Makefile
@@ -1,3 +1,4 @@
 ifndef NO_DWARF
 PERF_HAVE_DWARF_REGS := 1
 endif
+PERF_HAVE_JITDUMP := 1
diff --git a/tools/perf/arch/powerpc/Makefile b/tools/perf/arch/powerpc/Makefile
index 7fbca17..56e05f1 100644
--- a/tools/perf/arch/powerpc/Makefile
+++ b/tools/perf/arch/powerpc/Makefile
@@ -1,3 +1,6 @@
 ifndef NO_DWARF
 PERF_HAVE_DWARF_REGS := 1
 endif
+
+HAVE_KVM_STAT_SUPPORT := 1
+PERF_HAVE_JITDUMP := 1
diff --git a/tools/perf/arch/powerpc/util/Build b/tools/perf/arch/powerpc/util/Build
index 7b8b0d1..c8fe207 100644
--- a/tools/perf/arch/powerpc/util/Build
+++ b/tools/perf/arch/powerpc/util/Build
@@ -1,5 +1,6 @@
 libperf-y += header.o
 libperf-y += sym-handling.o
+libperf-y += kvm-stat.o
 
 libperf-$(CONFIG_DWARF) += dwarf-regs.o
 libperf-$(CONFIG_DWARF) += skip-callchain-idx.o
diff --git a/tools/perf/arch/powerpc/util/book3s_hcalls.h b/tools/perf/arch/powerpc/util/book3s_hcalls.h
new file mode 100644
index 0000000..0dd6b7f
--- /dev/null
+++ b/tools/perf/arch/powerpc/util/book3s_hcalls.h
@@ -0,0 +1,123 @@
+#ifndef ARCH_PERF_BOOK3S_HV_HCALLS_H
+#define ARCH_PERF_BOOK3S_HV_HCALLS_H
+
+/*
+ * PowerPC HCALL codes : hcall code to name mapping
+ */
+#define kvm_trace_symbol_hcall \
+	{0x4, "H_REMOVE"},					\
+	{0x8, "H_ENTER"},					\
+	{0xc, "H_READ"},					\
+	{0x10, "H_CLEAR_MOD"},					\
+	{0x14, "H_CLEAR_REF"},					\
+	{0x18, "H_PROTECT"},					\
+	{0x1c, "H_GET_TCE"},					\
+	{0x20, "H_PUT_TCE"},					\
+	{0x24, "H_SET_SPRG0"},					\
+	{0x28, "H_SET_DABR"},					\
+	{0x2c, "H_PAGE_INIT"},					\
+	{0x30, "H_SET_ASR"},					\
+	{0x34, "H_ASR_ON"},					\
+	{0x38, "H_ASR_OFF"},					\
+	{0x3c, "H_LOGICAL_CI_LOAD"},				\
+	{0x40, "H_LOGICAL_CI_STORE"},				\
+	{0x44, "H_LOGICAL_CACHE_LOAD"},				\
+	{0x48, "H_LOGICAL_CACHE_STORE"},			\
+	{0x4c, "H_LOGICAL_ICBI"},				\
+	{0x50, "H_LOGICAL_DCBF"},				\
+	{0x54, "H_GET_TERM_CHAR"},				\
+	{0x58, "H_PUT_TERM_CHAR"},				\
+	{0x5c, "H_REAL_TO_LOGICAL"},				\
+	{0x60, "H_HYPERVISOR_DATA"},				\
+	{0x64, "H_EOI"},					\
+	{0x68, "H_CPPR"},					\
+	{0x6c, "H_IPI"},					\
+	{0x70, "H_IPOLL"},					\
+	{0x74, "H_XIRR"},					\
+	{0x78, "H_MIGRATE_DMA"},				\
+	{0x7c, "H_PERFMON"},					\
+	{0xdc, "H_REGISTER_VPA"},				\
+	{0xe0, "H_CEDE"},					\
+	{0xe4, "H_CONFER"},					\
+	{0xe8, "H_PROD"},					\
+	{0xec, "H_GET_PPP"},					\
+	{0xf0, "H_SET_PPP"},					\
+	{0xf4, "H_PURR"},					\
+	{0xf8, "H_PIC"},					\
+	{0xfc, "H_REG_CRQ"},					\
+	{0x100, "H_FREE_CRQ"},					\
+	{0x104, "H_VIO_SIGNAL"},				\
+	{0x108, "H_SEND_CRQ"},					\
+	{0x110, "H_COPY_RDMA"},					\
+	{0x114, "H_REGISTER_LOGICAL_LAN"},			\
+	{0x118, "H_FREE_LOGICAL_LAN"},				\
+	{0x11c, "H_ADD_LOGICAL_LAN_BUFFER"},			\
+	{0x120, "H_SEND_LOGICAL_LAN"},				\
+	{0x124, "H_BULK_REMOVE"},				\
+	{0x130, "H_MULTICAST_CTRL"},				\
+	{0x134, "H_SET_XDABR"},					\
+	{0x138, "H_STUFF_TCE"},					\
+	{0x13c, "H_PUT_TCE_INDIRECT"},				\
+	{0x14c, "H_CHANGE_LOGICAL_LAN_MAC"},			\
+	{0x150, "H_VTERM_PARTNER_INFO"},			\
+	{0x154, "H_REGISTER_VTERM"},				\
+	{0x158, "H_FREE_VTERM"},				\
+	{0x15c, "H_RESET_EVENTS"},				\
+	{0x160, "H_ALLOC_RESOURCE"},				\
+	{0x164, "H_FREE_RESOURCE"},				\
+	{0x168, "H_MODIFY_QP"},					\
+	{0x16c, "H_QUERY_QP"},					\
+	{0x170, "H_REREGISTER_PMR"},				\
+	{0x174, "H_REGISTER_SMR"},				\
+	{0x178, "H_QUERY_MR"},					\
+	{0x17c, "H_QUERY_MW"},					\
+	{0x180, "H_QUERY_HCA"},					\
+	{0x184, "H_QUERY_PORT"},				\
+	{0x188, "H_MODIFY_PORT"},				\
+	{0x18c, "H_DEFINE_AQP1"},				\
+	{0x190, "H_GET_TRACE_BUFFER"},				\
+	{0x194, "H_DEFINE_AQP0"},				\
+	{0x198, "H_RESIZE_MR"},					\
+	{0x19c, "H_ATTACH_MCQP"},				\
+	{0x1a0, "H_DETACH_MCQP"},				\
+	{0x1a4, "H_CREATE_RPT"},				\
+	{0x1a8, "H_REMOVE_RPT"},				\
+	{0x1ac, "H_REGISTER_RPAGES"},				\
+	{0x1b0, "H_DISABLE_AND_GETC"},				\
+	{0x1b4, "H_ERROR_DATA"},				\
+	{0x1b8, "H_GET_HCA_INFO"},				\
+	{0x1bc, "H_GET_PERF_COUNT"},				\
+	{0x1c0, "H_MANAGE_TRACE"},				\
+	{0x1d4, "H_FREE_LOGICAL_LAN_BUFFER"},			\
+	{0x1d8, "H_POLL_PENDING"},				\
+	{0x1e4, "H_QUERY_INT_STATE"},				\
+	{0x244, "H_ILLAN_ATTRIBUTES"},				\
+	{0x250, "H_MODIFY_HEA_QP"},				\
+	{0x254, "H_QUERY_HEA_QP"},				\
+	{0x258, "H_QUERY_HEA"},					\
+	{0x25c, "H_QUERY_HEA_PORT"},				\
+	{0x260, "H_MODIFY_HEA_PORT"},				\
+	{0x264, "H_REG_BCMC"},					\
+	{0x268, "H_DEREG_BCMC"},				\
+	{0x26c, "H_REGISTER_HEA_RPAGES"},			\
+	{0x270, "H_DISABLE_AND_GET_HEA"},			\
+	{0x274, "H_GET_HEA_INFO"},				\
+	{0x278, "H_ALLOC_HEA_RESOURCE"},			\
+	{0x284, "H_ADD_CONN"},					\
+	{0x288, "H_DEL_CONN"},					\
+	{0x298, "H_JOIN"},					\
+	{0x2a4, "H_VASI_STATE"},				\
+	{0x2b0, "H_ENABLE_CRQ"},				\
+	{0x2b8, "H_GET_EM_PARMS"},				\
+	{0x2d0, "H_SET_MPP"},					\
+	{0x2d4, "H_GET_MPP"},					\
+	{0x2ec, "H_HOME_NODE_ASSOCIATIVITY"},			\
+	{0x2f4, "H_BEST_ENERGY"},				\
+	{0x2fc, "H_XIRR_X"},					\
+	{0x300, "H_RANDOM"},					\
+	{0x304, "H_COP"},					\
+	{0x314, "H_GET_MPP_X"},					\
+	{0x31c, "H_SET_MODE"},					\
+	{0xf000, "H_RTAS"}					\
+
+#endif
diff --git a/tools/perf/arch/powerpc/util/book3s_hv_exits.h b/tools/perf/arch/powerpc/util/book3s_hv_exits.h
new file mode 100644
index 0000000..e68ba2d
--- /dev/null
+++ b/tools/perf/arch/powerpc/util/book3s_hv_exits.h
@@ -0,0 +1,33 @@
+#ifndef ARCH_PERF_BOOK3S_HV_EXITS_H
+#define ARCH_PERF_BOOK3S_HV_EXITS_H
+
+/*
+ * PowerPC Interrupt vectors : exit code to name mapping
+ */
+
+#define kvm_trace_symbol_exit \
+	{0x0,	"RETURN_TO_HOST"}, \
+	{0x100, "SYSTEM_RESET"}, \
+	{0x200, "MACHINE_CHECK"}, \
+	{0x300, "DATA_STORAGE"}, \
+	{0x380, "DATA_SEGMENT"}, \
+	{0x400, "INST_STORAGE"}, \
+	{0x480, "INST_SEGMENT"}, \
+	{0x500, "EXTERNAL"}, \
+	{0x501, "EXTERNAL_LEVEL"}, \
+	{0x502, "EXTERNAL_HV"}, \
+	{0x600, "ALIGNMENT"}, \
+	{0x700, "PROGRAM"}, \
+	{0x800, "FP_UNAVAIL"}, \
+	{0x900, "DECREMENTER"}, \
+	{0x980, "HV_DECREMENTER"}, \
+	{0xc00, "SYSCALL"}, \
+	{0xd00, "TRACE"}, \
+	{0xe00, "H_DATA_STORAGE"}, \
+	{0xe20, "H_INST_STORAGE"}, \
+	{0xe40, "H_EMUL_ASSIST"}, \
+	{0xf00, "PERFMON"}, \
+	{0xf20, "ALTIVEC"}, \
+	{0xf40, "VSX"}
+
+#endif
diff --git a/tools/perf/arch/powerpc/util/kvm-stat.c b/tools/perf/arch/powerpc/util/kvm-stat.c
new file mode 100644
index 0000000..74eee30
--- /dev/null
+++ b/tools/perf/arch/powerpc/util/kvm-stat.c
@@ -0,0 +1,170 @@
+#include "util/kvm-stat.h"
+#include "util/parse-events.h"
+#include "util/debug.h"
+
+#include "book3s_hv_exits.h"
+#include "book3s_hcalls.h"
+
+#define NR_TPS 4
+
+const char *vcpu_id_str = "vcpu_id";
+const int decode_str_len = 40;
+const char *kvm_entry_trace = "kvm_hv:kvm_guest_enter";
+const char *kvm_exit_trace = "kvm_hv:kvm_guest_exit";
+
+define_exit_reasons_table(hv_exit_reasons, kvm_trace_symbol_exit);
+define_exit_reasons_table(hcall_reasons, kvm_trace_symbol_hcall);
+
+/* Tracepoints specific to ppc_book3s_hv */
+const char *ppc_book3s_hv_kvm_tp[] = {
+	"kvm_hv:kvm_guest_enter",
+	"kvm_hv:kvm_guest_exit",
+	"kvm_hv:kvm_hcall_enter",
+	"kvm_hv:kvm_hcall_exit",
+	NULL,
+};
+
+/* 1 extra placeholder for NULL */
+const char *kvm_events_tp[NR_TPS + 1];
+const char *kvm_exit_reason;
+
+static void hcall_event_get_key(struct perf_evsel *evsel,
+				struct perf_sample *sample,
+				struct event_key *key)
+{
+	key->info = 0;
+	key->key = perf_evsel__intval(evsel, sample, "req");
+}
+
+static const char *get_hcall_exit_reason(u64 exit_code)
+{
+	struct exit_reasons_table *tbl = hcall_reasons;
+
+	while (tbl->reason != NULL) {
+		if (tbl->exit_code == exit_code)
+			return tbl->reason;
+		tbl++;
+	}
+
+	pr_debug("Unknown hcall code: %lld\n",
+	       (unsigned long long)exit_code);
+	return "UNKNOWN";
+}
+
+static bool hcall_event_end(struct perf_evsel *evsel,
+			    struct perf_sample *sample __maybe_unused,
+			    struct event_key *key __maybe_unused)
+{
+	return (!strcmp(evsel->name, kvm_events_tp[3]));
+}
+
+static bool hcall_event_begin(struct perf_evsel *evsel,
+			      struct perf_sample *sample, struct event_key *key)
+{
+	if (!strcmp(evsel->name, kvm_events_tp[2])) {
+		hcall_event_get_key(evsel, sample, key);
+		return true;
+	}
+
+	return false;
+}
+static void hcall_event_decode_key(struct perf_kvm_stat *kvm __maybe_unused,
+				   struct event_key *key,
+				   char *decode)
+{
+	const char *hcall_reason = get_hcall_exit_reason(key->key);
+
+	scnprintf(decode, decode_str_len, "%s", hcall_reason);
+}
+
+static struct kvm_events_ops hcall_events = {
+	.is_begin_event = hcall_event_begin,
+	.is_end_event = hcall_event_end,
+	.decode_key = hcall_event_decode_key,
+	.name = "HCALL-EVENT",
+};
+
+static struct kvm_events_ops exit_events = {
+	.is_begin_event = exit_event_begin,
+	.is_end_event = exit_event_end,
+	.decode_key = exit_event_decode_key,
+	.name = "VM-EXIT"
+};
+
+struct kvm_reg_events_ops kvm_reg_events_ops[] = {
+	{ .name = "vmexit", .ops = &exit_events },
+	{ .name = "hcall", .ops = &hcall_events },
+	{ NULL, NULL },
+};
+
+const char * const kvm_skip_events[] = {
+	NULL,
+};
+
+
+static int is_tracepoint_available(const char *str, struct perf_evlist *evlist)
+{
+	struct parse_events_error err;
+	int ret;
+
+	err.str = NULL;
+	ret = parse_events(evlist, str, &err);
+	if (err.str)
+		pr_err("%s : %s\n", str, err.str);
+	return ret;
+}
+
+static int ppc__setup_book3s_hv(struct perf_kvm_stat *kvm,
+				struct perf_evlist *evlist)
+{
+	const char **events_ptr;
+	int i, nr_tp = 0, err = -1;
+
+	/* Check for book3s_hv tracepoints */
+	for (events_ptr = ppc_book3s_hv_kvm_tp; *events_ptr; events_ptr++) {
+		err = is_tracepoint_available(*events_ptr, evlist);
+		if (err)
+			return -1;
+		nr_tp++;
+	}
+
+	for (i = 0; i < nr_tp; i++)
+		kvm_events_tp[i] = ppc_book3s_hv_kvm_tp[i];
+
+	kvm_events_tp[i] = NULL;
+	kvm_exit_reason = "trap";
+	kvm->exit_reasons = hv_exit_reasons;
+	kvm->exit_reasons_isa = "HV";
+
+	return 0;
+}
+
+/* Wrapper to setup kvm tracepoints */
+static int ppc__setup_kvm_tp(struct perf_kvm_stat *kvm)
+{
+	struct perf_evlist *evlist = perf_evlist__new();
+
+	if (evlist == NULL)
+		return -ENOMEM;
+
+	/* Right now, only supported on book3s_hv */
+	return ppc__setup_book3s_hv(kvm, evlist);
+}
+
+int setup_kvm_events_tp(struct perf_kvm_stat *kvm)
+{
+	return ppc__setup_kvm_tp(kvm);
+}
+
+int cpu_isa_init(struct perf_kvm_stat *kvm, const char *cpuid __maybe_unused)
+{
+	int ret;
+
+	ret = ppc__setup_kvm_tp(kvm);
+	if (ret) {
+		kvm->exit_reasons = NULL;
+		kvm->exit_reasons_isa = NULL;
+	}
+
+	return ret;
+}
diff --git a/tools/perf/arch/s390/util/kvm-stat.c b/tools/perf/arch/s390/util/kvm-stat.c
index a5dbc07..ed57df2 100644
--- a/tools/perf/arch/s390/util/kvm-stat.c
+++ b/tools/perf/arch/s390/util/kvm-stat.c
@@ -10,7 +10,7 @@
  */
 
 #include "../../util/kvm-stat.h"
-#include <asm/kvm_perf.h>
+#include <asm/sie.h>
 
 define_exit_reasons_table(sie_exit_reasons, sie_intercept_code);
 define_exit_reasons_table(sie_icpt_insn_codes, icpt_insn_codes);
@@ -18,6 +18,12 @@
 define_exit_reasons_table(sie_diagnose_codes, diagnose_codes);
 define_exit_reasons_table(sie_icpt_prog_codes, icpt_prog_codes);
 
+const char *vcpu_id_str = "id";
+const int decode_str_len = 40;
+const char *kvm_exit_reason = "icptcode";
+const char *kvm_entry_trace = "kvm:kvm_s390_sie_enter";
+const char *kvm_exit_trace = "kvm:kvm_s390_sie_exit";
+
 static void event_icpt_insn_get_key(struct perf_evsel *evsel,
 				    struct perf_sample *sample,
 				    struct event_key *key)
@@ -73,7 +79,7 @@
 	.name = "VM-EXIT"
 };
 
-const char * const kvm_events_tp[] = {
+const char *kvm_events_tp[] = {
 	"kvm:kvm_s390_sie_enter",
 	"kvm:kvm_s390_sie_exit",
 	"kvm:kvm_s390_intercept_instruction",
diff --git a/tools/perf/arch/x86/Makefile b/tools/perf/arch/x86/Makefile
index 09ba923..269af21 100644
--- a/tools/perf/arch/x86/Makefile
+++ b/tools/perf/arch/x86/Makefile
@@ -3,3 +3,4 @@
 endif
 HAVE_KVM_STAT_SUPPORT := 1
 PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1
+PERF_HAVE_JITDUMP := 1
diff --git a/tools/perf/arch/x86/tests/intel-cqm.c b/tools/perf/arch/x86/tests/intel-cqm.c
index 3e89ba8..7f064eb 100644
--- a/tools/perf/arch/x86/tests/intel-cqm.c
+++ b/tools/perf/arch/x86/tests/intel-cqm.c
@@ -17,7 +17,7 @@
 	if (pid)
 		return pid;
 
-	while(1);
+	while(1)
 		sleep(5);
 	return 0;
 }
diff --git a/tools/perf/arch/x86/tests/rdpmc.c b/tools/perf/arch/x86/tests/rdpmc.c
index 7bb0d13..72193f19 100644
--- a/tools/perf/arch/x86/tests/rdpmc.c
+++ b/tools/perf/arch/x86/tests/rdpmc.c
@@ -59,7 +59,7 @@
 		u64 quot, rem;
 
 		quot = (cyc >> time_shift);
-		rem = cyc & ((1 << time_shift) - 1);
+		rem = cyc & (((u64)1 << time_shift) - 1);
 		delta = time_offset + quot * time_mult +
 			((rem * time_mult) >> time_shift);
 
@@ -103,6 +103,7 @@
 
 	sigfillset(&sa.sa_mask);
 	sa.sa_sigaction = segfault_handler;
+	sa.sa_flags = 0;
 	sigaction(SIGSEGV, &sa, NULL);
 
 	fd = sys_perf_event_open(&attr, 0, -1, -1,
diff --git a/tools/perf/arch/x86/util/intel-bts.c b/tools/perf/arch/x86/util/intel-bts.c
index 8d8150f..d66f9ad 100644
--- a/tools/perf/arch/x86/util/intel-bts.c
+++ b/tools/perf/arch/x86/util/intel-bts.c
@@ -60,7 +60,9 @@
 	u64 misc;
 };
 
-static size_t intel_bts_info_priv_size(struct auxtrace_record *itr __maybe_unused)
+static size_t
+intel_bts_info_priv_size(struct auxtrace_record *itr __maybe_unused,
+			 struct perf_evlist *evlist __maybe_unused)
 {
 	return INTEL_BTS_AUXTRACE_PRIV_SIZE;
 }
diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c
index f05daac..a339517 100644
--- a/tools/perf/arch/x86/util/intel-pt.c
+++ b/tools/perf/arch/x86/util/intel-pt.c
@@ -89,7 +89,7 @@
 
 	*config = attr.config;
 out_free:
-	parse_events__free_terms(terms);
+	parse_events_terms__delete(terms);
 	return err;
 }
 
@@ -273,7 +273,9 @@
 	return attr;
 }
 
-static size_t intel_pt_info_priv_size(struct auxtrace_record *itr __maybe_unused)
+static size_t
+intel_pt_info_priv_size(struct auxtrace_record *itr __maybe_unused,
+			struct perf_evlist *evlist __maybe_unused)
 {
 	return INTEL_PT_AUXTRACE_PRIV_SIZE;
 }
diff --git a/tools/perf/arch/x86/util/kvm-stat.c b/tools/perf/arch/x86/util/kvm-stat.c
index 14e4e66..b63d4be 100644
--- a/tools/perf/arch/x86/util/kvm-stat.c
+++ b/tools/perf/arch/x86/util/kvm-stat.c
@@ -1,5 +1,7 @@
 #include "../../util/kvm-stat.h"
-#include <asm/kvm_perf.h>
+#include <asm/svm.h>
+#include <asm/vmx.h>
+#include <asm/kvm.h>
 
 define_exit_reasons_table(vmx_exit_reasons, VMX_EXIT_REASONS);
 define_exit_reasons_table(svm_exit_reasons, SVM_EXIT_REASONS);
@@ -11,6 +13,12 @@
 	.name = "VM-EXIT"
 };
 
+const char *vcpu_id_str = "vcpu_id";
+const int decode_str_len = 20;
+const char *kvm_exit_reason = "exit_reason";
+const char *kvm_entry_trace = "kvm:kvm_entry";
+const char *kvm_exit_trace = "kvm:kvm_exit";
+
 /*
  * For the mmio events, we treat:
  * the time of MMIO write: kvm_mmio(KVM_TRACE_MMIO_WRITE...) -> kvm_entry
@@ -65,7 +73,7 @@
 				  struct event_key *key,
 				  char *decode)
 {
-	scnprintf(decode, DECODE_STR_LEN, "%#lx:%s",
+	scnprintf(decode, decode_str_len, "%#lx:%s",
 		  (unsigned long)key->key,
 		  key->info == KVM_TRACE_MMIO_WRITE ? "W" : "R");
 }
@@ -109,7 +117,7 @@
 				    struct event_key *key,
 				    char *decode)
 {
-	scnprintf(decode, DECODE_STR_LEN, "%#llx:%s",
+	scnprintf(decode, decode_str_len, "%#llx:%s",
 		  (unsigned long long)key->key,
 		  key->info ? "POUT" : "PIN");
 }
@@ -121,7 +129,7 @@
 	.name = "IO Port Access"
 };
 
-const char * const kvm_events_tp[] = {
+const char *kvm_events_tp[] = {
 	"kvm:kvm_entry",
 	"kvm:kvm_exit",
 	"kvm:kvm_mmio",
diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm.S b/tools/perf/bench/mem-memcpy-x86-64-asm.S
index e4c2c30..5c3cce0 100644
--- a/tools/perf/bench/mem-memcpy-x86-64-asm.S
+++ b/tools/perf/bench/mem-memcpy-x86-64-asm.S
@@ -1,6 +1,11 @@
+
+/* Various wrappers to make the kernel .S file build in user-space: */
+
 #define memcpy MEMCPY /* don't hide glibc's memcpy() */
 #define altinstr_replacement text
 #define globl p2align 4; .globl
+#define _ASM_EXTABLE_FAULT(x, y)
+
 #include "../../../arch/x86/lib/memcpy_64.S"
 /*
  * We need to provide note.GNU-stack section, saying that we want
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index cc5c126..cfe3663 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -245,7 +245,7 @@
 			hists__collapse_resort(hists, NULL);
 			/* Don't sort callchain */
 			perf_evsel__reset_sample_bit(pos, CALLCHAIN);
-			hists__output_resort(hists, NULL);
+			perf_evsel__output_resort(pos, NULL);
 
 			if (symbol_conf.event_group &&
 			    !perf_evsel__is_group_leader(pos))
diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c
index d93bff7..632efc6 100644
--- a/tools/perf/builtin-buildid-cache.c
+++ b/tools/perf/builtin-buildid-cache.c
@@ -38,19 +38,7 @@
 
 static int build_id_cache__kcore_dir(char *dir, size_t sz)
 {
-	struct timeval tv;
-	struct tm tm;
-	char dt[32];
-
-	if (gettimeofday(&tv, NULL) || !localtime_r(&tv.tv_sec, &tm))
-		return -1;
-
-	if (!strftime(dt, sizeof(dt), "%Y%m%d%H%M%S", &tm))
-		return -1;
-
-	scnprintf(dir, sz, "%s%02u", dt, (unsigned)tv.tv_usec / 10000);
-
-	return 0;
+	return fetch_current_timestamp(dir, sz);
 }
 
 static bool same_kallsyms_reloc(const char *from_dir, char *to_dir)
diff --git a/tools/perf/builtin-config.c b/tools/perf/builtin-config.c
index f04e804..c42448e 100644
--- a/tools/perf/builtin-config.c
+++ b/tools/perf/builtin-config.c
@@ -13,8 +13,10 @@
 #include "util/util.h"
 #include "util/debug.h"
 
+static bool use_system_config, use_user_config;
+
 static const char * const config_usage[] = {
-	"perf config [options]",
+	"perf config [<file-option>] [options]",
 	NULL
 };
 
@@ -25,6 +27,8 @@
 static struct option config_options[] = {
 	OPT_SET_UINT('l', "list", &actions,
 		     "show current config variables", ACTION_LIST),
+	OPT_BOOLEAN(0, "system", &use_system_config, "use system config file"),
+	OPT_BOOLEAN(0, "user", &use_user_config, "use user config file"),
 	OPT_END()
 };
 
@@ -42,10 +46,23 @@
 int cmd_config(int argc, const char **argv, const char *prefix __maybe_unused)
 {
 	int ret = 0;
+	char *user_config = mkpath("%s/.perfconfig", getenv("HOME"));
 
 	argc = parse_options(argc, argv, config_options, config_usage,
 			     PARSE_OPT_STOP_AT_NON_OPTION);
 
+	if (use_system_config && use_user_config) {
+		pr_err("Error: only one config file at a time\n");
+		parse_options_usage(config_usage, config_options, "user", 0);
+		parse_options_usage(NULL, config_options, "system", 0);
+		return -1;
+	}
+
+	if (use_system_config)
+		config_exclusive_filename = perf_etc_perfconfig();
+	else if (use_user_config)
+		config_exclusive_filename = user_config;
+
 	switch (actions) {
 	case ACTION_LIST:
 		if (argc) {
@@ -53,9 +70,13 @@
 			parse_options_usage(config_usage, config_options, "l", 1);
 		} else {
 			ret = perf_config(show_config, NULL);
-			if (ret < 0)
+			if (ret < 0) {
+				const char * config_filename = config_exclusive_filename;
+				if (!config_exclusive_filename)
+					config_filename = user_config;
 				pr_err("Nothing configured, "
-				       "please check your ~/.perfconfig file\n");
+				       "please check your %s \n", config_filename);
+			}
 		}
 		break;
 	default:
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index 36ccc2b..4d72359 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -1264,8 +1264,6 @@
 	if (ret < 0)
 		return ret;
 
-	perf_config(perf_default_config, NULL);
-
 	argc = parse_options(argc, argv, options, diff_usage, 0);
 
 	if (symbol__init(NULL) < 0)
diff --git a/tools/perf/builtin-help.c b/tools/perf/builtin-help.c
index 96c1a4c..49d55e2 100644
--- a/tools/perf/builtin-help.c
+++ b/tools/perf/builtin-help.c
@@ -86,8 +86,7 @@
 		return -1;
 	}
 
-	strbuf_remove(&buffer, 0, strlen("emacsclient"));
-	version = atoi(buffer.buf);
+	version = atoi(buffer.buf + strlen("emacsclient"));
 
 	if (version < 22) {
 		fprintf(stderr,
@@ -273,7 +272,7 @@
 	if (!prefixcmp(var, "man."))
 		return add_man_viewer_info(var, value);
 
-	return perf_default_config(var, value, cb);
+	return 0;
 }
 
 static struct cmdnames main_cmds, other_cmds;
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index 0022e02..7fa6866 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -17,6 +17,7 @@
 #include "util/build-id.h"
 #include "util/data.h"
 #include "util/auxtrace.h"
+#include "util/jit.h"
 
 #include <subcmd/parse-options.h>
 
@@ -29,6 +30,7 @@
 	bool			sched_stat;
 	bool			have_auxtrace;
 	bool			strip;
+	bool			jit_mode;
 	const char		*input_name;
 	struct perf_data_file	output;
 	u64			bytes_written;
@@ -71,6 +73,15 @@
 	return perf_event__repipe_synth(tool, event);
 }
 
+#ifdef HAVE_JITDUMP
+static int perf_event__drop_oe(struct perf_tool *tool __maybe_unused,
+			       union perf_event *event __maybe_unused,
+			       struct ordered_events *oe __maybe_unused)
+{
+	return 0;
+}
+#endif
+
 static int perf_event__repipe_op2_synth(struct perf_tool *tool,
 					union perf_event *event,
 					struct perf_session *session
@@ -234,6 +245,31 @@
 	return err;
 }
 
+#ifdef HAVE_JITDUMP
+static int perf_event__jit_repipe_mmap(struct perf_tool *tool,
+				       union perf_event *event,
+				       struct perf_sample *sample,
+				       struct machine *machine)
+{
+	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
+	u64 n = 0;
+	int ret;
+
+	/*
+	 * if jit marker, then inject jit mmaps and generate ELF images
+	 */
+	ret = jit_process(inject->session, &inject->output, machine,
+			  event->mmap.filename, sample->pid, &n);
+	if (ret < 0)
+		return ret;
+	if (ret) {
+		inject->bytes_written += n;
+		return 0;
+	}
+	return perf_event__repipe_mmap(tool, event, sample, machine);
+}
+#endif
+
 static int perf_event__repipe_mmap2(struct perf_tool *tool,
 				   union perf_event *event,
 				   struct perf_sample *sample,
@@ -247,6 +283,31 @@
 	return err;
 }
 
+#ifdef HAVE_JITDUMP
+static int perf_event__jit_repipe_mmap2(struct perf_tool *tool,
+					union perf_event *event,
+					struct perf_sample *sample,
+					struct machine *machine)
+{
+	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
+	u64 n = 0;
+	int ret;
+
+	/*
+	 * if jit marker, then inject jit mmaps and generate ELF images
+	 */
+	ret = jit_process(inject->session, &inject->output, machine,
+			  event->mmap2.filename, sample->pid, &n);
+	if (ret < 0)
+		return ret;
+	if (ret) {
+		inject->bytes_written += n;
+		return 0;
+	}
+	return perf_event__repipe_mmap2(tool, event, sample, machine);
+}
+#endif
+
 static int perf_event__repipe_fork(struct perf_tool *tool,
 				   union perf_event *event,
 				   struct perf_sample *sample,
@@ -626,12 +687,16 @@
 	ret = perf_session__process_events(session);
 
 	if (!file_out->is_pipe) {
-		if (inject->build_ids) {
+		if (inject->build_ids)
 			perf_header__set_feat(&session->header,
 					      HEADER_BUILD_ID);
-			if (inject->have_auxtrace)
-				dsos__hit_all(session);
-		}
+		/*
+		 * Keep all buildids when there is unprocessed AUX data because
+		 * it is not known which ones the AUX trace hits.
+		 */
+		if (perf_header__has_feat(&session->header, HEADER_BUILD_ID) &&
+		    inject->have_auxtrace && !inject->itrace_synth_opts.set)
+			dsos__hit_all(session);
 		/*
 		 * The AUX areas have been removed and replaced with
 		 * synthesized hardware events, so clear the feature flag and
@@ -703,7 +768,7 @@
 	};
 	int ret;
 
-	const struct option options[] = {
+	struct option options[] = {
 		OPT_BOOLEAN('b', "build-ids", &inject.build_ids,
 			    "Inject build-ids into the output stream"),
 		OPT_STRING('i', "input", &inject.input_name, "file",
@@ -713,6 +778,9 @@
 		OPT_BOOLEAN('s', "sched-stat", &inject.sched_stat,
 			    "Merge sched-stat and sched-switch for getting events "
 			    "where and how long tasks slept"),
+#ifdef HAVE_JITDUMP
+		OPT_BOOLEAN('j', "jit", &inject.jit_mode, "merge jitdump files into perf.data file"),
+#endif
 		OPT_INCR('v', "verbose", &verbose,
 			 "be more verbose (show build ids, etc)"),
 		OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, "file",
@@ -729,7 +797,9 @@
 		"perf inject [<options>]",
 		NULL
 	};
-
+#ifndef HAVE_JITDUMP
+	set_option_nobuild(options, 'j', "jit", "NO_LIBELF=1", true);
+#endif
 	argc = parse_options(argc, argv, options, inject_usage, 0);
 
 	/*
@@ -755,6 +825,29 @@
 	if (inject.session == NULL)
 		return -1;
 
+	if (inject.build_ids) {
+		/*
+		 * to make sure the mmap records are ordered correctly
+		 * and so that the correct especially due to jitted code
+		 * mmaps. We cannot generate the buildid hit list and
+		 * inject the jit mmaps at the same time for now.
+		 */
+		inject.tool.ordered_events = true;
+		inject.tool.ordering_requires_timestamps = true;
+	}
+#ifdef HAVE_JITDUMP
+	if (inject.jit_mode) {
+		inject.tool.mmap2	   = perf_event__jit_repipe_mmap2;
+		inject.tool.mmap	   = perf_event__jit_repipe_mmap;
+		inject.tool.ordered_events = true;
+		inject.tool.ordering_requires_timestamps = true;
+		/*
+		 * JIT MMAP injection injects all MMAP events in one go, so it
+		 * does not obey finished_round semantics.
+		 */
+		inject.tool.finished_round = perf_event__drop_oe;
+	}
+#endif
 	ret = symbol__init(&inject.session->header.env);
 	if (ret < 0)
 		goto out_delete;
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index 1180105..4d3340c 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -1834,7 +1834,7 @@
 	return cmd_record(i, rec_argv, NULL);
 }
 
-static int kmem_config(const char *var, const char *value, void *cb)
+static int kmem_config(const char *var, const char *value, void *cb __maybe_unused)
 {
 	if (!strcmp(var, "kmem.default")) {
 		if (!strcmp(value, "slab"))
@@ -1847,7 +1847,7 @@
 		return 0;
 	}
 
-	return perf_default_config(var, value, cb);
+	return 0;
 }
 
 int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused)
diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index 4418d92..bff6664 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -30,7 +30,6 @@
 #include <math.h>
 
 #ifdef HAVE_KVM_STAT_SUPPORT
-#include <asm/kvm_perf.h>
 #include "util/kvm-stat.h"
 
 void exit_event_get_key(struct perf_evsel *evsel,
@@ -38,12 +37,12 @@
 			struct event_key *key)
 {
 	key->info = 0;
-	key->key = perf_evsel__intval(evsel, sample, KVM_EXIT_REASON);
+	key->key = perf_evsel__intval(evsel, sample, kvm_exit_reason);
 }
 
 bool kvm_exit_event(struct perf_evsel *evsel)
 {
-	return !strcmp(evsel->name, KVM_EXIT_TRACE);
+	return !strcmp(evsel->name, kvm_exit_trace);
 }
 
 bool exit_event_begin(struct perf_evsel *evsel,
@@ -59,7 +58,7 @@
 
 bool kvm_entry_event(struct perf_evsel *evsel)
 {
-	return !strcmp(evsel->name, KVM_ENTRY_TRACE);
+	return !strcmp(evsel->name, kvm_entry_trace);
 }
 
 bool exit_event_end(struct perf_evsel *evsel,
@@ -91,7 +90,7 @@
 	const char *exit_reason = get_exit_reason(kvm, key->exit_reasons,
 						  key->key);
 
-	scnprintf(decode, DECODE_STR_LEN, "%s", exit_reason);
+	scnprintf(decode, decode_str_len, "%s", exit_reason);
 }
 
 static bool register_kvm_events_ops(struct perf_kvm_stat *kvm)
@@ -357,7 +356,7 @@
 	time_diff = sample->time - time_begin;
 
 	if (kvm->duration && time_diff > kvm->duration) {
-		char decode[DECODE_STR_LEN];
+		char decode[decode_str_len];
 
 		kvm->events_ops->decode_key(kvm, &event->key, decode);
 		if (!skip_event(decode)) {
@@ -385,7 +384,8 @@
 			return NULL;
 		}
 
-		vcpu_record->vcpu_id = perf_evsel__intval(evsel, sample, VCPU_ID);
+		vcpu_record->vcpu_id = perf_evsel__intval(evsel, sample,
+							  vcpu_id_str);
 		thread__set_priv(thread, vcpu_record);
 	}
 
@@ -574,7 +574,7 @@
 
 static void print_result(struct perf_kvm_stat *kvm)
 {
-	char decode[DECODE_STR_LEN];
+	char decode[decode_str_len];
 	struct kvm_event *event;
 	int vcpu = kvm->trace_vcpu;
 
@@ -585,7 +585,7 @@
 
 	pr_info("\n\n");
 	print_vcpu_info(kvm);
-	pr_info("%*s ", DECODE_STR_LEN, kvm->events_ops->name);
+	pr_info("%*s ", decode_str_len, kvm->events_ops->name);
 	pr_info("%10s ", "Samples");
 	pr_info("%9s ", "Samples%");
 
@@ -604,7 +604,7 @@
 		min = get_event_min(event, vcpu);
 
 		kvm->events_ops->decode_key(kvm, &event->key, decode);
-		pr_info("%*s ", DECODE_STR_LEN, decode);
+		pr_info("%*s ", decode_str_len, decode);
 		pr_info("%10llu ", (unsigned long long)ecount);
 		pr_info("%8.2f%% ", (double)ecount / kvm->total_count * 100);
 		pr_info("%8.2f%% ", (double)etime / kvm->total_time * 100);
@@ -1132,6 +1132,11 @@
 		_p;			\
 	})
 
+int __weak setup_kvm_events_tp(struct perf_kvm_stat *kvm __maybe_unused)
+{
+	return 0;
+}
+
 static int
 kvm_events_record(struct perf_kvm_stat *kvm, int argc, const char **argv)
 {
@@ -1148,7 +1153,14 @@
 		NULL
 	};
 	const char * const *events_tp;
+	int ret;
+
 	events_tp_size = 0;
+	ret = setup_kvm_events_tp(kvm);
+	if (ret < 0) {
+		pr_err("Unable to setup the kvm tracepoints\n");
+		return ret;
+	}
 
 	for (events_tp = kvm_events_tp; *events_tp; events_tp++)
 		events_tp_size++;
@@ -1377,6 +1389,12 @@
 	/*
 	 * generate the event list
 	 */
+	err = setup_kvm_events_tp(kvm);
+	if (err < 0) {
+		pr_err("Unable to setup the kvm tracepoints\n");
+		return err;
+	}
+
 	kvm->evlist = kvm_live_event_list();
 	if (kvm->evlist == NULL) {
 		err = -1;
diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c
index 3901700..88aeac9 100644
--- a/tools/perf/builtin-mem.c
+++ b/tools/perf/builtin-mem.c
@@ -6,6 +6,8 @@
 #include "util/tool.h"
 #include "util/session.h"
 #include "util/data.h"
+#include "util/mem-events.h"
+#include "util/debug.h"
 
 #define MEM_OPERATION_LOAD	0x1
 #define MEM_OPERATION_STORE	0x2
@@ -21,11 +23,56 @@
 	DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
 };
 
+static int parse_record_events(const struct option *opt,
+			       const char *str, int unset __maybe_unused)
+{
+	struct perf_mem *mem = *(struct perf_mem **)opt->value;
+	int j;
+
+	if (strcmp(str, "list")) {
+		if (!perf_mem_events__parse(str)) {
+			mem->operation = 0;
+			return 0;
+		}
+		exit(-1);
+	}
+
+	for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
+		struct perf_mem_event *e = &perf_mem_events[j];
+
+		fprintf(stderr, "%-13s%-*s%s\n",
+			e->tag,
+			verbose ? 25 : 0,
+			verbose ? perf_mem_events__name(j) : "",
+			e->supported ? ": available" : "");
+	}
+	exit(0);
+}
+
+static const char * const __usage[] = {
+	"perf mem record [<options>] [<command>]",
+	"perf mem record [<options>] -- <command> [<options>]",
+	NULL
+};
+
+static const char * const *record_mem_usage = __usage;
+
 static int __cmd_record(int argc, const char **argv, struct perf_mem *mem)
 {
 	int rec_argc, i = 0, j;
 	const char **rec_argv;
 	int ret;
+	struct option options[] = {
+	OPT_CALLBACK('e', "event", &mem, "event",
+		     "event selector. use 'perf mem record -e list' to list available events",
+		     parse_record_events),
+	OPT_INCR('v', "verbose", &verbose,
+		 "be more verbose (show counter open errors, etc)"),
+	OPT_END()
+	};
+
+	argc = parse_options(argc, argv, options, record_mem_usage,
+			     PARSE_OPT_STOP_AT_NON_OPTION);
 
 	rec_argc = argc + 7; /* max number of arguments */
 	rec_argv = calloc(rec_argc + 1, sizeof(char *));
@@ -35,23 +82,40 @@
 	rec_argv[i++] = "record";
 
 	if (mem->operation & MEM_OPERATION_LOAD)
+		perf_mem_events[PERF_MEM_EVENTS__LOAD].record = true;
+
+	if (perf_mem_events[PERF_MEM_EVENTS__LOAD].record)
 		rec_argv[i++] = "-W";
 
 	rec_argv[i++] = "-d";
 
-	if (mem->operation & MEM_OPERATION_LOAD) {
-		rec_argv[i++] = "-e";
-		rec_argv[i++] = "cpu/mem-loads/pp";
-	}
+	for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
+		if (!perf_mem_events[j].record)
+			continue;
 
-	if (mem->operation & MEM_OPERATION_STORE) {
-		rec_argv[i++] = "-e";
-		rec_argv[i++] = "cpu/mem-stores/pp";
-	}
+		if (!perf_mem_events[j].supported) {
+			pr_err("failed: event '%s' not supported\n",
+			       perf_mem_events__name(j));
+			return -1;
+		}
 
-	for (j = 1; j < argc; j++, i++)
+		rec_argv[i++] = "-e";
+		rec_argv[i++] = perf_mem_events__name(j);
+	};
+
+	for (j = 0; j < argc; j++, i++)
 		rec_argv[i] = argv[j];
 
+	if (verbose > 0) {
+		pr_debug("calling: record ");
+
+		while (rec_argv[j]) {
+			pr_debug("%s ", rec_argv[j]);
+			j++;
+		}
+		pr_debug("\n");
+	}
+
 	ret = cmd_record(i, rec_argv, NULL);
 	free(rec_argv);
 	return ret;
@@ -298,6 +362,10 @@
 		NULL
 	};
 
+	if (perf_mem_events__init()) {
+		pr_err("failed: memory events not supported\n");
+		return -1;
+	}
 
 	argc = parse_options_subcommand(argc, argv, mem_options, mem_subcommands,
 					mem_usage, PARSE_OPT_STOP_AT_NON_OPTION);
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 319712a..515510e 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -32,6 +32,8 @@
 #include "util/parse-branch-options.h"
 #include "util/parse-regs-options.h"
 #include "util/llvm-utils.h"
+#include "util/bpf-loader.h"
+#include "asm/bug.h"
 
 #include <unistd.h>
 #include <sched.h>
@@ -49,7 +51,9 @@
 	const char		*progname;
 	int			realtime_prio;
 	bool			no_buildid;
+	bool			no_buildid_set;
 	bool			no_buildid_cache;
+	bool			no_buildid_cache_set;
 	bool			buildid_all;
 	unsigned long long	samples;
 };
@@ -320,7 +324,10 @@
 		} else {
 			pr_err("failed to mmap with %d (%s)\n", errno,
 				strerror_r(errno, msg, sizeof(msg)));
-			rc = -errno;
+			if (errno)
+				rc = -errno;
+			else
+				rc = -EINVAL;
 		}
 		goto out;
 	}
@@ -464,6 +471,29 @@
 	perf_header__clear_feat(&session->header, HEADER_STAT);
 }
 
+static void
+record__finish_output(struct record *rec)
+{
+	struct perf_data_file *file = &rec->file;
+	int fd = perf_data_file__fd(file);
+
+	if (file->is_pipe)
+		return;
+
+	rec->session->header.data_size += rec->bytes_written;
+	file->size = lseek(perf_data_file__fd(file), 0, SEEK_CUR);
+
+	if (!rec->no_buildid) {
+		process_buildids(rec);
+
+		if (rec->buildid_all)
+			dsos__hit_all(rec->session);
+	}
+	perf_session__write_header(rec->session, rec->evlist, fd, true);
+
+	return;
+}
+
 static volatile int workload_exec_errno;
 
 /*
@@ -482,6 +512,74 @@
 
 static void snapshot_sig_handler(int sig);
 
+static int record__synthesize(struct record *rec)
+{
+	struct perf_session *session = rec->session;
+	struct machine *machine = &session->machines.host;
+	struct perf_data_file *file = &rec->file;
+	struct record_opts *opts = &rec->opts;
+	struct perf_tool *tool = &rec->tool;
+	int fd = perf_data_file__fd(file);
+	int err = 0;
+
+	if (file->is_pipe) {
+		err = perf_event__synthesize_attrs(tool, session,
+						   process_synthesized_event);
+		if (err < 0) {
+			pr_err("Couldn't synthesize attrs.\n");
+			goto out;
+		}
+
+		if (have_tracepoints(&rec->evlist->entries)) {
+			/*
+			 * FIXME err <= 0 here actually means that
+			 * there were no tracepoints so its not really
+			 * an error, just that we don't need to
+			 * synthesize anything.  We really have to
+			 * return this more properly and also
+			 * propagate errors that now are calling die()
+			 */
+			err = perf_event__synthesize_tracing_data(tool,	fd, rec->evlist,
+								  process_synthesized_event);
+			if (err <= 0) {
+				pr_err("Couldn't record tracing data.\n");
+				goto out;
+			}
+			rec->bytes_written += err;
+		}
+	}
+
+	if (rec->opts.full_auxtrace) {
+		err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
+					session, process_synthesized_event);
+		if (err)
+			goto out;
+	}
+
+	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
+						 machine);
+	WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
+			   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
+			   "Check /proc/kallsyms permission or run as root.\n");
+
+	err = perf_event__synthesize_modules(tool, process_synthesized_event,
+					     machine);
+	WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
+			   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
+			   "Check /proc/modules permission or run as root.\n");
+
+	if (perf_guest) {
+		machines__process_guests(&session->machines,
+					 perf_event__synthesize_guest_os, tool);
+	}
+
+	err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
+					    process_synthesized_event, opts->sample_address,
+					    opts->proc_map_timeout);
+out:
+	return err;
+}
+
 static int __cmd_record(struct record *rec, int argc, const char **argv)
 {
 	int err;
@@ -534,6 +632,16 @@
 		goto out_child;
 	}
 
+	err = bpf__apply_obj_config();
+	if (err) {
+		char errbuf[BUFSIZ];
+
+		bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
+		pr_err("ERROR: Apply config to BPF failed: %s\n",
+			 errbuf);
+		goto out_child;
+	}
+
 	/*
 	 * Normally perf_session__new would do this, but it doesn't have the
 	 * evlist.
@@ -566,63 +674,8 @@
 
 	machine = &session->machines.host;
 
-	if (file->is_pipe) {
-		err = perf_event__synthesize_attrs(tool, session,
-						   process_synthesized_event);
-		if (err < 0) {
-			pr_err("Couldn't synthesize attrs.\n");
-			goto out_child;
-		}
-
-		if (have_tracepoints(&rec->evlist->entries)) {
-			/*
-			 * FIXME err <= 0 here actually means that
-			 * there were no tracepoints so its not really
-			 * an error, just that we don't need to
-			 * synthesize anything.  We really have to
-			 * return this more properly and also
-			 * propagate errors that now are calling die()
-			 */
-			err = perf_event__synthesize_tracing_data(tool,	fd, rec->evlist,
-								  process_synthesized_event);
-			if (err <= 0) {
-				pr_err("Couldn't record tracing data.\n");
-				goto out_child;
-			}
-			rec->bytes_written += err;
-		}
-	}
-
-	if (rec->opts.full_auxtrace) {
-		err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
-					session, process_synthesized_event);
-		if (err)
-			goto out_delete_session;
-	}
-
-	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
-						 machine);
+	err = record__synthesize(rec);
 	if (err < 0)
-		pr_err("Couldn't record kernel reference relocation symbol\n"
-		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
-		       "Check /proc/kallsyms permission or run as root.\n");
-
-	err = perf_event__synthesize_modules(tool, process_synthesized_event,
-					     machine);
-	if (err < 0)
-		pr_err("Couldn't record kernel module information.\n"
-		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
-		       "Check /proc/modules permission or run as root.\n");
-
-	if (perf_guest) {
-		machines__process_guests(&session->machines,
-					 perf_event__synthesize_guest_os, tool);
-	}
-
-	err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
-					    process_synthesized_event, opts->sample_address,
-					    opts->proc_map_timeout);
-	if (err != 0)
 		goto out_child;
 
 	if (rec->realtime_prio) {
@@ -758,18 +811,8 @@
 	/* this will be recalculated during process_buildids() */
 	rec->samples = 0;
 
-	if (!err && !file->is_pipe) {
-		rec->session->header.data_size += rec->bytes_written;
-		file->size = lseek(perf_data_file__fd(file), 0, SEEK_CUR);
-
-		if (!rec->no_buildid) {
-			process_buildids(rec);
-
-			if (rec->buildid_all)
-				dsos__hit_all(rec->session);
-		}
-		perf_session__write_header(rec->session, rec->evlist, fd, true);
-	}
+	if (!err)
+		record__finish_output(rec);
 
 	if (!err && !quiet) {
 		char samples[128];
@@ -1097,10 +1140,12 @@
 	OPT_BOOLEAN('P', "period", &record.opts.period, "Record the sample period"),
 	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
 		    "don't sample"),
-	OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache,
-		    "do not update the buildid cache"),
-	OPT_BOOLEAN('B', "no-buildid", &record.no_buildid,
-		    "do not collect buildids in perf.data"),
+	OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
+			&record.no_buildid_cache_set,
+			"do not update the buildid cache"),
+	OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
+			&record.no_buildid_set,
+			"do not collect buildids in perf.data"),
 	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
 		     "monitor event in cgroup name only",
 		     parse_cgroups),
@@ -1136,6 +1181,12 @@
 			"per thread proc mmap processing timeout in ms"),
 	OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
 		    "Record context switch events"),
+	OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
+			 "Configure all used events to run in kernel space.",
+			 PARSE_OPT_EXCLUSIVE),
+	OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
+			 "Configure all used events to run in user space.",
+			 PARSE_OPT_EXCLUSIVE),
 	OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
 		   "clang binary to use for compiling BPF scriptlets"),
 	OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 2bf537f..7eea49f 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -75,7 +75,10 @@
 		return 0;
 	}
 	if (!strcmp(var, "report.percent-limit")) {
-		rep->min_percent = strtof(value, NULL);
+		double pcnt = strtof(value, NULL);
+
+		rep->min_percent = pcnt;
+		callchain_param.min_percent = pcnt;
 		return 0;
 	}
 	if (!strcmp(var, "report.children")) {
@@ -87,7 +90,7 @@
 		return 0;
 	}
 
-	return perf_default_config(var, value, cb);
+	return 0;
 }
 
 static int hist_iter__report_callback(struct hist_entry_iter *iter,
@@ -466,10 +469,11 @@
 	return ret;
 }
 
-static void report__collapse_hists(struct report *rep)
+static int report__collapse_hists(struct report *rep)
 {
 	struct ui_progress prog;
 	struct perf_evsel *pos;
+	int ret = 0;
 
 	ui_progress__init(&prog, rep->nr_entries, "Merging related events...");
 
@@ -481,7 +485,9 @@
 
 		hists->socket_filter = rep->socket_filter;
 
-		hists__collapse_resort(hists, &prog);
+		ret = hists__collapse_resort(hists, &prog);
+		if (ret < 0)
+			break;
 
 		/* Non-group events are considered as leader */
 		if (symbol_conf.event_group &&
@@ -494,6 +500,7 @@
 	}
 
 	ui_progress__finish();
+	return ret;
 }
 
 static void report__output_resort(struct report *rep)
@@ -504,7 +511,7 @@
 	ui_progress__init(&prog, rep->nr_entries, "Sorting events for output...");
 
 	evlist__for_each(rep->session->evlist, pos)
-		hists__output_resort(evsel__hists(pos), &prog);
+		perf_evsel__output_resort(pos, &prog);
 
 	ui_progress__finish();
 }
@@ -561,7 +568,11 @@
 		}
 	}
 
-	report__collapse_hists(rep);
+	ret = report__collapse_hists(rep);
+	if (ret) {
+		ui__error("failed to process hist entry\n");
+		return ret;
+	}
 
 	if (session_done())
 		return 0;
@@ -633,8 +644,10 @@
 		    int unset __maybe_unused)
 {
 	struct report *rep = opt->value;
+	double pcnt = strtof(str, NULL);
 
-	rep->min_percent = strtof(str, NULL);
+	rep->min_percent = pcnt;
+	callchain_param.min_percent = pcnt;
 	return 0;
 }
 
@@ -798,6 +811,8 @@
 		    "only show processor socket that match with this filter"),
 	OPT_BOOLEAN(0, "raw-trace", &symbol_conf.raw_trace,
 		    "Show raw trace event output (do not use print fmt or plugins)"),
+	OPT_BOOLEAN(0, "hierarchy", &symbol_conf.report_hierarchy,
+		    "Show entries in a hierarchy"),
 	OPT_END()
 	};
 	struct perf_data_file file = {
@@ -907,13 +922,19 @@
 		symbol_conf.cumulate_callchain = false;
 	}
 
-	if (setup_sorting(session->evlist) < 0) {
-		if (sort_order)
-			parse_options_usage(report_usage, options, "s", 1);
-		if (field_order)
-			parse_options_usage(sort_order ? NULL : report_usage,
-					    options, "F", 1);
-		goto error;
+	if (symbol_conf.report_hierarchy) {
+		/* disable incompatible options */
+		symbol_conf.event_group = false;
+		symbol_conf.cumulate_callchain = false;
+
+		if (field_order) {
+			pr_err("Error: --hierarchy and --fields options cannot be used together\n");
+			parse_options_usage(report_usage, options, "F", 1);
+			parse_options_usage(NULL, options, "hierarchy", 0);
+			goto error;
+		}
+
+		sort__need_collapse = true;
 	}
 
 	/* Force tty output for header output and per-thread stat. */
@@ -925,6 +946,15 @@
 	else
 		use_browser = 0;
 
+	if (setup_sorting(session->evlist) < 0) {
+		if (sort_order)
+			parse_options_usage(report_usage, options, "s", 1);
+		if (field_order)
+			parse_options_usage(sort_order ? NULL : report_usage,
+					    options, "F", 1);
+		goto error;
+	}
+
 	if (report.header || report.header_only) {
 		perf_session__fprintf_info(session, stdout,
 					   report.show_full_info);
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index c691214..57f9a7e 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -23,6 +23,7 @@
 #include "util/stat.h"
 #include <linux/bitmap.h>
 #include "asm/bug.h"
+#include "util/mem-events.h"
 
 static char const		*script_name;
 static char const		*generate_script_lang;
@@ -58,6 +59,9 @@
 	PERF_OUTPUT_IREGS	    = 1U << 14,
 	PERF_OUTPUT_BRSTACK	    = 1U << 15,
 	PERF_OUTPUT_BRSTACKSYM	    = 1U << 16,
+	PERF_OUTPUT_DATA_SRC	    = 1U << 17,
+	PERF_OUTPUT_WEIGHT	    = 1U << 18,
+	PERF_OUTPUT_BPF_OUTPUT	    = 1U << 19,
 };
 
 struct output_option {
@@ -81,6 +85,9 @@
 	{.str = "iregs", .field = PERF_OUTPUT_IREGS},
 	{.str = "brstack", .field = PERF_OUTPUT_BRSTACK},
 	{.str = "brstacksym", .field = PERF_OUTPUT_BRSTACKSYM},
+	{.str = "data_src", .field = PERF_OUTPUT_DATA_SRC},
+	{.str = "weight",   .field = PERF_OUTPUT_WEIGHT},
+	{.str = "bpf-output",   .field = PERF_OUTPUT_BPF_OUTPUT},
 };
 
 /* default set to maintain compatibility with current format */
@@ -101,7 +108,7 @@
 			      PERF_OUTPUT_SYM | PERF_OUTPUT_DSO |
 			      PERF_OUTPUT_PERIOD,
 
-		.invalid_fields = PERF_OUTPUT_TRACE,
+		.invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT,
 	},
 
 	[PERF_TYPE_SOFTWARE] = {
@@ -111,7 +118,7 @@
 			      PERF_OUTPUT_CPU | PERF_OUTPUT_TIME |
 			      PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP |
 			      PERF_OUTPUT_SYM | PERF_OUTPUT_DSO |
-			      PERF_OUTPUT_PERIOD,
+			      PERF_OUTPUT_PERIOD | PERF_OUTPUT_BPF_OUTPUT,
 
 		.invalid_fields = PERF_OUTPUT_TRACE,
 	},
@@ -121,7 +128,7 @@
 
 		.fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID |
 				  PERF_OUTPUT_CPU | PERF_OUTPUT_TIME |
-				  PERF_OUTPUT_EVNAME | PERF_OUTPUT_TRACE,
+				  PERF_OUTPUT_EVNAME | PERF_OUTPUT_TRACE
 	},
 
 	[PERF_TYPE_RAW] = {
@@ -131,9 +138,10 @@
 			      PERF_OUTPUT_CPU | PERF_OUTPUT_TIME |
 			      PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP |
 			      PERF_OUTPUT_SYM | PERF_OUTPUT_DSO |
-			      PERF_OUTPUT_PERIOD,
+			      PERF_OUTPUT_PERIOD |  PERF_OUTPUT_ADDR |
+			      PERF_OUTPUT_DATA_SRC | PERF_OUTPUT_WEIGHT,
 
-		.invalid_fields = PERF_OUTPUT_TRACE,
+		.invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT,
 	},
 
 	[PERF_TYPE_BREAKPOINT] = {
@@ -145,7 +153,7 @@
 			      PERF_OUTPUT_SYM | PERF_OUTPUT_DSO |
 			      PERF_OUTPUT_PERIOD,
 
-		.invalid_fields = PERF_OUTPUT_TRACE,
+		.invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT,
 	},
 };
 
@@ -242,6 +250,16 @@
 					   PERF_OUTPUT_ADDR, allow_user_set))
 		return -EINVAL;
 
+	if (PRINT_FIELD(DATA_SRC) &&
+		perf_evsel__check_stype(evsel, PERF_SAMPLE_DATA_SRC, "DATA_SRC",
+					PERF_OUTPUT_DATA_SRC))
+		return -EINVAL;
+
+	if (PRINT_FIELD(WEIGHT) &&
+		perf_evsel__check_stype(evsel, PERF_SAMPLE_WEIGHT, "WEIGHT",
+					PERF_OUTPUT_WEIGHT))
+		return -EINVAL;
+
 	if (PRINT_FIELD(SYM) && !PRINT_FIELD(IP) && !PRINT_FIELD(ADDR)) {
 		pr_err("Display of symbols requested but neither sample IP nor "
 			   "sample address\nis selected. Hence, no addresses to convert "
@@ -608,6 +626,84 @@
 	printf("  %-4s ", str);
 }
 
+struct printer_data {
+	int line_no;
+	bool hit_nul;
+	bool is_printable;
+};
+
+static void
+print_sample_bpf_output_printer(enum binary_printer_ops op,
+				unsigned int val,
+				void *extra)
+{
+	unsigned char ch = (unsigned char)val;
+	struct printer_data *printer_data = extra;
+
+	switch (op) {
+	case BINARY_PRINT_DATA_BEGIN:
+		printf("\n");
+		break;
+	case BINARY_PRINT_LINE_BEGIN:
+		printf("%17s", !printer_data->line_no ? "BPF output:" :
+						        "           ");
+		break;
+	case BINARY_PRINT_ADDR:
+		printf(" %04x:", val);
+		break;
+	case BINARY_PRINT_NUM_DATA:
+		printf(" %02x", val);
+		break;
+	case BINARY_PRINT_NUM_PAD:
+		printf("   ");
+		break;
+	case BINARY_PRINT_SEP:
+		printf("  ");
+		break;
+	case BINARY_PRINT_CHAR_DATA:
+		if (printer_data->hit_nul && ch)
+			printer_data->is_printable = false;
+
+		if (!isprint(ch)) {
+			printf("%c", '.');
+
+			if (!printer_data->is_printable)
+				break;
+
+			if (ch == '\0')
+				printer_data->hit_nul = true;
+			else
+				printer_data->is_printable = false;
+		} else {
+			printf("%c", ch);
+		}
+		break;
+	case BINARY_PRINT_CHAR_PAD:
+		printf(" ");
+		break;
+	case BINARY_PRINT_LINE_END:
+		printf("\n");
+		printer_data->line_no++;
+		break;
+	case BINARY_PRINT_DATA_END:
+	default:
+		break;
+	}
+}
+
+static void print_sample_bpf_output(struct perf_sample *sample)
+{
+	unsigned int nr_bytes = sample->raw_size;
+	struct printer_data printer_data = {0, false, true};
+
+	print_binary(sample->raw_data, nr_bytes, 8,
+		     print_sample_bpf_output_printer, &printer_data);
+
+	if (printer_data.is_printable && printer_data.hit_nul)
+		printf("%17s \"%s\"\n", "BPF string:",
+		       (char *)(sample->raw_data));
+}
+
 struct perf_script {
 	struct perf_tool	tool;
 	struct perf_session	*session;
@@ -634,6 +730,23 @@
 	return max;
 }
 
+static size_t data_src__printf(u64 data_src)
+{
+	struct mem_info mi = { .data_src.val = data_src };
+	char decode[100];
+	char out[100];
+	static int maxlen;
+	int len;
+
+	perf_script__meminfo_scnprintf(decode, 100, &mi);
+
+	len = scnprintf(out, 100, "%16" PRIx64 " %s", data_src, decode);
+	if (maxlen < len)
+		maxlen = len;
+
+	return printf("%-*s", maxlen, out);
+}
+
 static void process_event(struct perf_script *script, union perf_event *event,
 			  struct perf_sample *sample, struct perf_evsel *evsel,
 			  struct addr_location *al)
@@ -673,6 +786,12 @@
 	if (PRINT_FIELD(ADDR))
 		print_sample_addr(event, sample, thread, attr);
 
+	if (PRINT_FIELD(DATA_SRC))
+		data_src__printf(sample->data_src);
+
+	if (PRINT_FIELD(WEIGHT))
+		printf("%16" PRIu64, sample->weight);
+
 	if (PRINT_FIELD(IP)) {
 		if (!symbol_conf.use_callchain)
 			printf(" ");
@@ -692,6 +811,9 @@
 	else if (PRINT_FIELD(BRSTACKSYM))
 		print_sample_brstacksym(event, sample, thread, attr);
 
+	if (perf_evsel__is_bpf_output(evsel) && PRINT_FIELD(BPF_OUTPUT))
+		print_sample_bpf_output(sample);
+
 	printf("\n");
 }
 
@@ -1090,23 +1212,6 @@
 	return NULL;
 }
 
-static struct script_spec *script_spec__findnew(const char *spec,
-						struct scripting_ops *ops)
-{
-	struct script_spec *s = script_spec__find(spec);
-
-	if (s)
-		return s;
-
-	s = script_spec__new(spec, ops);
-	if (!s)
-		return NULL;
-
-	script_spec__add(s);
-
-	return s;
-}
-
 int script_spec_register(const char *spec, struct scripting_ops *ops)
 {
 	struct script_spec *s;
@@ -1115,9 +1220,11 @@
 	if (s)
 		return -1;
 
-	s = script_spec__findnew(spec, ops);
+	s = script_spec__new(spec, ops);
 	if (!s)
 		return -1;
+	else
+		script_spec__add(s);
 
 	return 0;
 }
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 038e877..1f19f2f 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -122,6 +122,7 @@
 static unsigned int		initial_delay			= 0;
 static unsigned int		unit_width			= 4; /* strlen("unit") */
 static bool			forever				= false;
+static bool			metric_only			= false;
 static struct timespec		ref_time;
 static struct cpu_map		*aggr_map;
 static aggr_get_id_t		aggr_get_id;
@@ -735,6 +736,191 @@
 	}
 }
 
+struct outstate {
+	FILE *fh;
+	bool newline;
+	const char *prefix;
+	int  nfields;
+	int  id, nr;
+	struct perf_evsel *evsel;
+};
+
+#define METRIC_LEN  35
+
+static void new_line_std(void *ctx)
+{
+	struct outstate *os = ctx;
+
+	os->newline = true;
+}
+
+static void do_new_line_std(struct outstate *os)
+{
+	fputc('\n', os->fh);
+	fputs(os->prefix, os->fh);
+	aggr_printout(os->evsel, os->id, os->nr);
+	if (stat_config.aggr_mode == AGGR_NONE)
+		fprintf(os->fh, "        ");
+	fprintf(os->fh, "                                                 ");
+}
+
+static void print_metric_std(void *ctx, const char *color, const char *fmt,
+			     const char *unit, double val)
+{
+	struct outstate *os = ctx;
+	FILE *out = os->fh;
+	int n;
+	bool newline = os->newline;
+
+	os->newline = false;
+
+	if (unit == NULL || fmt == NULL) {
+		fprintf(out, "%-*s", METRIC_LEN, "");
+		return;
+	}
+
+	if (newline)
+		do_new_line_std(os);
+
+	n = fprintf(out, " # ");
+	if (color)
+		n += color_fprintf(out, color, fmt, val);
+	else
+		n += fprintf(out, fmt, val);
+	fprintf(out, " %-*s", METRIC_LEN - n - 1, unit);
+}
+
+static void new_line_csv(void *ctx)
+{
+	struct outstate *os = ctx;
+	int i;
+
+	fputc('\n', os->fh);
+	if (os->prefix)
+		fprintf(os->fh, "%s%s", os->prefix, csv_sep);
+	aggr_printout(os->evsel, os->id, os->nr);
+	for (i = 0; i < os->nfields; i++)
+		fputs(csv_sep, os->fh);
+}
+
+static void print_metric_csv(void *ctx,
+			     const char *color __maybe_unused,
+			     const char *fmt, const char *unit, double val)
+{
+	struct outstate *os = ctx;
+	FILE *out = os->fh;
+	char buf[64], *vals, *ends;
+
+	if (unit == NULL || fmt == NULL) {
+		fprintf(out, "%s%s%s%s", csv_sep, csv_sep, csv_sep, csv_sep);
+		return;
+	}
+	snprintf(buf, sizeof(buf), fmt, val);
+	vals = buf;
+	while (isspace(*vals))
+		vals++;
+	ends = vals;
+	while (isdigit(*ends) || *ends == '.')
+		ends++;
+	*ends = 0;
+	while (isspace(*unit))
+		unit++;
+	fprintf(out, "%s%s%s%s", csv_sep, vals, csv_sep, unit);
+}
+
+#define METRIC_ONLY_LEN 20
+
+/* Filter out some columns that don't work well in metrics only mode */
+
+static bool valid_only_metric(const char *unit)
+{
+	if (!unit)
+		return false;
+	if (strstr(unit, "/sec") ||
+	    strstr(unit, "hz") ||
+	    strstr(unit, "Hz") ||
+	    strstr(unit, "CPUs utilized"))
+		return false;
+	return true;
+}
+
+static const char *fixunit(char *buf, struct perf_evsel *evsel,
+			   const char *unit)
+{
+	if (!strncmp(unit, "of all", 6)) {
+		snprintf(buf, 1024, "%s %s", perf_evsel__name(evsel),
+			 unit);
+		return buf;
+	}
+	return unit;
+}
+
+static void print_metric_only(void *ctx, const char *color, const char *fmt,
+			      const char *unit, double val)
+{
+	struct outstate *os = ctx;
+	FILE *out = os->fh;
+	int n;
+	char buf[1024];
+	unsigned mlen = METRIC_ONLY_LEN;
+
+	if (!valid_only_metric(unit))
+		return;
+	unit = fixunit(buf, os->evsel, unit);
+	if (color)
+		n = color_fprintf(out, color, fmt, val);
+	else
+		n = fprintf(out, fmt, val);
+	if (n > METRIC_ONLY_LEN)
+		n = METRIC_ONLY_LEN;
+	if (mlen < strlen(unit))
+		mlen = strlen(unit) + 1;
+	fprintf(out, "%*s", mlen - n, "");
+}
+
+static void print_metric_only_csv(void *ctx, const char *color __maybe_unused,
+				  const char *fmt,
+				  const char *unit, double val)
+{
+	struct outstate *os = ctx;
+	FILE *out = os->fh;
+	char buf[64], *vals, *ends;
+	char tbuf[1024];
+
+	if (!valid_only_metric(unit))
+		return;
+	unit = fixunit(tbuf, os->evsel, unit);
+	snprintf(buf, sizeof buf, fmt, val);
+	vals = buf;
+	while (isspace(*vals))
+		vals++;
+	ends = vals;
+	while (isdigit(*ends) || *ends == '.')
+		ends++;
+	*ends = 0;
+	fprintf(out, "%s%s", vals, csv_sep);
+}
+
+static void new_line_metric(void *ctx __maybe_unused)
+{
+}
+
+static void print_metric_header(void *ctx, const char *color __maybe_unused,
+				const char *fmt __maybe_unused,
+				const char *unit, double val __maybe_unused)
+{
+	struct outstate *os = ctx;
+	char tbuf[1024];
+
+	if (!valid_only_metric(unit))
+		return;
+	unit = fixunit(tbuf, os->evsel, unit);
+	if (csv_output)
+		fprintf(os->fh, "%s%s", unit, csv_sep);
+	else
+		fprintf(os->fh, "%-*s ", METRIC_ONLY_LEN, unit);
+}
+
 static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg)
 {
 	FILE *output = stat_config.output;
@@ -763,6 +949,28 @@
 		fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
 }
 
+static int first_shadow_cpu(struct perf_evsel *evsel, int id)
+{
+	int i;
+
+	if (!aggr_get_id)
+		return 0;
+
+	if (stat_config.aggr_mode == AGGR_NONE)
+		return id;
+
+	if (stat_config.aggr_mode == AGGR_GLOBAL)
+		return 0;
+
+	for (i = 0; i < perf_evsel__nr_cpus(evsel); i++) {
+		int cpu2 = perf_evsel__cpus(evsel)->map[i];
+
+		if (aggr_get_id(evsel_list->cpus, cpu2) == id)
+			return cpu2;
+	}
+	return 0;
+}
+
 static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
 {
 	FILE *output = stat_config.output;
@@ -793,22 +1001,124 @@
 		fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
 }
 
-static void printout(int id, int nr, struct perf_evsel *counter, double uval)
+static void printout(int id, int nr, struct perf_evsel *counter, double uval,
+		     char *prefix, u64 run, u64 ena, double noise)
 {
-	int cpu = cpu_map__id_to_cpu(id);
+	struct perf_stat_output_ctx out;
+	struct outstate os = {
+		.fh = stat_config.output,
+		.prefix = prefix ? prefix : "",
+		.id = id,
+		.nr = nr,
+		.evsel = counter,
+	};
+	print_metric_t pm = print_metric_std;
+	void (*nl)(void *);
 
-	if (stat_config.aggr_mode == AGGR_GLOBAL)
-		cpu = 0;
+	if (metric_only) {
+		nl = new_line_metric;
+		if (csv_output)
+			pm = print_metric_only_csv;
+		else
+			pm = print_metric_only;
+	} else
+		nl = new_line_std;
 
-	if (nsec_counter(counter))
+	if (csv_output && !metric_only) {
+		static int aggr_fields[] = {
+			[AGGR_GLOBAL] = 0,
+			[AGGR_THREAD] = 1,
+			[AGGR_NONE] = 1,
+			[AGGR_SOCKET] = 2,
+			[AGGR_CORE] = 2,
+		};
+
+		pm = print_metric_csv;
+		nl = new_line_csv;
+		os.nfields = 3;
+		os.nfields += aggr_fields[stat_config.aggr_mode];
+		if (counter->cgrp)
+			os.nfields++;
+	}
+	if (run == 0 || ena == 0 || counter->counts->scaled == -1) {
+		if (metric_only) {
+			pm(&os, NULL, "", "", 0);
+			return;
+		}
+		aggr_printout(counter, id, nr);
+
+		fprintf(stat_config.output, "%*s%s",
+			csv_output ? 0 : 18,
+			counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
+			csv_sep);
+
+		fprintf(stat_config.output, "%-*s%s",
+			csv_output ? 0 : unit_width,
+			counter->unit, csv_sep);
+
+		fprintf(stat_config.output, "%*s",
+			csv_output ? 0 : -25,
+			perf_evsel__name(counter));
+
+		if (counter->cgrp)
+			fprintf(stat_config.output, "%s%s",
+				csv_sep, counter->cgrp->name);
+
+		if (!csv_output)
+			pm(&os, NULL, NULL, "", 0);
+		print_noise(counter, noise);
+		print_running(run, ena);
+		if (csv_output)
+			pm(&os, NULL, NULL, "", 0);
+		return;
+	}
+
+	if (metric_only)
+		/* nothing */;
+	else if (nsec_counter(counter))
 		nsec_printout(id, nr, counter, uval);
 	else
 		abs_printout(id, nr, counter, uval);
 
-	if (!csv_output && !stat_config.interval)
-		perf_stat__print_shadow_stats(stat_config.output, counter,
-					      uval, cpu,
-					      stat_config.aggr_mode);
+	out.print_metric = pm;
+	out.new_line = nl;
+	out.ctx = &os;
+
+	if (csv_output && !metric_only) {
+		print_noise(counter, noise);
+		print_running(run, ena);
+	}
+
+	perf_stat__print_shadow_stats(counter, uval,
+				first_shadow_cpu(counter, id),
+				&out);
+	if (!csv_output && !metric_only) {
+		print_noise(counter, noise);
+		print_running(run, ena);
+	}
+}
+
+static void aggr_update_shadow(void)
+{
+	int cpu, s2, id, s;
+	u64 val;
+	struct perf_evsel *counter;
+
+	for (s = 0; s < aggr_map->nr; s++) {
+		id = aggr_map->map[s];
+		evlist__for_each(evsel_list, counter) {
+			val = 0;
+			for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
+				s2 = aggr_get_id(evsel_list->cpus, cpu);
+				if (s2 != id)
+					continue;
+				val += perf_counts(counter->counts, cpu, 0)->val;
+			}
+			val = val * counter->scale;
+			perf_stat__update_shadow_stats(counter, &val,
+						       first_shadow_cpu(counter, id));
+		}
+	}
 }
 
 static void print_aggr(char *prefix)
@@ -818,12 +1128,23 @@
 	int cpu, s, s2, id, nr;
 	double uval;
 	u64 ena, run, val;
+	bool first;
 
 	if (!(aggr_map || aggr_get_id))
 		return;
 
+	aggr_update_shadow();
+
+	/*
+	 * With metric_only everything is on a single line.
+	 * Without each counter has its own line.
+	 */
 	for (s = 0; s < aggr_map->nr; s++) {
+		if (prefix && metric_only)
+			fprintf(output, "%s", prefix);
+
 		id = aggr_map->map[s];
+		first = true;
 		evlist__for_each(evsel_list, counter) {
 			val = ena = run = 0;
 			nr = 0;
@@ -836,41 +1157,20 @@
 				run += perf_counts(counter->counts, cpu, 0)->run;
 				nr++;
 			}
-			if (prefix)
+			if (first && metric_only) {
+				first = false;
+				aggr_printout(counter, id, nr);
+			}
+			if (prefix && !metric_only)
 				fprintf(output, "%s", prefix);
 
-			if (run == 0 || ena == 0) {
-				aggr_printout(counter, id, nr);
-
-				fprintf(output, "%*s%s",
-					csv_output ? 0 : 18,
-					counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
-					csv_sep);
-
-				fprintf(output, "%-*s%s",
-					csv_output ? 0 : unit_width,
-					counter->unit, csv_sep);
-
-				fprintf(output, "%*s",
-					csv_output ? 0 : -25,
-					perf_evsel__name(counter));
-
-				if (counter->cgrp)
-					fprintf(output, "%s%s",
-						csv_sep, counter->cgrp->name);
-
-				print_running(run, ena);
-				fputc('\n', output);
-				continue;
-			}
 			uval = val * counter->scale;
-			printout(id, nr, counter, uval);
-			if (!csv_output)
-				print_noise(counter, 1.0);
-
-			print_running(run, ena);
-			fputc('\n', output);
+			printout(id, nr, counter, uval, prefix, run, ena, 1.0);
+			if (!metric_only)
+				fputc('\n', output);
 		}
+		if (metric_only)
+			fputc('\n', output);
 	}
 }
 
@@ -895,12 +1195,7 @@
 			fprintf(output, "%s", prefix);
 
 		uval = val * counter->scale;
-		printout(thread, 0, counter, uval);
-
-		if (!csv_output)
-			print_noise(counter, 1.0);
-
-		print_running(run, ena);
+		printout(thread, 0, counter, uval, prefix, run, ena, 1.0);
 		fputc('\n', output);
 	}
 }
@@ -914,43 +1209,19 @@
 	FILE *output = stat_config.output;
 	struct perf_stat_evsel *ps = counter->priv;
 	double avg = avg_stats(&ps->res_stats[0]);
-	int scaled = counter->counts->scaled;
 	double uval;
 	double avg_enabled, avg_running;
 
 	avg_enabled = avg_stats(&ps->res_stats[1]);
 	avg_running = avg_stats(&ps->res_stats[2]);
 
-	if (prefix)
+	if (prefix && !metric_only)
 		fprintf(output, "%s", prefix);
 
-	if (scaled == -1 || !counter->supported) {
-		fprintf(output, "%*s%s",
-			csv_output ? 0 : 18,
-			counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
-			csv_sep);
-		fprintf(output, "%-*s%s",
-			csv_output ? 0 : unit_width,
-			counter->unit, csv_sep);
-		fprintf(output, "%*s",
-			csv_output ? 0 : -25,
-			perf_evsel__name(counter));
-
-		if (counter->cgrp)
-			fprintf(output, "%s%s", csv_sep, counter->cgrp->name);
-
-		print_running(avg_running, avg_enabled);
-		fputc('\n', output);
-		return;
-	}
-
 	uval = avg * counter->scale;
-	printout(-1, 0, counter, uval);
-
-	print_noise(counter, avg);
-
-	print_running(avg_running, avg_enabled);
-	fprintf(output, "\n");
+	printout(-1, 0, counter, uval, prefix, avg_running, avg_enabled, avg);
+	if (!metric_only)
+		fprintf(output, "\n");
 }
 
 /*
@@ -972,41 +1243,80 @@
 		if (prefix)
 			fprintf(output, "%s", prefix);
 
-		if (run == 0 || ena == 0) {
-			fprintf(output, "CPU%*d%s%*s%s",
-				csv_output ? 0 : -4,
-				perf_evsel__cpus(counter)->map[cpu], csv_sep,
-				csv_output ? 0 : 18,
-				counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
-				csv_sep);
-
-				fprintf(output, "%-*s%s",
-					csv_output ? 0 : unit_width,
-					counter->unit, csv_sep);
-
-				fprintf(output, "%*s",
-					csv_output ? 0 : -25,
-					perf_evsel__name(counter));
-
-			if (counter->cgrp)
-				fprintf(output, "%s%s",
-					csv_sep, counter->cgrp->name);
-
-			print_running(run, ena);
-			fputc('\n', output);
-			continue;
-		}
-
 		uval = val * counter->scale;
-		printout(cpu, 0, counter, uval);
-		if (!csv_output)
-			print_noise(counter, 1.0);
-		print_running(run, ena);
+		printout(cpu, 0, counter, uval, prefix, run, ena, 1.0);
 
 		fputc('\n', output);
 	}
 }
 
+static void print_no_aggr_metric(char *prefix)
+{
+	int cpu;
+	int nrcpus = 0;
+	struct perf_evsel *counter;
+	u64 ena, run, val;
+	double uval;
+
+	nrcpus = evsel_list->cpus->nr;
+	for (cpu = 0; cpu < nrcpus; cpu++) {
+		bool first = true;
+
+		if (prefix)
+			fputs(prefix, stat_config.output);
+		evlist__for_each(evsel_list, counter) {
+			if (first) {
+				aggr_printout(counter, cpu, 0);
+				first = false;
+			}
+			val = perf_counts(counter->counts, cpu, 0)->val;
+			ena = perf_counts(counter->counts, cpu, 0)->ena;
+			run = perf_counts(counter->counts, cpu, 0)->run;
+
+			uval = val * counter->scale;
+			printout(cpu, 0, counter, uval, prefix, run, ena, 1.0);
+		}
+		fputc('\n', stat_config.output);
+	}
+}
+
+static int aggr_header_lens[] = {
+	[AGGR_CORE] = 18,
+	[AGGR_SOCKET] = 12,
+	[AGGR_NONE] = 6,
+	[AGGR_THREAD] = 24,
+	[AGGR_GLOBAL] = 0,
+};
+
+static void print_metric_headers(char *prefix)
+{
+	struct perf_stat_output_ctx out;
+	struct perf_evsel *counter;
+	struct outstate os = {
+		.fh = stat_config.output
+	};
+
+	if (prefix)
+		fprintf(stat_config.output, "%s", prefix);
+
+	if (!csv_output)
+		fprintf(stat_config.output, "%*s",
+			aggr_header_lens[stat_config.aggr_mode], "");
+
+	/* Print metrics headers only */
+	evlist__for_each(evsel_list, counter) {
+		os.evsel = counter;
+		out.ctx = &os;
+		out.print_metric = print_metric_header;
+		out.new_line = new_line_metric;
+		os.evsel = counter;
+		perf_stat__print_shadow_stats(counter, 0,
+					      0,
+					      &out);
+	}
+	fputc('\n', stat_config.output);
+}
+
 static void print_interval(char *prefix, struct timespec *ts)
 {
 	FILE *output = stat_config.output;
@@ -1014,7 +1324,7 @@
 
 	sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, csv_sep);
 
-	if (num_print_interval == 0 && !csv_output) {
+	if (num_print_interval == 0 && !csv_output && !metric_only) {
 		switch (stat_config.aggr_mode) {
 		case AGGR_SOCKET:
 			fprintf(output, "#           time socket cpus             counts %*s events\n", unit_width, "unit");
@@ -1101,6 +1411,17 @@
 	else
 		print_header(argc, argv);
 
+	if (metric_only) {
+		static int num_print_iv;
+
+		if (num_print_iv == 0)
+			print_metric_headers(prefix);
+		if (num_print_iv++ == 25)
+			num_print_iv = 0;
+		if (stat_config.aggr_mode == AGGR_GLOBAL && prefix)
+			fprintf(stat_config.output, "%s", prefix);
+	}
+
 	switch (stat_config.aggr_mode) {
 	case AGGR_CORE:
 	case AGGR_SOCKET:
@@ -1113,10 +1434,16 @@
 	case AGGR_GLOBAL:
 		evlist__for_each(evsel_list, counter)
 			print_counter_aggr(counter, prefix);
+		if (metric_only)
+			fputc('\n', stat_config.output);
 		break;
 	case AGGR_NONE:
-		evlist__for_each(evsel_list, counter)
-			print_counter(counter, prefix);
+		if (metric_only)
+			print_no_aggr_metric(prefix);
+		else {
+			evlist__for_each(evsel_list, counter)
+				print_counter(counter, prefix);
+		}
 		break;
 	case AGGR_UNSET:
 	default:
@@ -1237,6 +1564,8 @@
 		     "aggregate counts per thread", AGGR_THREAD),
 	OPT_UINTEGER('D', "delay", &initial_delay,
 		     "ms to wait before starting measurement after program start"),
+	OPT_BOOLEAN(0, "metric-only", &metric_only,
+			"Only print computed metrics. No raw values"),
 	OPT_END()
 };
 
@@ -1435,7 +1764,7 @@
  */
 static int add_default_attributes(void)
 {
-	struct perf_event_attr default_attrs[] = {
+	struct perf_event_attr default_attrs0[] = {
 
   { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK		},
   { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES	},
@@ -1443,8 +1772,14 @@
   { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS		},
 
   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES		},
+};
+	struct perf_event_attr frontend_attrs[] = {
   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND	},
+};
+	struct perf_event_attr backend_attrs[] = {
   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND	},
+};
+	struct perf_event_attr default_attrs1[] = {
   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS		},
   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS	},
   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES		},
@@ -1561,7 +1896,19 @@
 	}
 
 	if (!evsel_list->nr_entries) {
-		if (perf_evlist__add_default_attrs(evsel_list, default_attrs) < 0)
+		if (perf_evlist__add_default_attrs(evsel_list, default_attrs0) < 0)
+			return -1;
+		if (pmu_have_event("cpu", "stalled-cycles-frontend")) {
+			if (perf_evlist__add_default_attrs(evsel_list,
+						frontend_attrs) < 0)
+				return -1;
+		}
+		if (pmu_have_event("cpu", "stalled-cycles-backend")) {
+			if (perf_evlist__add_default_attrs(evsel_list,
+						backend_attrs) < 0)
+				return -1;
+		}
+		if (perf_evlist__add_default_attrs(evsel_list, default_attrs1) < 0)
 			return -1;
 	}
 
@@ -1825,9 +2172,11 @@
 	if (evsel_list == NULL)
 		return -ENOMEM;
 
+	parse_events__shrink_config_terms();
 	argc = parse_options_subcommand(argc, argv, stat_options, stat_subcommands,
 					(const char **) stat_usage,
 					PARSE_OPT_STOP_AT_NON_OPTION);
+	perf_stat__init_shadow_stats();
 
 	if (csv_sep) {
 		csv_output = true;
@@ -1858,6 +2207,16 @@
 		goto out;
 	}
 
+	if (metric_only && stat_config.aggr_mode == AGGR_THREAD) {
+		fprintf(stderr, "--metric-only is not supported with --per-thread\n");
+		goto out;
+	}
+
+	if (metric_only && run_count > 1) {
+		fprintf(stderr, "--metric-only is not supported with -r\n");
+		goto out;
+	}
+
 	if (output_fd < 0) {
 		fprintf(stderr, "argument to --log-fd must be a > 0\n");
 		parse_options_usage(stat_usage, stat_options, "log-fd", 0);
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index bf01cbb..94af190 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -252,7 +252,8 @@
 	char bf[160];
 	int printed = 0;
 	const int win_width = top->winsize.ws_col - 1;
-	struct hists *hists = evsel__hists(top->sym_evsel);
+	struct perf_evsel *evsel = top->sym_evsel;
+	struct hists *hists = evsel__hists(evsel);
 
 	puts(CONSOLE_CLEAR);
 
@@ -288,7 +289,7 @@
 	}
 
 	hists__collapse_resort(hists, NULL);
-	hists__output_resort(hists, NULL);
+	perf_evsel__output_resort(evsel, NULL);
 
 	hists__output_recalc_col_len(hists, top->print_entries - printed);
 	putchar('\n');
@@ -540,6 +541,7 @@
 static void perf_top__sort_new_samples(void *arg)
 {
 	struct perf_top *t = arg;
+	struct perf_evsel *evsel = t->sym_evsel;
 	struct hists *hists;
 
 	perf_top__reset_sample_counters(t);
@@ -547,7 +549,7 @@
 	if (t->evlist->selected != NULL)
 		t->sym_evsel = t->evlist->selected;
 
-	hists = evsel__hists(t->sym_evsel);
+	hists = evsel__hists(evsel);
 
 	if (t->evlist->enabled) {
 		if (t->zero) {
@@ -559,7 +561,7 @@
 	}
 
 	hists__collapse_resort(hists, NULL);
-	hists__output_resort(hists, NULL);
+	perf_evsel__output_resort(evsel, NULL);
 }
 
 static void *display_thread_tui(void *arg)
@@ -1063,7 +1065,7 @@
 	return parse_callchain_top_opt(arg);
 }
 
-static int perf_top_config(const char *var, const char *value, void *cb)
+static int perf_top_config(const char *var, const char *value, void *cb __maybe_unused)
 {
 	if (!strcmp(var, "top.call-graph"))
 		var = "call-graph.record-mode"; /* fall-through */
@@ -1072,7 +1074,7 @@
 		return 0;
 	}
 
-	return perf_default_config(var, value, cb);
+	return 0;
 }
 
 static int
@@ -1212,6 +1214,8 @@
 		     parse_branch_stack),
 	OPT_BOOLEAN(0, "raw-trace", &symbol_conf.raw_trace,
 		    "Show raw trace event output (do not use print fmt or plugins)"),
+	OPT_BOOLEAN(0, "hierarchy", &symbol_conf.report_hierarchy,
+		    "Show entries in a hierarchy"),
 	OPT_END()
 	};
 	const char * const top_usage[] = {
@@ -1239,10 +1243,30 @@
 		goto out_delete_evlist;
 	}
 
+	if (symbol_conf.report_hierarchy) {
+		/* disable incompatible options */
+		symbol_conf.event_group = false;
+		symbol_conf.cumulate_callchain = false;
+
+		if (field_order) {
+			pr_err("Error: --hierarchy and --fields options cannot be used together\n");
+			parse_options_usage(top_usage, options, "fields", 0);
+			parse_options_usage(NULL, options, "hierarchy", 0);
+			goto out_delete_evlist;
+		}
+	}
+
 	sort__mode = SORT_MODE__TOP;
 	/* display thread wants entries to be collapsed in a different tree */
 	sort__need_collapse = 1;
 
+	if (top.use_stdio)
+		use_browser = 0;
+	else if (top.use_tui)
+		use_browser = 1;
+
+	setup_browser(false);
+
 	if (setup_sorting(top.evlist) < 0) {
 		if (sort_order)
 			parse_options_usage(top_usage, options, "s", 1);
@@ -1252,13 +1276,6 @@
 		goto out_delete_evlist;
 	}
 
-	if (top.use_stdio)
-		use_browser = 0;
-	else if (top.use_tui)
-		use_browser = 1;
-
-	setup_browser(false);
-
 	status = target__validate(target);
 	if (status) {
 		target__strerror(target, status, errbuf, BUFSIZ);
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 20916dd..8dc98c5 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -33,6 +33,7 @@
 #include "util/stat.h"
 #include "trace-event.h"
 #include "util/parse-events.h"
+#include "util/bpf-loader.h"
 
 #include <libaudit.h>
 #include <stdlib.h>
@@ -1724,8 +1725,12 @@
 
 	sc->args = sc->tp_format->format.fields;
 	sc->nr_args = sc->tp_format->format.nr_fields;
-	/* drop nr field - not relevant here; does not exist on older kernels */
-	if (sc->args && strcmp(sc->args->name, "nr") == 0) {
+	/*
+	 * We need to check and discard the first variable '__syscall_nr'
+	 * or 'nr' that mean the syscall number. It is needless here.
+	 * So drop '__syscall_nr' or 'nr' field but does not exist on older kernels.
+	 */
+	if (sc->args && (!strcmp(sc->args->name, "__syscall_nr") || !strcmp(sc->args->name, "nr"))) {
 		sc->args = sc->args->next;
 		--sc->nr_args;
 	}
@@ -2177,6 +2182,37 @@
 	return 0;
 }
 
+static void bpf_output__printer(enum binary_printer_ops op,
+				unsigned int val, void *extra)
+{
+	FILE *output = extra;
+	unsigned char ch = (unsigned char)val;
+
+	switch (op) {
+	case BINARY_PRINT_CHAR_DATA:
+		fprintf(output, "%c", isprint(ch) ? ch : '.');
+		break;
+	case BINARY_PRINT_DATA_BEGIN:
+	case BINARY_PRINT_LINE_BEGIN:
+	case BINARY_PRINT_ADDR:
+	case BINARY_PRINT_NUM_DATA:
+	case BINARY_PRINT_NUM_PAD:
+	case BINARY_PRINT_SEP:
+	case BINARY_PRINT_CHAR_PAD:
+	case BINARY_PRINT_LINE_END:
+	case BINARY_PRINT_DATA_END:
+	default:
+		break;
+	}
+}
+
+static void bpf_output__fprintf(struct trace *trace,
+				struct perf_sample *sample)
+{
+	print_binary(sample->raw_data, sample->raw_size, 8,
+		     bpf_output__printer, trace->output);
+}
+
 static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
 				union perf_event *event __maybe_unused,
 				struct perf_sample *sample)
@@ -2189,7 +2225,9 @@
 
 	fprintf(trace->output, "%s:", evsel->name);
 
-	if (evsel->tp_format) {
+	if (perf_evsel__is_bpf_output(evsel)) {
+		bpf_output__fprintf(trace, sample);
+	} else if (evsel->tp_format) {
 		event_format__fprintf(evsel->tp_format, sample->cpu,
 				      sample->raw_data, sample->raw_size,
 				      trace->output);
@@ -2586,6 +2624,16 @@
 	if (err < 0)
 		goto out_error_open;
 
+	err = bpf__apply_obj_config();
+	if (err) {
+		char errbuf[BUFSIZ];
+
+		bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
+		pr_err("ERROR: Apply config to BPF failed: %s\n",
+			 errbuf);
+		goto out_error_open;
+	}
+
 	/*
 	 * Better not use !target__has_task() here because we need to cover the
 	 * case where no threads were specified in the command line, but a
diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile
index e5959c1..eca6a91 100644
--- a/tools/perf/config/Makefile
+++ b/tools/perf/config/Makefile
@@ -61,50 +61,45 @@
 
 ifeq ($(LIBUNWIND_LIBS),)
   NO_LIBUNWIND := 1
-else
-  #
-  # For linking with debug library, run like:
-  #
-  #   make DEBUG=1 LIBUNWIND_DIR=/opt/libunwind/
-  #
-  ifdef LIBUNWIND_DIR
-    LIBUNWIND_CFLAGS  = -I$(LIBUNWIND_DIR)/include
-    LIBUNWIND_LDFLAGS = -L$(LIBUNWIND_DIR)/lib
-  endif
-  LIBUNWIND_LDFLAGS += $(LIBUNWIND_LIBS)
-
-  # Set per-feature check compilation flags
-  FEATURE_CHECK_CFLAGS-libunwind = $(LIBUNWIND_CFLAGS)
-  FEATURE_CHECK_LDFLAGS-libunwind = $(LIBUNWIND_LDFLAGS)
-  FEATURE_CHECK_CFLAGS-libunwind-debug-frame = $(LIBUNWIND_CFLAGS)
-  FEATURE_CHECK_LDFLAGS-libunwind-debug-frame = $(LIBUNWIND_LDFLAGS)
 endif
+#
+# For linking with debug library, run like:
+#
+#   make DEBUG=1 LIBUNWIND_DIR=/opt/libunwind/
+#
+ifdef LIBUNWIND_DIR
+  LIBUNWIND_CFLAGS  = -I$(LIBUNWIND_DIR)/include
+  LIBUNWIND_LDFLAGS = -L$(LIBUNWIND_DIR)/lib
+endif
+LIBUNWIND_LDFLAGS += $(LIBUNWIND_LIBS)
+
+# Set per-feature check compilation flags
+FEATURE_CHECK_CFLAGS-libunwind = $(LIBUNWIND_CFLAGS)
+FEATURE_CHECK_LDFLAGS-libunwind = $(LIBUNWIND_LDFLAGS)
+FEATURE_CHECK_CFLAGS-libunwind-debug-frame = $(LIBUNWIND_CFLAGS)
+FEATURE_CHECK_LDFLAGS-libunwind-debug-frame = $(LIBUNWIND_LDFLAGS)
 
 ifeq ($(NO_PERF_REGS),0)
   CFLAGS += -DHAVE_PERF_REGS_SUPPORT
 endif
 
-ifndef NO_LIBELF
-  # for linking with debug library, run like:
-  # make DEBUG=1 LIBDW_DIR=/opt/libdw/
-  ifdef LIBDW_DIR
-    LIBDW_CFLAGS  := -I$(LIBDW_DIR)/include
-    LIBDW_LDFLAGS := -L$(LIBDW_DIR)/lib
-  endif
-  FEATURE_CHECK_CFLAGS-libdw-dwarf-unwind := $(LIBDW_CFLAGS)
-  FEATURE_CHECK_LDFLAGS-libdw-dwarf-unwind := $(LIBDW_LDFLAGS) -ldw
+# for linking with debug library, run like:
+# make DEBUG=1 LIBDW_DIR=/opt/libdw/
+ifdef LIBDW_DIR
+  LIBDW_CFLAGS  := -I$(LIBDW_DIR)/include
+  LIBDW_LDFLAGS := -L$(LIBDW_DIR)/lib
 endif
+FEATURE_CHECK_CFLAGS-libdw-dwarf-unwind := $(LIBDW_CFLAGS)
+FEATURE_CHECK_LDFLAGS-libdw-dwarf-unwind := $(LIBDW_LDFLAGS) -ldw
 
-ifdef LIBBABELTRACE
-  # for linking with debug library, run like:
-  # make DEBUG=1 LIBBABELTRACE_DIR=/opt/libbabeltrace/
-  ifdef LIBBABELTRACE_DIR
-    LIBBABELTRACE_CFLAGS  := -I$(LIBBABELTRACE_DIR)/include
-    LIBBABELTRACE_LDFLAGS := -L$(LIBBABELTRACE_DIR)/lib
-  endif
-  FEATURE_CHECK_CFLAGS-libbabeltrace := $(LIBBABELTRACE_CFLAGS)
-  FEATURE_CHECK_LDFLAGS-libbabeltrace := $(LIBBABELTRACE_LDFLAGS) -lbabeltrace-ctf
+# for linking with debug library, run like:
+# make DEBUG=1 LIBBABELTRACE_DIR=/opt/libbabeltrace/
+ifdef LIBBABELTRACE_DIR
+  LIBBABELTRACE_CFLAGS  := -I$(LIBBABELTRACE_DIR)/include
+  LIBBABELTRACE_LDFLAGS := -L$(LIBBABELTRACE_DIR)/lib
 endif
+FEATURE_CHECK_CFLAGS-libbabeltrace := $(LIBBABELTRACE_CFLAGS)
+FEATURE_CHECK_LDFLAGS-libbabeltrace := $(LIBBABELTRACE_LDFLAGS) -lbabeltrace-ctf
 
 FEATURE_CHECK_CFLAGS-bpf = -I. -I$(srctree)/tools/include -I$(srctree)/arch/$(ARCH)/include/uapi -I$(srctree)/include/uapi
 # include ARCH specific config
@@ -145,28 +140,26 @@
   $(call detected_var,PARSER_DEBUG_FLEX)
 endif
 
-ifndef NO_LIBPYTHON
-  # Try different combinations to accommodate systems that only have
-  # python[2][-config] in weird combinations but always preferring
-  # python2 and python2-config as per pep-0394. If we catch a
-  # python[-config] in version 3, the version check will kill it.
-  PYTHON2 := $(if $(call get-executable,python2),python2,python)
-  override PYTHON := $(call get-executable-or-default,PYTHON,$(PYTHON2))
-  PYTHON2_CONFIG := \
-    $(if $(call get-executable,$(PYTHON)-config),$(PYTHON)-config,python-config)
-  override PYTHON_CONFIG := \
-    $(call get-executable-or-default,PYTHON_CONFIG,$(PYTHON2_CONFIG))
+# Try different combinations to accommodate systems that only have
+# python[2][-config] in weird combinations but always preferring
+# python2 and python2-config as per pep-0394. If we catch a
+# python[-config] in version 3, the version check will kill it.
+PYTHON2 := $(if $(call get-executable,python2),python2,python)
+override PYTHON := $(call get-executable-or-default,PYTHON,$(PYTHON2))
+PYTHON2_CONFIG := \
+  $(if $(call get-executable,$(PYTHON)-config),$(PYTHON)-config,python-config)
+override PYTHON_CONFIG := \
+  $(call get-executable-or-default,PYTHON_CONFIG,$(PYTHON2_CONFIG))
 
-  PYTHON_CONFIG_SQ := $(call shell-sq,$(PYTHON_CONFIG))
+PYTHON_CONFIG_SQ := $(call shell-sq,$(PYTHON_CONFIG))
 
-  PYTHON_EMBED_LDOPTS := $(shell $(PYTHON_CONFIG_SQ) --ldflags 2>/dev/null)
-  PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --cflags 2>/dev/null)
+PYTHON_EMBED_LDOPTS := $(shell $(PYTHON_CONFIG_SQ) --ldflags 2>/dev/null)
+PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --cflags 2>/dev/null)
 
-  FEATURE_CHECK_CFLAGS-libpython := $(PYTHON_EMBED_CCOPTS)
-  FEATURE_CHECK_LDFLAGS-libpython := $(PYTHON_EMBED_LDOPTS)
-  FEATURE_CHECK_CFLAGS-libpython-version := $(PYTHON_EMBED_CCOPTS)
-  FEATURE_CHECK_LDFLAGS-libpython-version := $(PYTHON_EMBED_LDOPTS)
-endif
+FEATURE_CHECK_CFLAGS-libpython := $(PYTHON_EMBED_CCOPTS)
+FEATURE_CHECK_LDFLAGS-libpython := $(PYTHON_EMBED_LDOPTS)
+FEATURE_CHECK_CFLAGS-libpython-version := $(PYTHON_EMBED_CCOPTS)
+FEATURE_CHECK_LDFLAGS-libpython-version := $(PYTHON_EMBED_LDOPTS)
 
 CFLAGS += -fno-omit-frame-pointer
 CFLAGS += -ggdb3
@@ -181,7 +174,11 @@
 
 EXTLIBS = -lpthread -lrt -lm -ldl
 
+ifeq ($(FEATURES_DUMP),)
 include $(srctree)/tools/build/Makefile.feature
+else
+include $(FEATURES_DUMP)
+endif
 
 ifeq ($(feature-stackprotector-all), 1)
   CFLAGS += -fstack-protector-all
@@ -331,6 +328,13 @@
   endif # NO_LIBBPF
 endif # NO_LIBELF
 
+ifdef PERF_HAVE_JITDUMP
+  ifndef NO_DWARF
+    $(call detected,CONFIG_JITDUMP)
+    CFLAGS += -DHAVE_JITDUMP
+  endif
+endif
+
 ifeq ($(ARCH),powerpc)
   ifndef NO_DWARF
     CFLAGS += -DHAVE_SKIP_CALLCHAIN_IDX
@@ -407,6 +411,17 @@
   endif
 endif
 
+ifndef NO_LIBCRYPTO
+  ifneq ($(feature-libcrypto), 1)
+    msg := $(warning No libcrypto.h found, disables jitted code injection, please install libssl-devel or libssl-dev);
+    NO_LIBCRYPTO := 1
+  else
+    CFLAGS += -DHAVE_LIBCRYPTO_SUPPORT
+    EXTLIBS += -lcrypto
+    $(call detected,CONFIG_CRYPTO)
+  endif
+endif
+
 ifdef NO_NEWT
   NO_SLANG=1
 endif
diff --git a/tools/perf/jvmti/Makefile b/tools/perf/jvmti/Makefile
new file mode 100644
index 0000000..5ce61a1
--- /dev/null
+++ b/tools/perf/jvmti/Makefile
@@ -0,0 +1,89 @@
+ARCH=$(shell uname -m)
+
+ifeq ($(ARCH), x86_64)
+JARCH=amd64
+endif
+ifeq ($(ARCH), armv7l)
+JARCH=armhf
+endif
+ifeq ($(ARCH), armv6l)
+JARCH=armhf
+endif
+ifeq ($(ARCH), aarch64)
+JARCH=aarch64
+endif
+ifeq ($(ARCH), ppc64)
+JARCH=powerpc
+endif
+ifeq ($(ARCH), ppc64le)
+JARCH=powerpc
+endif
+
+DESTDIR=/usr/local
+
+VERSION=1
+REVISION=0
+AGE=0
+
+LN=ln -sf
+RM=rm
+
+SLIBJVMTI=libjvmti.so.$(VERSION).$(REVISION).$(AGE)
+VLIBJVMTI=libjvmti.so.$(VERSION)
+SLDFLAGS=-shared -Wl,-soname -Wl,$(VLIBJVMTI)
+SOLIBEXT=so
+
+# The following works at least on fedora 23, you may need the next
+# line for other distros.
+ifneq (,$(wildcard /usr/sbin/update-java-alternatives))
+JDIR=$(shell /usr/sbin/update-java-alternatives -l | head -1 | cut -d ' ' -f 3)
+else
+  ifneq (,$(wildcard /usr/sbin/alternatives))
+    JDIR=$(shell alternatives --display java | tail -1 | cut -d' ' -f 5 | sed 's%/jre/bin/java.%%g')
+  endif
+endif
+ifndef JDIR
+$(error Could not find alternatives command, you need to set JDIR= to point to the root of your Java directory)
+else
+  ifeq (,$(wildcard $(JDIR)/include/jvmti.h))
+  $(error the openjdk development package appears to me missing, install and try again)
+  endif
+endif
+$(info Using Java from $(JDIR))
+# -lrt required in 32-bit mode for clock_gettime()
+LIBS=-lelf -lrt
+INCDIR=-I $(JDIR)/include -I $(JDIR)/include/linux
+
+TARGETS=$(SLIBJVMTI)
+
+SRCS=libjvmti.c jvmti_agent.c
+OBJS=$(SRCS:.c=.o)
+SOBJS=$(OBJS:.o=.lo)
+OPT=-O2 -g -Werror -Wall
+
+CFLAGS=$(INCDIR) $(OPT)
+
+all: $(TARGETS)
+
+.c.o:
+	$(CC) $(CFLAGS) -c $*.c
+.c.lo:
+	$(CC) -fPIC -DPIC $(CFLAGS) -c $*.c -o $*.lo
+
+$(OBJS) $(SOBJS): Makefile jvmti_agent.h ../util/jitdump.h
+
+$(SLIBJVMTI):  $(SOBJS)
+	$(CC) $(CFLAGS) $(SLDFLAGS)  -o $@ $(SOBJS) $(LIBS)
+	$(LN) $@ libjvmti.$(SOLIBEXT)
+
+clean:
+	$(RM) -f *.o *.so.* *.so *.lo
+
+install:
+	-mkdir -p $(DESTDIR)/lib
+	install -m 755 $(SLIBJVMTI) $(DESTDIR)/lib/
+	(cd $(DESTDIR)/lib; $(LN) $(SLIBJVMTI) $(VLIBJVMTI))
+	(cd $(DESTDIR)/lib; $(LN) $(SLIBJVMTI) libjvmti.$(SOLIBEXT))
+	ldconfig
+
+.SUFFIXES: .c .S .o .lo
diff --git a/tools/perf/jvmti/jvmti_agent.c b/tools/perf/jvmti/jvmti_agent.c
new file mode 100644
index 0000000..6461e02
--- /dev/null
+++ b/tools/perf/jvmti/jvmti_agent.c
@@ -0,0 +1,465 @@
+/*
+ * jvmti_agent.c: JVMTI agent interface
+ *
+ * Adapted from the Oprofile code in opagent.c:
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ * Copyright 2007 OProfile authors
+ * Jens Wilke
+ * Daniel Hansel
+ * Copyright IBM Corporation 2007
+ */
+#include <sys/types.h>
+#include <sys/stat.h> /* for mkdir() */
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <limits.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <time.h>
+#include <sys/mman.h>
+#include <syscall.h> /* for gettid() */
+#include <err.h>
+
+#include "jvmti_agent.h"
+#include "../util/jitdump.h"
+
+#define JIT_LANG "java"
+
+static char jit_path[PATH_MAX];
+static void *marker_addr;
+
+/*
+ * padding buffer
+ */
+static const char pad_bytes[7];
+
+static inline pid_t gettid(void)
+{
+	return (pid_t)syscall(__NR_gettid);
+}
+
+static int get_e_machine(struct jitheader *hdr)
+{
+	ssize_t sret;
+	char id[16];
+	int fd, ret = -1;
+	int m = -1;
+	struct {
+		uint16_t e_type;
+		uint16_t e_machine;
+	} info;
+
+	fd = open("/proc/self/exe", O_RDONLY);
+	if (fd == -1)
+		return -1;
+
+	sret = read(fd, id, sizeof(id));
+	if (sret != sizeof(id))
+		goto error;
+
+	/* check ELF signature */
+	if (id[0] != 0x7f || id[1] != 'E' || id[2] != 'L' || id[3] != 'F')
+		goto error;
+
+	sret = read(fd, &info, sizeof(info));
+	if (sret != sizeof(info))
+		goto error;
+
+	m = info.e_machine;
+	if (m < 0)
+		m = 0; /* ELF EM_NONE */
+
+	hdr->elf_mach = m;
+	ret = 0;
+error:
+	close(fd);
+	return ret;
+}
+
+#define NSEC_PER_SEC	1000000000
+static int perf_clk_id = CLOCK_MONOTONIC;
+
+static inline uint64_t
+timespec_to_ns(const struct timespec *ts)
+{
+        return ((uint64_t) ts->tv_sec * NSEC_PER_SEC) + ts->tv_nsec;
+}
+
+static inline uint64_t
+perf_get_timestamp(void)
+{
+	struct timespec ts;
+	int ret;
+
+	ret = clock_gettime(perf_clk_id, &ts);
+	if (ret)
+		return 0;
+
+	return timespec_to_ns(&ts);
+}
+
+static int
+debug_cache_init(void)
+{
+	char str[32];
+	char *base, *p;
+	struct tm tm;
+	time_t t;
+	int ret;
+
+	time(&t);
+	localtime_r(&t, &tm);
+
+	base = getenv("JITDUMPDIR");
+	if (!base)
+		base = getenv("HOME");
+	if (!base)
+		base = ".";
+
+	strftime(str, sizeof(str), JIT_LANG"-jit-%Y%m%d", &tm);
+
+	snprintf(jit_path, PATH_MAX - 1, "%s/.debug/", base);
+
+	ret = mkdir(jit_path, 0755);
+	if (ret == -1) {
+		if (errno != EEXIST) {
+			warn("jvmti: cannot create jit cache dir %s", jit_path);
+			return -1;
+		}
+	}
+
+	snprintf(jit_path, PATH_MAX - 1, "%s/.debug/jit", base);
+	ret = mkdir(jit_path, 0755);
+	if (ret == -1) {
+		if (errno != EEXIST) {
+			warn("cannot create jit cache dir %s", jit_path);
+			return -1;
+		}
+	}
+
+	snprintf(jit_path, PATH_MAX - 1, "%s/.debug/jit/%s.XXXXXXXX", base, str);
+
+	p = mkdtemp(jit_path);
+	if (p != jit_path) {
+		warn("cannot create jit cache dir %s", jit_path);
+		return -1;
+	}
+
+	return 0;
+}
+
+static int
+perf_open_marker_file(int fd)
+{
+	long pgsz;
+
+	pgsz = sysconf(_SC_PAGESIZE);
+	if (pgsz == -1)
+		return -1;
+
+	/*
+	 * we mmap the jitdump to create an MMAP RECORD in perf.data file.
+	 * The mmap is captured either live (perf record running when we mmap)
+	 * or  in deferred mode, via /proc/PID/maps
+	 * the MMAP record is used as a marker of a jitdump file for more meta
+	 * data info about the jitted code. Perf report/annotate detect this
+	 * special filename and process the jitdump file.
+	 *
+	 * mapping must be PROT_EXEC to ensure it is captured by perf record
+	 * even when not using -d option
+	 */
+	marker_addr = mmap(NULL, pgsz, PROT_READ|PROT_EXEC, MAP_PRIVATE, fd, 0);
+	return (marker_addr == MAP_FAILED) ? -1 : 0;
+}
+
+static void
+perf_close_marker_file(void)
+{
+	long pgsz;
+
+	if (!marker_addr)
+		return;
+
+	pgsz = sysconf(_SC_PAGESIZE);
+	if (pgsz == -1)
+		return;
+
+	munmap(marker_addr, pgsz);
+}
+
+void *jvmti_open(void)
+{
+	int pad_cnt;
+	char dump_path[PATH_MAX];
+	struct jitheader header;
+	int fd;
+	FILE *fp;
+
+	/*
+	 * check if clockid is supported
+	 */
+	if (!perf_get_timestamp())
+		warnx("jvmti: kernel does not support %d clock id", perf_clk_id);
+
+	memset(&header, 0, sizeof(header));
+
+	debug_cache_init();
+
+	/*
+	 * jitdump file name
+	 */
+	snprintf(dump_path, PATH_MAX, "%s/jit-%i.dump", jit_path, getpid());
+
+	fd = open(dump_path, O_CREAT|O_TRUNC|O_RDWR, 0666);
+	if (fd == -1)
+		return NULL;
+
+	/*
+	 * create perf.data maker for the jitdump file
+	 */
+	if (perf_open_marker_file(fd)) {
+		warnx("jvmti: failed to create marker file");
+		return NULL;
+	}
+
+	fp = fdopen(fd, "w+");
+	if (!fp) {
+		warn("jvmti: cannot create %s", dump_path);
+		close(fd);
+		goto error;
+	}
+
+	warnx("jvmti: jitdump in %s", dump_path);
+
+	if (get_e_machine(&header)) {
+		warn("get_e_machine failed\n");
+		goto error;
+	}
+
+	header.magic      = JITHEADER_MAGIC;
+	header.version    = JITHEADER_VERSION;
+	header.total_size = sizeof(header);
+	header.pid        = getpid();
+
+	/* calculate amount of padding '\0' */
+	pad_cnt = PADDING_8ALIGNED(header.total_size);
+	header.total_size += pad_cnt;
+
+	header.timestamp = perf_get_timestamp();
+
+	if (!fwrite(&header, sizeof(header), 1, fp)) {
+		warn("jvmti: cannot write dumpfile header");
+		goto error;
+	}
+
+	/* write padding '\0' if necessary */
+	if (pad_cnt && !fwrite(pad_bytes, pad_cnt, 1, fp)) {
+		warn("jvmti: cannot write dumpfile header padding");
+		goto error;
+	}
+
+	return fp;
+error:
+	fclose(fp);
+	return NULL;
+}
+
+int
+jvmti_close(void *agent)
+{
+	struct jr_code_close rec;
+	FILE *fp = agent;
+
+	if (!fp) {
+		warnx("jvmti: incalid fd in close_agent");
+		return -1;
+	}
+
+	rec.p.id = JIT_CODE_CLOSE;
+	rec.p.total_size = sizeof(rec);
+
+	rec.p.timestamp = perf_get_timestamp();
+
+	if (!fwrite(&rec, sizeof(rec), 1, fp))
+		return -1;
+
+	fclose(fp);
+
+	fp = NULL;
+
+	perf_close_marker_file();
+
+	return 0;
+}
+
+int
+jvmti_write_code(void *agent, char const *sym,
+	uint64_t vma, void const *code, unsigned int const size)
+{
+	static int code_generation = 1;
+	struct jr_code_load rec;
+	size_t sym_len;
+	size_t padding_count;
+	FILE *fp = agent;
+	int ret = -1;
+
+	/* don't care about 0 length function, no samples */
+	if (size == 0)
+		return 0;
+
+	if (!fp) {
+		warnx("jvmti: invalid fd in write_native_code");
+		return -1;
+	}
+
+	sym_len = strlen(sym) + 1;
+
+	rec.p.id           = JIT_CODE_LOAD;
+	rec.p.total_size   = sizeof(rec) + sym_len;
+	padding_count      = PADDING_8ALIGNED(rec.p.total_size);
+	rec.p. total_size += padding_count;
+	rec.p.timestamp    = perf_get_timestamp();
+
+	rec.code_size  = size;
+	rec.vma        = vma;
+	rec.code_addr  = vma;
+	rec.pid	       = getpid();
+	rec.tid	       = gettid();
+
+	if (code)
+		rec.p.total_size += size;
+
+	/*
+	 * If JVM is multi-threaded, nultiple concurrent calls to agent
+	 * may be possible, so protect file writes
+	 */
+	flockfile(fp);
+
+	/*
+	 * get code index inside lock to avoid race condition
+	 */
+	rec.code_index = code_generation++;
+
+	ret = fwrite_unlocked(&rec, sizeof(rec), 1, fp);
+	fwrite_unlocked(sym, sym_len, 1, fp);
+
+	if (padding_count)
+		fwrite_unlocked(pad_bytes, padding_count, 1, fp);
+
+	if (code)
+		fwrite_unlocked(code, size, 1, fp);
+
+	funlockfile(fp);
+
+	ret = 0;
+
+	return ret;
+}
+
+int
+jvmti_write_debug_info(void *agent, uint64_t code, const char *file,
+		       jvmti_line_info_t *li, int nr_lines)
+{
+	struct jr_code_debug_info rec;
+	size_t sret, len, size, flen;
+	size_t padding_count;
+	uint64_t addr;
+	const char *fn = file;
+	FILE *fp = agent;
+	int i;
+
+	/*
+	 * no entry to write
+	 */
+	if (!nr_lines)
+		return 0;
+
+	if (!fp) {
+		warnx("jvmti: invalid fd in write_debug_info");
+		return -1;
+	}
+
+	flen = strlen(file) + 1;
+
+	rec.p.id        = JIT_CODE_DEBUG_INFO;
+	size            = sizeof(rec);
+	rec.p.timestamp = perf_get_timestamp();
+	rec.code_addr   = (uint64_t)(uintptr_t)code;
+	rec.nr_entry    = nr_lines;
+
+	/*
+	 * on disk source line info layout:
+	 * uint64_t : addr
+	 * int      : line number
+	 * int      : column discriminator
+	 * file[]   : source file name
+	 * padding  : pad to multiple of 8 bytes
+	 */
+	size += nr_lines * sizeof(struct debug_entry);
+	size += flen * nr_lines;
+	/*
+	 * pad to 8 bytes
+	 */
+	padding_count = PADDING_8ALIGNED(size);
+
+	rec.p.total_size = size + padding_count;
+
+	/*
+	 * If JVM is multi-threaded, nultiple concurrent calls to agent
+	 * may be possible, so protect file writes
+	 */
+	flockfile(fp);
+
+	sret = fwrite_unlocked(&rec, sizeof(rec), 1, fp);
+	if (sret != 1)
+		goto error;
+
+	for (i = 0; i < nr_lines; i++) {
+
+		addr = (uint64_t)li[i].pc;
+		len  = sizeof(addr);
+		sret = fwrite_unlocked(&addr, len, 1, fp);
+		if (sret != 1)
+			goto error;
+
+		len  = sizeof(li[0].line_number);
+		sret = fwrite_unlocked(&li[i].line_number, len, 1, fp);
+		if (sret != 1)
+			goto error;
+
+		len  = sizeof(li[0].discrim);
+		sret = fwrite_unlocked(&li[i].discrim, len, 1, fp);
+		if (sret != 1)
+			goto error;
+
+		sret = fwrite_unlocked(fn, flen, 1, fp);
+		if (sret != 1)
+			goto error;
+	}
+	if (padding_count)
+		sret = fwrite_unlocked(pad_bytes, padding_count, 1, fp);
+		if (sret != 1)
+			goto error;
+
+	funlockfile(fp);
+	return 0;
+error:
+	funlockfile(fp);
+	return -1;
+}
diff --git a/tools/perf/jvmti/jvmti_agent.h b/tools/perf/jvmti/jvmti_agent.h
new file mode 100644
index 0000000..bedf5d0
--- /dev/null
+++ b/tools/perf/jvmti/jvmti_agent.h
@@ -0,0 +1,36 @@
+#ifndef __JVMTI_AGENT_H__
+#define __JVMTI_AGENT_H__
+
+#include <sys/types.h>
+#include <stdint.h>
+#include <jvmti.h>
+
+#define __unused __attribute__((unused))
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+typedef struct {
+	unsigned long	pc;
+	int		line_number;
+	int		discrim; /* discriminator -- 0 for now */
+} jvmti_line_info_t;
+
+void *jvmti_open(void);
+int   jvmti_close(void *agent);
+int   jvmti_write_code(void *agent, char const *symbol_name,
+		       uint64_t vma, void const *code,
+		       const unsigned int code_size);
+
+int   jvmti_write_debug_info(void *agent,
+		             uint64_t code,
+			     const char *file,
+			     jvmti_line_info_t *li,
+			     int nr_lines);
+
+#if defined(__cplusplus)
+}
+
+#endif
+#endif /* __JVMTI_H__ */
diff --git a/tools/perf/jvmti/libjvmti.c b/tools/perf/jvmti/libjvmti.c
new file mode 100644
index 0000000..ac12e4b
--- /dev/null
+++ b/tools/perf/jvmti/libjvmti.c
@@ -0,0 +1,304 @@
+#include <sys/types.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <err.h>
+#include <jvmti.h>
+#include <jvmticmlr.h>
+#include <limits.h>
+
+#include "jvmti_agent.h"
+
+static int has_line_numbers;
+void *jvmti_agent;
+
+static jvmtiError
+do_get_line_numbers(jvmtiEnv *jvmti, void *pc, jmethodID m, jint bci,
+		    jvmti_line_info_t *tab, jint *nr)
+{
+	jint i, lines = 0;
+	jint nr_lines = 0;
+	jvmtiLineNumberEntry *loc_tab = NULL;
+	jvmtiError ret;
+
+	ret = (*jvmti)->GetLineNumberTable(jvmti, m, &nr_lines, &loc_tab);
+	if (ret != JVMTI_ERROR_NONE)
+		return ret;
+
+	for (i = 0; i < nr_lines; i++) {
+		if (loc_tab[i].start_location < bci) {
+			tab[lines].pc = (unsigned long)pc;
+			tab[lines].line_number = loc_tab[i].line_number;
+			tab[lines].discrim = 0; /* not yet used */
+			lines++;
+		} else {
+			break;
+		}
+	}
+	(*jvmti)->Deallocate(jvmti, (unsigned char *)loc_tab);
+	*nr = lines;
+	return JVMTI_ERROR_NONE;
+}
+
+static jvmtiError
+get_line_numbers(jvmtiEnv *jvmti, const void *compile_info, jvmti_line_info_t **tab, int *nr_lines)
+{
+	const jvmtiCompiledMethodLoadRecordHeader *hdr;
+	jvmtiCompiledMethodLoadInlineRecord *rec;
+	jvmtiLineNumberEntry *lne = NULL;
+	PCStackInfo *c;
+	jint nr, ret;
+	int nr_total = 0;
+	int i, lines_total = 0;
+
+	if (!(tab && nr_lines))
+		return JVMTI_ERROR_NULL_POINTER;
+
+	/*
+	 * Phase 1 -- get the number of lines necessary
+	 */
+	for (hdr = compile_info; hdr != NULL; hdr = hdr->next) {
+		if (hdr->kind == JVMTI_CMLR_INLINE_INFO) {
+			rec = (jvmtiCompiledMethodLoadInlineRecord *)hdr;
+			for (i = 0; i < rec->numpcs; i++) {
+				c = rec->pcinfo + i;
+				nr = 0;
+				/*
+				 * unfortunately, need a tab to get the number of lines!
+				 */
+				ret = (*jvmti)->GetLineNumberTable(jvmti, c->methods[0], &nr, &lne);
+				if (ret == JVMTI_ERROR_NONE) {
+					/* free what was allocated for nothing */
+					(*jvmti)->Deallocate(jvmti, (unsigned char *)lne);
+					nr_total += (int)nr;
+				}
+			}
+		}
+	}
+
+	if (nr_total == 0)
+		return JVMTI_ERROR_NOT_FOUND;
+
+	/*
+	 * Phase 2 -- allocate big enough line table
+	 */
+	*tab = malloc(nr_total * sizeof(**tab));
+	if (!*tab)
+		return JVMTI_ERROR_OUT_OF_MEMORY;
+
+	for (hdr = compile_info; hdr != NULL; hdr = hdr->next) {
+		if (hdr->kind == JVMTI_CMLR_INLINE_INFO) {
+			rec = (jvmtiCompiledMethodLoadInlineRecord *)hdr;
+			for (i = 0; i < rec->numpcs; i++) {
+				c = rec->pcinfo + i;
+				nr = 0;
+				ret = do_get_line_numbers(jvmti, c->pc,
+							  c->methods[0],
+							  c->bcis[0],
+							  *tab + lines_total,
+							  &nr);
+				if (ret == JVMTI_ERROR_NONE)
+					lines_total += nr;
+			}
+		}
+	}
+	*nr_lines = lines_total;
+	return JVMTI_ERROR_NONE;
+}
+
+static void JNICALL
+compiled_method_load_cb(jvmtiEnv *jvmti,
+			jmethodID method,
+			jint code_size,
+			void const *code_addr,
+			jint map_length,
+			jvmtiAddrLocationMap const *map,
+			const void *compile_info)
+{
+	jvmti_line_info_t *line_tab = NULL;
+	jclass decl_class;
+	char *class_sign = NULL;
+	char *func_name = NULL;
+	char *func_sign = NULL;
+	char *file_name= NULL;
+	char fn[PATH_MAX];
+	uint64_t addr = (uint64_t)(uintptr_t)code_addr;
+	jvmtiError ret;
+	int nr_lines = 0; /* in line_tab[] */
+	size_t len;
+
+	ret = (*jvmti)->GetMethodDeclaringClass(jvmti, method,
+						&decl_class);
+	if (ret != JVMTI_ERROR_NONE) {
+		warnx("jvmti: cannot get declaring class");
+		return;
+	}
+
+	if (has_line_numbers && map && map_length) {
+		ret = get_line_numbers(jvmti, compile_info, &line_tab, &nr_lines);
+		if (ret != JVMTI_ERROR_NONE) {
+			warnx("jvmti: cannot get line table for method");
+			nr_lines = 0;
+		}
+	}
+
+	ret = (*jvmti)->GetSourceFileName(jvmti, decl_class, &file_name);
+	if (ret != JVMTI_ERROR_NONE) {
+		warnx("jvmti: cannot get source filename ret=%d", ret);
+		goto error;
+	}
+
+	ret = (*jvmti)->GetClassSignature(jvmti, decl_class,
+					  &class_sign, NULL);
+	if (ret != JVMTI_ERROR_NONE) {
+		warnx("jvmti: getclassignature failed");
+		goto error;
+	}
+
+	ret = (*jvmti)->GetMethodName(jvmti, method, &func_name,
+				      &func_sign, NULL);
+	if (ret != JVMTI_ERROR_NONE) {
+		warnx("jvmti: failed getmethodname");
+		goto error;
+	}
+
+	/*
+	 * Assume path name is class hierarchy, this is a common practice with Java programs
+	 */
+	if (*class_sign == 'L') {
+		int j, i = 0;
+		char *p = strrchr(class_sign, '/');
+		if (p) {
+			/* drop the 'L' prefix and copy up to the final '/' */
+			for (i = 0; i < (p - class_sign); i++)
+				fn[i] = class_sign[i+1];
+		}
+		/*
+		 * append file name, we use loops and not string ops to avoid modifying
+		 * class_sign which is used later for the symbol name
+		 */
+		for (j = 0; i < (PATH_MAX - 1) && file_name && j < strlen(file_name); j++, i++)
+			fn[i] = file_name[j];
+		fn[i] = '\0';
+	} else {
+		/* fallback case */
+		strcpy(fn, file_name);
+	}
+	/*
+	 * write source line info record if we have it
+	 */
+	if (jvmti_write_debug_info(jvmti_agent, addr, fn, line_tab, nr_lines))
+		warnx("jvmti: write_debug_info() failed");
+
+	len = strlen(func_name) + strlen(class_sign) + strlen(func_sign) + 2;
+	{
+		char str[len];
+		snprintf(str, len, "%s%s%s", class_sign, func_name, func_sign);
+
+		if (jvmti_write_code(jvmti_agent, str, addr, code_addr, code_size))
+			warnx("jvmti: write_code() failed");
+	}
+error:
+	(*jvmti)->Deallocate(jvmti, (unsigned char *)func_name);
+	(*jvmti)->Deallocate(jvmti, (unsigned char *)func_sign);
+	(*jvmti)->Deallocate(jvmti, (unsigned char *)class_sign);
+	(*jvmti)->Deallocate(jvmti, (unsigned char *)file_name);
+	free(line_tab);
+}
+
+static void JNICALL
+code_generated_cb(jvmtiEnv *jvmti,
+		  char const *name,
+		  void const *code_addr,
+		  jint code_size)
+{
+	uint64_t addr = (uint64_t)(unsigned long)code_addr;
+	int ret;
+
+	ret = jvmti_write_code(jvmti_agent, name, addr, code_addr, code_size);
+	if (ret)
+		warnx("jvmti: write_code() failed for code_generated");
+}
+
+JNIEXPORT jint JNICALL
+Agent_OnLoad(JavaVM *jvm, char *options, void *reserved __unused)
+{
+	jvmtiEventCallbacks cb;
+	jvmtiCapabilities caps1;
+	jvmtiJlocationFormat format;
+	jvmtiEnv *jvmti = NULL;
+	jint ret;
+
+	jvmti_agent = jvmti_open();
+	if (!jvmti_agent) {
+		warnx("jvmti: open_agent failed");
+		return -1;
+	}
+
+	/*
+	 * Request a JVMTI interface version 1 environment
+	 */
+	ret = (*jvm)->GetEnv(jvm, (void *)&jvmti, JVMTI_VERSION_1);
+	if (ret != JNI_OK) {
+		warnx("jvmti: jvmti version 1 not supported");
+		return -1;
+	}
+
+	/*
+	 * acquire method_load capability, we require it
+	 * request line numbers (optional)
+	 */
+	memset(&caps1, 0, sizeof(caps1));
+	caps1.can_generate_compiled_method_load_events = 1;
+
+	ret = (*jvmti)->AddCapabilities(jvmti, &caps1);
+	if (ret != JVMTI_ERROR_NONE) {
+		warnx("jvmti: acquire compiled_method capability failed");
+		return -1;
+	}
+	ret = (*jvmti)->GetJLocationFormat(jvmti, &format);
+        if (ret == JVMTI_ERROR_NONE && format == JVMTI_JLOCATION_JVMBCI) {
+                memset(&caps1, 0, sizeof(caps1));
+                caps1.can_get_line_numbers = 1;
+                caps1.can_get_source_file_name = 1;
+		ret = (*jvmti)->AddCapabilities(jvmti, &caps1);
+                if (ret == JVMTI_ERROR_NONE)
+                        has_line_numbers = 1;
+        }
+
+	memset(&cb, 0, sizeof(cb));
+
+	cb.CompiledMethodLoad   = compiled_method_load_cb;
+	cb.DynamicCodeGenerated = code_generated_cb;
+
+	ret = (*jvmti)->SetEventCallbacks(jvmti, &cb, sizeof(cb));
+	if (ret != JVMTI_ERROR_NONE) {
+		warnx("jvmti: cannot set event callbacks");
+		return -1;
+	}
+
+	ret = (*jvmti)->SetEventNotificationMode(jvmti, JVMTI_ENABLE,
+			JVMTI_EVENT_COMPILED_METHOD_LOAD, NULL);
+	if (ret != JVMTI_ERROR_NONE) {
+		warnx("jvmti: setnotification failed for method_load");
+		return -1;
+	}
+
+	ret = (*jvmti)->SetEventNotificationMode(jvmti, JVMTI_ENABLE,
+			JVMTI_EVENT_DYNAMIC_CODE_GENERATED, NULL);
+	if (ret != JVMTI_ERROR_NONE) {
+		warnx("jvmti: setnotification failed on code_generated");
+		return -1;
+	}
+	return 0;
+}
+
+JNIEXPORT void JNICALL
+Agent_OnUnload(JavaVM *jvm __unused)
+{
+	int ret;
+
+	ret = jvmti_close(jvmti_agent);
+	if (ret)
+		errx(1, "Error: op_close_agent()");
+}
diff --git a/tools/perf/perf.c b/tools/perf/perf.c
index a929618..aaee0a7 100644
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c
@@ -454,11 +454,12 @@
 
 static void execv_dashed_external(const char **argv)
 {
-	struct strbuf cmd = STRBUF_INIT;
+	char *cmd;
 	const char *tmp;
 	int status;
 
-	strbuf_addf(&cmd, "perf-%s", argv[0]);
+	if (asprintf(&cmd, "perf-%s", argv[0]) < 0)
+		goto do_die;
 
 	/*
 	 * argv[0] must be the perf command, but the argv array
@@ -467,7 +468,7 @@
 	 * restore it on error.
 	 */
 	tmp = argv[0];
-	argv[0] = cmd.buf;
+	argv[0] = cmd;
 
 	/*
 	 * if we fail because the command is not found, it is
@@ -475,15 +476,16 @@
 	 */
 	status = run_command_v_opt(argv, 0);
 	if (status != -ERR_RUN_COMMAND_EXEC) {
-		if (IS_RUN_COMMAND_ERR(status))
+		if (IS_RUN_COMMAND_ERR(status)) {
+do_die:
 			die("unable to run '%s'", argv[0]);
+		}
 		exit(-status);
 	}
 	errno = ENOENT; /* as if we called execvp */
 
 	argv[0] = tmp;
-
-	strbuf_release(&cmd);
+	zfree(&cmd);
 }
 
 static int run_argv(int *argcp, const char ***argv)
@@ -546,6 +548,8 @@
 
 	srandom(time(NULL));
 
+	perf_config(perf_default_config, NULL);
+
 	/* get debugfs/tracefs mount point from /proc/mounts */
 	tracing_path_mount();
 
@@ -613,6 +617,8 @@
 	 */
 	pthread__block_sigwinch();
 
+	perf_debug_setup();
+
 	while (1) {
 		static int done_help;
 		int was_alias = run_argv(&argc, &argv);
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index 90129ac..5381a01 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -58,6 +58,8 @@
 	bool	     full_auxtrace;
 	bool	     auxtrace_snapshot_mode;
 	bool	     record_switch_events;
+	bool	     all_kernel;
+	bool	     all_user;
 	unsigned int freq;
 	unsigned int mmap_pages;
 	unsigned int auxtrace_mmap_pages;
diff --git a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py
index 15c8400..1d95009 100644
--- a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py
+++ b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py
@@ -71,7 +71,10 @@
 except:
 	if not audit_package_warned:
 		audit_package_warned = True
-		print "Install the audit-libs-python package to get syscall names"
+		print "Install the audit-libs-python package to get syscall names.\n" \
+                    "For example:\n  # apt-get install python-audit (Ubuntu)" \
+                    "\n  # yum install audit-libs-python (Fedora)" \
+                    "\n  etc.\n"
 
 def syscall_name(id):
 	try:
diff --git a/tools/perf/tests/.gitignore b/tools/perf/tests/.gitignore
index bf016c4..8cc30e7 100644
--- a/tools/perf/tests/.gitignore
+++ b/tools/perf/tests/.gitignore
@@ -1,3 +1,4 @@
 llvm-src-base.c
 llvm-src-kbuild.c
 llvm-src-prologue.c
+llvm-src-relocation.c
diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build
index 614899b..1ba628e 100644
--- a/tools/perf/tests/Build
+++ b/tools/perf/tests/Build
@@ -31,7 +31,7 @@
 perf-y += parse-no-sample-id-all.o
 perf-y += kmod-path.o
 perf-y += thread-map.o
-perf-y += llvm.o llvm-src-base.o llvm-src-kbuild.o llvm-src-prologue.o
+perf-y += llvm.o llvm-src-base.o llvm-src-kbuild.o llvm-src-prologue.o llvm-src-relocation.o
 perf-y += bpf.o
 perf-y += topology.o
 perf-y += cpumap.o
@@ -59,6 +59,13 @@
 	$(Q)sed -e 's/"/\\"/g' -e 's/\(.*\)/"\1\\n"/g' $< >> $@
 	$(Q)echo ';' >> $@
 
+$(OUTPUT)tests/llvm-src-relocation.c: tests/bpf-script-test-relocation.c tests/Build
+	$(call rule_mkdir)
+	$(Q)echo '#include <tests/llvm.h>' > $@
+	$(Q)echo 'const char test_llvm__bpf_test_relocation[] =' >> $@
+	$(Q)sed -e 's/"/\\"/g' -e 's/\(.*\)/"\1\\n"/g' $< >> $@
+	$(Q)echo ';' >> $@
+
 ifeq ($(ARCH),$(filter $(ARCH),x86 arm arm64))
 perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o
 endif
diff --git a/tools/perf/tests/bp_signal.c b/tools/perf/tests/bp_signal.c
index fb80c9e..e7664fe 100644
--- a/tools/perf/tests/bp_signal.c
+++ b/tools/perf/tests/bp_signal.c
@@ -29,14 +29,59 @@
 
 static int fd1;
 static int fd2;
+static int fd3;
 static int overflows;
+static int overflows_2;
+
+volatile long the_var;
+
+
+/*
+ * Use ASM to ensure watchpoint and breakpoint can be triggered
+ * at one instruction.
+ */
+#if defined (__x86_64__)
+extern void __test_function(volatile long *ptr);
+asm (
+	".globl __test_function\n"
+	"__test_function:\n"
+	"incq (%rdi)\n"
+	"ret\n");
+#elif defined (__aarch64__)
+extern void __test_function(volatile long *ptr);
+asm (
+	".globl __test_function\n"
+	"__test_function:\n"
+	"str x30, [x0]\n"
+	"ret\n");
+
+#else
+static void __test_function(volatile long *ptr)
+{
+	*ptr = 0x1234;
+}
+#endif
 
 __attribute__ ((noinline))
 static int test_function(void)
 {
+	__test_function(&the_var);
+	the_var++;
 	return time(NULL);
 }
 
+static void sig_handler_2(int signum __maybe_unused,
+			  siginfo_t *oh __maybe_unused,
+			  void *uc __maybe_unused)
+{
+	overflows_2++;
+	if (overflows_2 > 10) {
+		ioctl(fd1, PERF_EVENT_IOC_DISABLE, 0);
+		ioctl(fd2, PERF_EVENT_IOC_DISABLE, 0);
+		ioctl(fd3, PERF_EVENT_IOC_DISABLE, 0);
+	}
+}
+
 static void sig_handler(int signum __maybe_unused,
 			siginfo_t *oh __maybe_unused,
 			void *uc __maybe_unused)
@@ -54,10 +99,11 @@
 		 */
 		ioctl(fd1, PERF_EVENT_IOC_DISABLE, 0);
 		ioctl(fd2, PERF_EVENT_IOC_DISABLE, 0);
+		ioctl(fd3, PERF_EVENT_IOC_DISABLE, 0);
 	}
 }
 
-static int bp_event(void *fn, int setup_signal)
+static int __event(bool is_x, void *addr, int sig)
 {
 	struct perf_event_attr pe;
 	int fd;
@@ -67,8 +113,8 @@
 	pe.size = sizeof(struct perf_event_attr);
 
 	pe.config = 0;
-	pe.bp_type = HW_BREAKPOINT_X;
-	pe.bp_addr = (unsigned long) fn;
+	pe.bp_type = is_x ? HW_BREAKPOINT_X : HW_BREAKPOINT_W;
+	pe.bp_addr = (unsigned long) addr;
 	pe.bp_len = sizeof(long);
 
 	pe.sample_period = 1;
@@ -86,17 +132,25 @@
 		return TEST_FAIL;
 	}
 
-	if (setup_signal) {
-		fcntl(fd, F_SETFL, O_RDWR|O_NONBLOCK|O_ASYNC);
-		fcntl(fd, F_SETSIG, SIGIO);
-		fcntl(fd, F_SETOWN, getpid());
-	}
+	fcntl(fd, F_SETFL, O_RDWR|O_NONBLOCK|O_ASYNC);
+	fcntl(fd, F_SETSIG, sig);
+	fcntl(fd, F_SETOWN, getpid());
 
 	ioctl(fd, PERF_EVENT_IOC_RESET, 0);
 
 	return fd;
 }
 
+static int bp_event(void *addr, int sig)
+{
+	return __event(true, addr, sig);
+}
+
+static int wp_event(void *addr, int sig)
+{
+	return __event(false, addr, sig);
+}
+
 static long long bp_count(int fd)
 {
 	long long count;
@@ -114,7 +168,7 @@
 int test__bp_signal(int subtest __maybe_unused)
 {
 	struct sigaction sa;
-	long long count1, count2;
+	long long count1, count2, count3;
 
 	/* setup SIGIO signal handler */
 	memset(&sa, 0, sizeof(struct sigaction));
@@ -126,21 +180,52 @@
 		return TEST_FAIL;
 	}
 
+	sa.sa_sigaction = (void *) sig_handler_2;
+	if (sigaction(SIGUSR1, &sa, NULL) < 0) {
+		pr_debug("failed setting up signal handler 2\n");
+		return TEST_FAIL;
+	}
+
 	/*
 	 * We create following events:
 	 *
-	 * fd1 - breakpoint event on test_function with SIGIO
+	 * fd1 - breakpoint event on __test_function with SIGIO
 	 *       signal configured. We should get signal
 	 *       notification each time the breakpoint is hit
 	 *
-	 * fd2 - breakpoint event on sig_handler without SIGIO
+	 * fd2 - breakpoint event on sig_handler with SIGUSR1
+	 *       configured. We should get SIGUSR1 each time when
+	 *       breakpoint is hit
+	 *
+	 * fd3 - watchpoint event on __test_function with SIGIO
 	 *       configured.
 	 *
 	 * Following processing should happen:
-	 *   - execute test_function
-	 *   - fd1 event breakpoint hit -> count1 == 1
-	 *   - SIGIO is delivered       -> overflows == 1
-	 *   - fd2 event breakpoint hit -> count2 == 1
+	 *   Exec:               Action:                       Result:
+	 *   incq (%rdi)       - fd1 event breakpoint hit   -> count1 == 1
+	 *                     - SIGIO is delivered
+	 *   sig_handler       - fd2 event breakpoint hit   -> count2 == 1
+	 *                     - SIGUSR1 is delivered
+	 *   sig_handler_2                                  -> overflows_2 == 1  (nested signal)
+	 *   sys_rt_sigreturn  - return from sig_handler_2
+	 *   overflows++                                    -> overflows = 1
+	 *   sys_rt_sigreturn  - return from sig_handler
+	 *   incq (%rdi)       - fd3 event watchpoint hit   -> count3 == 1       (wp and bp in one insn)
+	 *                     - SIGIO is delivered
+	 *   sig_handler       - fd2 event breakpoint hit   -> count2 == 2
+	 *                     - SIGUSR1 is delivered
+	 *   sig_handler_2                                  -> overflows_2 == 2  (nested signal)
+	 *   sys_rt_sigreturn  - return from sig_handler_2
+	 *   overflows++                                    -> overflows = 2
+	 *   sys_rt_sigreturn  - return from sig_handler
+	 *   the_var++         - fd3 event watchpoint hit   -> count3 == 2       (standalone watchpoint)
+	 *                     - SIGIO is delivered
+	 *   sig_handler       - fd2 event breakpoint hit   -> count2 == 3
+	 *                     - SIGUSR1 is delivered
+	 *   sig_handler_2                                  -> overflows_2 == 3  (nested signal)
+	 *   sys_rt_sigreturn  - return from sig_handler_2
+	 *   overflows++                                    -> overflows == 3
+	 *   sys_rt_sigreturn  - return from sig_handler
 	 *
 	 * The test case check following error conditions:
 	 * - we get stuck in signal handler because of debug
@@ -152,11 +237,13 @@
 	 *
 	 */
 
-	fd1 = bp_event(test_function, 1);
-	fd2 = bp_event(sig_handler, 0);
+	fd1 = bp_event(__test_function, SIGIO);
+	fd2 = bp_event(sig_handler, SIGUSR1);
+	fd3 = wp_event((void *)&the_var, SIGIO);
 
 	ioctl(fd1, PERF_EVENT_IOC_ENABLE, 0);
 	ioctl(fd2, PERF_EVENT_IOC_ENABLE, 0);
+	ioctl(fd3, PERF_EVENT_IOC_ENABLE, 0);
 
 	/*
 	 * Kick off the test by trigering 'fd1'
@@ -166,15 +253,18 @@
 
 	ioctl(fd1, PERF_EVENT_IOC_DISABLE, 0);
 	ioctl(fd2, PERF_EVENT_IOC_DISABLE, 0);
+	ioctl(fd3, PERF_EVENT_IOC_DISABLE, 0);
 
 	count1 = bp_count(fd1);
 	count2 = bp_count(fd2);
+	count3 = bp_count(fd3);
 
 	close(fd1);
 	close(fd2);
+	close(fd3);
 
-	pr_debug("count1 %lld, count2 %lld, overflow %d\n",
-		 count1, count2, overflows);
+	pr_debug("count1 %lld, count2 %lld, count3 %lld, overflow %d, overflows_2 %d\n",
+		 count1, count2, count3, overflows, overflows_2);
 
 	if (count1 != 1) {
 		if (count1 == 11)
@@ -183,12 +273,18 @@
 			pr_debug("failed: wrong count for bp1%lld\n", count1);
 	}
 
-	if (overflows != 1)
+	if (overflows != 3)
 		pr_debug("failed: wrong overflow hit\n");
 
-	if (count2 != 1)
+	if (overflows_2 != 3)
+		pr_debug("failed: wrong overflow_2 hit\n");
+
+	if (count2 != 3)
 		pr_debug("failed: wrong count for bp2\n");
 
-	return count1 == 1 && overflows == 1 && count2 == 1 ?
+	if (count3 != 2)
+		pr_debug("failed: wrong count for bp3\n");
+
+	return count1 == 1 && overflows == 3 && count2 == 3 && overflows_2 == 3 && count3 == 2 ?
 		TEST_OK : TEST_FAIL;
 }
diff --git a/tools/perf/tests/bpf-script-test-relocation.c b/tools/perf/tests/bpf-script-test-relocation.c
new file mode 100644
index 0000000..93af774
--- /dev/null
+++ b/tools/perf/tests/bpf-script-test-relocation.c
@@ -0,0 +1,50 @@
+/*
+ * bpf-script-test-relocation.c
+ * Test BPF loader checking relocation
+ */
+#ifndef LINUX_VERSION_CODE
+# error Need LINUX_VERSION_CODE
+# error Example: for 4.2 kernel, put 'clang-opt="-DLINUX_VERSION_CODE=0x40200" into llvm section of ~/.perfconfig'
+#endif
+#define BPF_ANY 0
+#define BPF_MAP_TYPE_ARRAY 2
+#define BPF_FUNC_map_lookup_elem 1
+#define BPF_FUNC_map_update_elem 2
+
+static void *(*bpf_map_lookup_elem)(void *map, void *key) =
+	(void *) BPF_FUNC_map_lookup_elem;
+static void *(*bpf_map_update_elem)(void *map, void *key, void *value, int flags) =
+	(void *) BPF_FUNC_map_update_elem;
+
+struct bpf_map_def {
+	unsigned int type;
+	unsigned int key_size;
+	unsigned int value_size;
+	unsigned int max_entries;
+};
+
+#define SEC(NAME) __attribute__((section(NAME), used))
+struct bpf_map_def SEC("maps") my_table = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.max_entries = 1,
+};
+
+int this_is_a_global_val;
+
+SEC("func=sys_write")
+int bpf_func__sys_write(void *ctx)
+{
+	int key = 0;
+	int value = 0;
+
+	/*
+	 * Incorrect relocation. Should not allow this program be
+	 * loaded into kernel.
+	 */
+	bpf_map_update_elem(&this_is_a_global_val, &key, &value, 0);
+	return 0;
+}
+char _license[] SEC("license") = "GPL";
+int _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c
index 33689a0..199501c 100644
--- a/tools/perf/tests/bpf.c
+++ b/tools/perf/tests/bpf.c
@@ -1,7 +1,11 @@
 #include <stdio.h>
 #include <sys/epoll.h>
+#include <util/util.h>
 #include <util/bpf-loader.h>
 #include <util/evlist.h>
+#include <linux/bpf.h>
+#include <linux/filter.h>
+#include <bpf/bpf.h>
 #include "tests.h"
 #include "llvm.h"
 #include "debug.h"
@@ -71,6 +75,15 @@
 		(NR_ITERS + 1) / 4,
 	},
 #endif
+	{
+		LLVM_TESTCASE_BPF_RELOCATION,
+		"Test BPF relocation checker",
+		"[bpf_relocation_test]",
+		"fix 'perf test LLVM' first",
+		"libbpf error when dealing with relocation",
+		NULL,
+		0,
+	},
 };
 
 static int do_test(struct bpf_object *obj, int (*func)(void),
@@ -99,7 +112,7 @@
 	parse_evlist.error = &parse_error;
 	INIT_LIST_HEAD(&parse_evlist.list);
 
-	err = parse_events_load_bpf_obj(&parse_evlist, &parse_evlist.list, obj);
+	err = parse_events_load_bpf_obj(&parse_evlist, &parse_evlist.list, obj, NULL);
 	if (err || list_empty(&parse_evlist.list)) {
 		pr_debug("Failed to add events selected by BPF\n");
 		return TEST_FAIL;
@@ -190,7 +203,7 @@
 
 	ret = test_llvm__fetch_bpf_obj(&obj_buf, &obj_buf_sz,
 				       bpf_testcase_table[idx].prog_id,
-				       true);
+				       true, NULL);
 	if (ret != TEST_OK || !obj_buf || !obj_buf_sz) {
 		pr_debug("Unable to get BPF object, %s\n",
 			 bpf_testcase_table[idx].msg_compile_fail);
@@ -202,14 +215,21 @@
 
 	obj = prepare_bpf(obj_buf, obj_buf_sz,
 			  bpf_testcase_table[idx].name);
-	if (!obj) {
+	if ((!!bpf_testcase_table[idx].target_func) != (!!obj)) {
+		if (!obj)
+			pr_debug("Fail to load BPF object: %s\n",
+				 bpf_testcase_table[idx].msg_load_fail);
+		else
+			pr_debug("Success unexpectedly: %s\n",
+				 bpf_testcase_table[idx].msg_load_fail);
 		ret = TEST_FAIL;
 		goto out;
 	}
 
-	ret = do_test(obj,
-		      bpf_testcase_table[idx].target_func,
-		      bpf_testcase_table[idx].expect_result);
+	if (obj)
+		ret = do_test(obj,
+			      bpf_testcase_table[idx].target_func,
+			      bpf_testcase_table[idx].expect_result);
 out:
 	bpf__clear();
 	return ret;
@@ -227,6 +247,36 @@
 	return bpf_testcase_table[i].desc;
 }
 
+static int check_env(void)
+{
+	int err;
+	unsigned int kver_int;
+	char license[] = "GPL";
+
+	struct bpf_insn insns[] = {
+		BPF_MOV64_IMM(BPF_REG_0, 1),
+		BPF_EXIT_INSN(),
+	};
+
+	err = fetch_kernel_version(&kver_int, NULL, 0);
+	if (err) {
+		pr_debug("Unable to get kernel version\n");
+		return err;
+	}
+
+	err = bpf_load_program(BPF_PROG_TYPE_KPROBE, insns,
+			       sizeof(insns) / sizeof(insns[0]),
+			       license, kver_int, NULL, 0);
+	if (err < 0) {
+		pr_err("Missing basic BPF support, skip this test: %s\n",
+		       strerror(errno));
+		return err;
+	}
+	close(err);
+
+	return 0;
+}
+
 int test__bpf(int i)
 {
 	int err;
@@ -239,6 +289,9 @@
 		return TEST_SKIP;
 	}
 
+	if (check_env())
+		return TEST_SKIP;
+
 	err = __test__bpf(i);
 	return err;
 }
diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
index 313a48c..afc9ad0 100644
--- a/tools/perf/tests/code-reading.c
+++ b/tools/perf/tests/code-reading.c
@@ -439,7 +439,7 @@
 		.mmap_pages	     = UINT_MAX,
 		.user_freq	     = UINT_MAX,
 		.user_interval	     = ULLONG_MAX,
-		.freq		     = 4000,
+		.freq		     = 500,
 		.target		     = {
 			.uses_mmap   = true,
 		},
@@ -559,7 +559,13 @@
 				evlist = NULL;
 				continue;
 			}
-			pr_debug("perf_evlist__open failed\n");
+
+			if (verbose) {
+				char errbuf[512];
+				perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
+				pr_debug("perf_evlist__open() failed!\n%s\n", errbuf);
+			}
+
 			goto out_put;
 		}
 		break;
diff --git a/tools/perf/tests/hists_cumulate.c b/tools/perf/tests/hists_cumulate.c
index 5e6a86e..ecf136c 100644
--- a/tools/perf/tests/hists_cumulate.c
+++ b/tools/perf/tests/hists_cumulate.c
@@ -191,7 +191,7 @@
 	 * function since TEST_ASSERT_VAL() returns in case of failure.
 	 */
 	hists__collapse_resort(hists, NULL);
-	hists__output_resort(hists, NULL);
+	perf_evsel__output_resort(hists_to_evsel(hists), NULL);
 
 	if (verbose > 2) {
 		pr_info("use callchain: %d, cumulate callchain: %d\n",
diff --git a/tools/perf/tests/hists_filter.c b/tools/perf/tests/hists_filter.c
index 351a424..34b945a 100644
--- a/tools/perf/tests/hists_filter.c
+++ b/tools/perf/tests/hists_filter.c
@@ -145,7 +145,7 @@
 		struct hists *hists = evsel__hists(evsel);
 
 		hists__collapse_resort(hists, NULL);
-		hists__output_resort(hists, NULL);
+		perf_evsel__output_resort(evsel, NULL);
 
 		if (verbose > 2) {
 			pr_info("Normal histogram\n");
diff --git a/tools/perf/tests/hists_output.c b/tools/perf/tests/hists_output.c
index b2312651..23cce67 100644
--- a/tools/perf/tests/hists_output.c
+++ b/tools/perf/tests/hists_output.c
@@ -156,7 +156,7 @@
 		goto out;
 
 	hists__collapse_resort(hists, NULL);
-	hists__output_resort(hists, NULL);
+	perf_evsel__output_resort(evsel, NULL);
 
 	if (verbose > 2) {
 		pr_info("[fields = %s, sort = %s]\n", field_order, sort_order);
@@ -256,7 +256,7 @@
 		goto out;
 
 	hists__collapse_resort(hists, NULL);
-	hists__output_resort(hists, NULL);
+	perf_evsel__output_resort(evsel, NULL);
 
 	if (verbose > 2) {
 		pr_info("[fields = %s, sort = %s]\n", field_order, sort_order);
@@ -310,7 +310,7 @@
 		goto out;
 
 	hists__collapse_resort(hists, NULL);
-	hists__output_resort(hists, NULL);
+	perf_evsel__output_resort(evsel, NULL);
 
 	if (verbose > 2) {
 		pr_info("[fields = %s, sort = %s]\n", field_order, sort_order);
@@ -388,7 +388,7 @@
 		goto out;
 
 	hists__collapse_resort(hists, NULL);
-	hists__output_resort(hists, NULL);
+	perf_evsel__output_resort(evsel, NULL);
 
 	if (verbose > 2) {
 		pr_info("[fields = %s, sort = %s]\n", field_order, sort_order);
@@ -491,7 +491,7 @@
 		goto out;
 
 	hists__collapse_resort(hists, NULL);
-	hists__output_resort(hists, NULL);
+	perf_evsel__output_resort(evsel, NULL);
 
 	if (verbose > 2) {
 		pr_info("[fields = %s, sort = %s]\n", field_order, sort_order);
diff --git a/tools/perf/tests/llvm.c b/tools/perf/tests/llvm.c
index 06f45c1..cff564f 100644
--- a/tools/perf/tests/llvm.c
+++ b/tools/perf/tests/llvm.c
@@ -6,12 +6,6 @@
 #include "tests.h"
 #include "debug.h"
 
-static int perf_config_cb(const char *var, const char *val,
-			  void *arg __maybe_unused)
-{
-	return perf_default_config(var, val, arg);
-}
-
 #ifdef HAVE_LIBBPF_SUPPORT
 static int test__bpf_parsing(void *obj_buf, size_t obj_buf_sz)
 {
@@ -35,6 +29,7 @@
 static struct {
 	const char *source;
 	const char *desc;
+	bool should_load_fail;
 } bpf_source_table[__LLVM_TESTCASE_MAX] = {
 	[LLVM_TESTCASE_BASE] = {
 		.source = test_llvm__bpf_base_prog,
@@ -48,14 +43,19 @@
 		.source = test_llvm__bpf_test_prologue_prog,
 		.desc = "Compile source for BPF prologue generation test",
 	},
+	[LLVM_TESTCASE_BPF_RELOCATION] = {
+		.source = test_llvm__bpf_test_relocation,
+		.desc = "Compile source for BPF relocation test",
+		.should_load_fail = true,
+	},
 };
 
-
 int
 test_llvm__fetch_bpf_obj(void **p_obj_buf,
 			 size_t *p_obj_buf_sz,
 			 enum test_llvm__testcase idx,
-			 bool force)
+			 bool force,
+			 bool *should_load_fail)
 {
 	const char *source;
 	const char *desc;
@@ -68,8 +68,8 @@
 
 	source = bpf_source_table[idx].source;
 	desc = bpf_source_table[idx].desc;
-
-	perf_config(perf_config_cb, NULL);
+	if (should_load_fail)
+		*should_load_fail = bpf_source_table[idx].should_load_fail;
 
 	/*
 	 * Skip this test if user's .perfconfig doesn't set [llvm] section
@@ -136,14 +136,15 @@
 	int ret;
 	void *obj_buf = NULL;
 	size_t obj_buf_sz = 0;
+	bool should_load_fail = false;
 
 	if ((subtest < 0) || (subtest >= __LLVM_TESTCASE_MAX))
 		return TEST_FAIL;
 
 	ret = test_llvm__fetch_bpf_obj(&obj_buf, &obj_buf_sz,
-				       subtest, false);
+				       subtest, false, &should_load_fail);
 
-	if (ret == TEST_OK) {
+	if (ret == TEST_OK && !should_load_fail) {
 		ret = test__bpf_parsing(obj_buf, obj_buf_sz);
 		if (ret != TEST_OK) {
 			pr_debug("Failed to parse test case '%s'\n",
diff --git a/tools/perf/tests/llvm.h b/tools/perf/tests/llvm.h
index 5150b4d..0eaa604 100644
--- a/tools/perf/tests/llvm.h
+++ b/tools/perf/tests/llvm.h
@@ -7,14 +7,17 @@
 extern const char test_llvm__bpf_base_prog[];
 extern const char test_llvm__bpf_test_kbuild_prog[];
 extern const char test_llvm__bpf_test_prologue_prog[];
+extern const char test_llvm__bpf_test_relocation[];
 
 enum test_llvm__testcase {
 	LLVM_TESTCASE_BASE,
 	LLVM_TESTCASE_KBUILD,
 	LLVM_TESTCASE_BPF_PROLOGUE,
+	LLVM_TESTCASE_BPF_RELOCATION,
 	__LLVM_TESTCASE_MAX,
 };
 
 int test_llvm__fetch_bpf_obj(void **p_obj_buf, size_t *p_obj_buf_sz,
-			     enum test_llvm__testcase index, bool force);
+			     enum test_llvm__testcase index, bool force,
+			     bool *should_load_fail);
 #endif
diff --git a/tools/perf/tests/make b/tools/perf/tests/make
index df38dec..cac15d9 100644
--- a/tools/perf/tests/make
+++ b/tools/perf/tests/make
@@ -5,7 +5,7 @@
 # no target specified, trigger the whole suite
 all:
 	@echo "Testing Makefile";      $(MAKE) -sf tests/make MK=Makefile
-	@echo "Testing Makefile.perf"; $(MAKE) -sf tests/make MK=Makefile.perf
+	@echo "Testing Makefile.perf"; $(MAKE) -sf tests/make MK=Makefile.perf SET_PARALLEL=1 SET_O=1
 else
 # run only specific test over 'Makefile'
 %:
@@ -13,6 +13,27 @@
 endif
 else
 PERF := .
+PERF_O := $(PERF)
+O_OPT :=
+FULL_O := $(shell readlink -f $(PERF_O) || echo $(PERF_O))
+
+ifneq ($(O),)
+  FULL_O := $(shell readlink -f $(O) || echo $(O))
+  PERF_O := $(FULL_O)
+  ifeq ($(SET_O),1)
+    O_OPT := 'O=$(FULL_O)'
+  endif
+  K_O_OPT := 'O=$(FULL_O)'
+endif
+
+PARALLEL_OPT=
+ifeq ($(SET_PARALLEL),1)
+  cores := $(shell (getconf _NPROCESSORS_ONLN || egrep -c '^processor|^CPU[0-9]' /proc/cpuinfo) 2>/dev/null)
+  ifeq ($(cores),0)
+    cores := 1
+  endif
+  PARALLEL_OPT="-j$(cores)"
+endif
 
 # As per kernel Makefile, avoid funny character set dependencies
 unexport LC_ALL
@@ -59,6 +80,7 @@
 make_no_libbionic   := NO_LIBBIONIC=1
 make_no_auxtrace    := NO_AUXTRACE=1
 make_no_libbpf	    := NO_LIBBPF=1
+make_no_libcrypto   := NO_LIBCRYPTO=1
 make_tags           := tags
 make_cscope         := cscope
 make_help           := help
@@ -82,6 +104,7 @@
 make_minimal        += NO_DEMANGLE=1 NO_LIBELF=1 NO_LIBUNWIND=1 NO_BACKTRACE=1
 make_minimal        += NO_LIBNUMA=1 NO_LIBAUDIT=1 NO_LIBBIONIC=1
 make_minimal        += NO_LIBDW_DWARF_UNWIND=1 NO_AUXTRACE=1 NO_LIBBPF=1
+make_minimal        += NO_LIBCRYPTO=1
 
 # $(run) contains all available tests
 run := make_pure
@@ -90,6 +113,9 @@
 # disable features detection
 ifeq ($(MK),Makefile)
 run += make_clean_all
+MAKE_F := $(MAKE)
+else
+MAKE_F := $(MAKE) -f $(MK)
 endif
 run += make_python_perf_so
 run += make_debug
@@ -156,11 +182,11 @@
 test_make_help_O := $(test_ok)
 test_make_doc_O  := $(test_ok)
 
-test_make_python_perf_so := test -f $(PERF)/python/perf.so
+test_make_python_perf_so := test -f $(PERF_O)/python/perf.so
 
-test_make_perf_o           := test -f $(PERF)/perf.o
-test_make_util_map_o       := test -f $(PERF)/util/map.o
-test_make_util_pmu_bison_o := test -f $(PERF)/util/pmu-bison.o
+test_make_perf_o           := test -f $(PERF_O)/perf.o
+test_make_util_map_o       := test -f $(PERF_O)/util/map.o
+test_make_util_pmu_bison_o := test -f $(PERF_O)/util/pmu-bison.o
 
 define test_dest_files
   for file in $(1); do				\
@@ -227,7 +253,7 @@
 test_make_util_map_o_O        := test -f $$TMP_O/util/map.o
 test_make_util_pmu_bison_o_O := test -f $$TMP_O/util/pmu-bison.o
 
-test_default = test -x $(PERF)/perf
+test_default = test -x $(PERF_O)/perf
 test = $(if $(test_$1),$(test_$1),$(test_default))
 
 test_default_O = test -x $$TMP_O/perf
@@ -240,6 +266,8 @@
 run_O := $(shell shuf -e $(run_O))
 endif
 
+max_width := $(shell echo $(run_O) | sed 's/ /\n/g' | wc -L)
+
 ifdef DEBUG
 d := $(info run   $(run))
 d := $(info run_O $(run_O))
@@ -247,13 +275,13 @@
 
 MAKEFLAGS := --no-print-directory
 
-clean := @(cd $(PERF); make -s -f $(MK) clean >/dev/null)
+clean := @(cd $(PERF); $(MAKE_F) -s $(O_OPT) clean >/dev/null)
 
 $(run):
 	$(call clean)
 	@TMP_DEST=$$(mktemp -d); \
-	cmd="cd $(PERF) && make -f $(MK) DESTDIR=$$TMP_DEST $($@)"; \
-	echo "- $@: $$cmd" && echo $$cmd > $@ && \
+	cmd="cd $(PERF) && $(MAKE_F) $($@) $(PARALLEL_OPT) $(O_OPT) DESTDIR=$$TMP_DEST"; \
+	printf "%*.*s: %s\n" $(max_width) $(max_width) "$@" "$$cmd" && echo $$cmd > $@ && \
 	( eval $$cmd ) >> $@ 2>&1; \
 	echo "  test: $(call test,$@)" >> $@ 2>&1; \
 	$(call test,$@) && \
@@ -263,8 +291,8 @@
 	$(call clean)
 	@TMP_O=$$(mktemp -d); \
 	TMP_DEST=$$(mktemp -d); \
-	cmd="cd $(PERF) && make -f $(MK) O=$$TMP_O DESTDIR=$$TMP_DEST $($(patsubst %_O,%,$@))"; \
-	echo "- $@: $$cmd" && echo $$cmd > $@ && \
+	cmd="cd $(PERF) && $(MAKE_F) $($(patsubst %_O,%,$@)) $(PARALLEL_OPT) O=$$TMP_O DESTDIR=$$TMP_DEST"; \
+	printf "%*.*s: %s\n" $(max_width) $(max_width) "$@" "$$cmd" && echo $$cmd > $@ && \
 	( eval $$cmd ) >> $@ 2>&1 && \
 	echo "  test: $(call test_O,$@)" >> $@ 2>&1; \
 	$(call test_O,$@) && \
@@ -276,23 +304,60 @@
 	( eval $$cmd ) >> $@ 2>&1 && \
 	rm -f $@
 
+KERNEL_O := ../..
+ifneq ($(O),)
+  KERNEL_O := $(O)
+endif
+
 make_kernelsrc:
-	@echo "- make -C <kernelsrc> tools/perf"
+	@echo "- make -C <kernelsrc> $(PARALLEL_OPT) $(K_O_OPT) tools/perf"
 	$(call clean); \
-	(make -C ../.. tools/perf) > $@ 2>&1 && \
-	test -x perf && rm -f $@ || (cat $@ ; false)
+	(make -C ../.. $(PARALLEL_OPT) $(K_O_OPT) tools/perf) > $@ 2>&1 && \
+	test -x $(KERNEL_O)/tools/perf/perf && rm -f $@ || (cat $@ ; false)
 
 make_kernelsrc_tools:
-	@echo "- make -C <kernelsrc>/tools perf"
+	@echo "- make -C <kernelsrc>/tools $(PARALLEL_OPT) $(K_O_OPT) perf"
 	$(call clean); \
-	(make -C ../../tools perf) > $@ 2>&1 && \
-	test -x perf && rm -f $@ || (cat $@ ; false)
+	(make -C ../../tools $(PARALLEL_OPT) $(K_O_OPT) perf) > $@ 2>&1 && \
+	test -x $(KERNEL_O)/tools/perf/perf && rm -f $@ || (cat $@ ; false)
+
+FEATURES_DUMP_FILE := $(FULL_O)/BUILD_TEST_FEATURE_DUMP
+FEATURES_DUMP_FILE_STATIC := $(FULL_O)/BUILD_TEST_FEATURE_DUMP_STATIC
 
 all: $(run) $(run_O) tarpkg make_kernelsrc make_kernelsrc_tools
 	@echo OK
+	@rm -f $(FEATURES_DUMP_FILE) $(FEATURES_DUMP_FILE_STATIC)
 
 out: $(run_O)
 	@echo OK
+	@rm -f $(FEATURES_DUMP_FILE) $(FEATURES_DUMP_FILE_STATIC)
+
+ifeq ($(REUSE_FEATURES_DUMP),1)
+$(FEATURES_DUMP_FILE):
+	$(call clean)
+	@cmd="cd $(PERF) && make FEATURE_DUMP_COPY=$@ $(O_OPT) feature-dump"; \
+	echo "- $@: $$cmd" && echo $$cmd && \
+	( eval $$cmd ) > /dev/null 2>&1
+
+$(FEATURES_DUMP_FILE_STATIC):
+	$(call clean)
+	@cmd="cd $(PERF) && make FEATURE_DUMP_COPY=$@ $(O_OPT) LDFLAGS='-static' feature-dump"; \
+	echo "- $@: $$cmd" && echo $$cmd && \
+	( eval $$cmd ) > /dev/null 2>&1
+
+# Add feature dump dependency for run/run_O targets
+$(foreach t,$(run) $(run_O),$(eval \
+	$(t): $(if $(findstring make_static,$(t)),\
+		$(FEATURES_DUMP_FILE_STATIC),\
+		$(FEATURES_DUMP_FILE))))
+
+# Append 'FEATURES_DUMP=' option to all test cases. For example:
+# make_no_libbpf: NO_LIBBPF=1  --> NO_LIBBPF=1 FEATURES_DUMP=/a/b/BUILD_TEST_FEATURE_DUMP
+# make_static: LDFLAGS=-static --> LDFLAGS=-static FEATURES_DUMP=/a/b/BUILD_TEST_FEATURE_DUMP_STATIC
+$(foreach t,$(run),$(if $(findstring make_static,$(t)),\
+			$(eval $(t) := $($(t)) FEATURES_DUMP=$(FEATURES_DUMP_FILE_STATIC)),\
+			$(eval $(t) := $($(t)) FEATURES_DUMP=$(FEATURES_DUMP_FILE))))
+endif
 
 .PHONY: all $(run) $(run_O) tarpkg clean make_kernelsrc make_kernelsrc_tools
 endif # ifndef MK
diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c
index abe8849..7865f68 100644
--- a/tools/perf/tests/parse-events.c
+++ b/tools/perf/tests/parse-events.c
@@ -1271,6 +1271,38 @@
 	return 0;
 }
 
+static int test__checkevent_config_symbol(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+	TEST_ASSERT_VAL("wrong name setting", strcmp(evsel->name, "insn") == 0);
+	return 0;
+}
+
+static int test__checkevent_config_raw(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+	TEST_ASSERT_VAL("wrong name setting", strcmp(evsel->name, "rawpmu") == 0);
+	return 0;
+}
+
+static int test__checkevent_config_num(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+	TEST_ASSERT_VAL("wrong name setting", strcmp(evsel->name, "numpmu") == 0);
+	return 0;
+}
+
+static int test__checkevent_config_cache(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+	TEST_ASSERT_VAL("wrong name setting", strcmp(evsel->name, "cachepmu") == 0);
+	return 0;
+}
+
 static int count_tracepoints(void)
 {
 	struct dirent *events_ent;
@@ -1579,6 +1611,26 @@
 		.check = test__checkevent_precise_max_modifier,
 		.id    = 47,
 	},
+	{
+		.name  = "instructions/name=insn/",
+		.check = test__checkevent_config_symbol,
+		.id    = 48,
+	},
+	{
+		.name  = "r1234/name=rawpmu/",
+		.check = test__checkevent_config_raw,
+		.id    = 49,
+	},
+	{
+		.name  = "4:0x6530160/name=numpmu/",
+		.check = test__checkevent_config_num,
+		.id    = 50,
+	},
+	{
+		.name  = "L1-dcache-misses/name=cachepmu/",
+		.check = test__checkevent_config_cache,
+		.id    = 51,
+	},
 };
 
 static struct evlist_test test__events_pmu[] = {
@@ -1666,7 +1718,7 @@
 	}
 
 	ret = t->check(&terms);
-	parse_events__free_terms(&terms);
+	parse_events_terms__purge(&terms);
 
 	return ret;
 }
diff --git a/tools/perf/tests/vmlinux-kallsyms.c b/tools/perf/tests/vmlinux-kallsyms.c
index f0bfc9e..630b0b4 100644
--- a/tools/perf/tests/vmlinux-kallsyms.c
+++ b/tools/perf/tests/vmlinux-kallsyms.c
@@ -110,7 +110,6 @@
 	 */
 	for (nd = rb_first(&vmlinux_map->dso->symbols[type]); nd; nd = rb_next(nd)) {
 		struct symbol *pair, *first_pair;
-		bool backwards = true;
 
 		sym  = rb_entry(nd, struct symbol, rb_node);
 
@@ -151,27 +150,14 @@
 				continue;
 
 			} else {
-				struct rb_node *nnd;
-detour:
-				nnd = backwards ? rb_prev(&pair->rb_node) :
-						  rb_next(&pair->rb_node);
-				if (nnd) {
-					struct symbol *next = rb_entry(nnd, struct symbol, rb_node);
-
-					if (UM(next->start) == mem_start) {
-						pair = next;
+				pair = machine__find_kernel_symbol_by_name(&kallsyms, type, sym->name, NULL, NULL);
+				if (pair) {
+					if (UM(pair->start) == mem_start)
 						goto next_pair;
-					}
-				}
 
-				if (backwards) {
-					backwards = false;
-					pair = first_pair;
-					goto detour;
+					pr_debug("%#" PRIx64 ": diff name v: %s k: %s\n",
+						 mem_start, sym->name, pair->name);
 				}
-
-				pr_debug("%#" PRIx64 ": diff name v: %s k: %s\n",
-					 mem_start, sym->name, pair->name);
 			}
 		} else
 			pr_debug("%#" PRIx64 ": %s not on kallsyms\n",
diff --git a/tools/perf/ui/browser.c b/tools/perf/ui/browser.c
index d372021..af68a9d 100644
--- a/tools/perf/ui/browser.c
+++ b/tools/perf/ui/browser.c
@@ -531,8 +531,8 @@
 		.bg	  = "yellow",
 	},
 	{
-		.colorset = HE_COLORSET_CODE,
-		.name	  = "code",
+		.colorset = HE_COLORSET_JUMP_ARROWS,
+		.name	  = "jump_arrows",
 		.fg	  = "blue",
 		.bg	  = "default",
 	},
diff --git a/tools/perf/ui/browser.h b/tools/perf/ui/browser.h
index 01781de..be3b70e 100644
--- a/tools/perf/ui/browser.h
+++ b/tools/perf/ui/browser.h
@@ -7,7 +7,7 @@
 #define HE_COLORSET_MEDIUM	51
 #define HE_COLORSET_NORMAL	52
 #define HE_COLORSET_SELECTED	53
-#define HE_COLORSET_CODE	54
+#define HE_COLORSET_JUMP_ARROWS	54
 #define HE_COLORSET_ADDR	55
 #define HE_COLORSET_ROOT	56
 
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index d4d7cc2..4fc208e 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -284,7 +284,7 @@
 		to = (u64)btarget->idx;
 	}
 
-	ui_browser__set_color(browser, HE_COLORSET_CODE);
+	ui_browser__set_color(browser, HE_COLORSET_JUMP_ARROWS);
 	__ui_browser__line_arrow(browser, pcnt_width + 2 + ab->addr_width,
 				 from, to);
 }
@@ -755,11 +755,11 @@
 				nd = browser->curr_hot;
 			break;
 		case K_UNTAB:
-			if (nd != NULL)
+			if (nd != NULL) {
 				nd = rb_next(nd);
 				if (nd == NULL)
 					nd = rb_first(&browser->entries);
-			else
+			} else
 				nd = browser->curr_hot;
 			break;
 		case K_F1:
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index 08c09ad..4b98165 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -32,6 +32,7 @@
 	bool		     show_headers;
 	float		     min_pcnt;
 	u64		     nr_non_filtered_entries;
+	u64		     nr_hierarchy_entries;
 	u64		     nr_callchain_rows;
 };
 
@@ -58,11 +59,11 @@
 
 	for (nd = rb_first(&hists->entries);
 	     (nd = hists__filter_entries(nd, browser->min_pcnt)) != NULL;
-	     nd = rb_next(nd)) {
+	     nd = rb_hierarchy_next(nd)) {
 		struct hist_entry *he =
 			rb_entry(nd, struct hist_entry, rb_node);
 
-		if (he->unfolded)
+		if (he->leaf && he->unfolded)
 			unfolded_rows += he->nr_rows;
 	}
 	return unfolded_rows;
@@ -72,7 +73,9 @@
 {
 	u32 nr_entries;
 
-	if (hist_browser__has_filter(hb))
+	if (symbol_conf.report_hierarchy)
+		nr_entries = hb->nr_hierarchy_entries;
+	else if (hist_browser__has_filter(hb))
 		nr_entries = hb->nr_non_filtered_entries;
 	else
 		nr_entries = hb->hists->nr_entries;
@@ -247,6 +250,38 @@
 	return n;
 }
 
+static int hierarchy_count_rows(struct hist_browser *hb, struct hist_entry *he,
+				bool include_children)
+{
+	int count = 0;
+	struct rb_node *node;
+	struct hist_entry *child;
+
+	if (he->leaf)
+		return callchain__count_rows(&he->sorted_chain);
+
+	if (he->has_no_entry)
+		return 1;
+
+	node = rb_first(&he->hroot_out);
+	while (node) {
+		float percent;
+
+		child = rb_entry(node, struct hist_entry, rb_node);
+		percent = hist_entry__get_percent_limit(child);
+
+		if (!child->filtered && percent >= hb->min_pcnt) {
+			count++;
+
+			if (include_children && child->unfolded)
+				count += hierarchy_count_rows(hb, child, true);
+		}
+
+		node = rb_next(node);
+	}
+	return count;
+}
+
 static bool hist_entry__toggle_fold(struct hist_entry *he)
 {
 	if (!he)
@@ -326,11 +361,17 @@
 
 static void hist_entry__init_have_children(struct hist_entry *he)
 {
-	if (!he->init_have_children) {
+	if (he->init_have_children)
+		return;
+
+	if (he->leaf) {
 		he->has_children = !RB_EMPTY_ROOT(&he->sorted_chain);
 		callchain__init_have_children(&he->sorted_chain);
-		he->init_have_children = true;
+	} else {
+		he->has_children = !RB_EMPTY_ROOT(&he->hroot_out);
 	}
+
+	he->init_have_children = true;
 }
 
 static bool hist_browser__toggle_fold(struct hist_browser *browser)
@@ -349,17 +390,49 @@
 		has_children = callchain_list__toggle_fold(cl);
 
 	if (has_children) {
+		int child_rows = 0;
+
 		hist_entry__init_have_children(he);
 		browser->b.nr_entries -= he->nr_rows;
-		browser->nr_callchain_rows -= he->nr_rows;
 
-		if (he->unfolded)
-			he->nr_rows = callchain__count_rows(&he->sorted_chain);
+		if (he->leaf)
+			browser->nr_callchain_rows -= he->nr_rows;
 		else
+			browser->nr_hierarchy_entries -= he->nr_rows;
+
+		if (symbol_conf.report_hierarchy)
+			child_rows = hierarchy_count_rows(browser, he, true);
+
+		if (he->unfolded) {
+			if (he->leaf)
+				he->nr_rows = callchain__count_rows(&he->sorted_chain);
+			else
+				he->nr_rows = hierarchy_count_rows(browser, he, false);
+
+			/* account grand children */
+			if (symbol_conf.report_hierarchy)
+				browser->b.nr_entries += child_rows - he->nr_rows;
+
+			if (!he->leaf && he->nr_rows == 0) {
+				he->has_no_entry = true;
+				he->nr_rows = 1;
+			}
+		} else {
+			if (symbol_conf.report_hierarchy)
+				browser->b.nr_entries -= child_rows - he->nr_rows;
+
+			if (he->has_no_entry)
+				he->has_no_entry = false;
+
 			he->nr_rows = 0;
+		}
 
 		browser->b.nr_entries += he->nr_rows;
-		browser->nr_callchain_rows += he->nr_rows;
+
+		if (he->leaf)
+			browser->nr_callchain_rows += he->nr_rows;
+		else
+			browser->nr_hierarchy_entries += he->nr_rows;
 
 		return true;
 	}
@@ -422,13 +495,38 @@
 	return n;
 }
 
-static void hist_entry__set_folding(struct hist_entry *he, bool unfold)
+static int hierarchy_set_folding(struct hist_browser *hb, struct hist_entry *he,
+				 bool unfold __maybe_unused)
+{
+	float percent;
+	struct rb_node *nd;
+	struct hist_entry *child;
+	int n = 0;
+
+	for (nd = rb_first(&he->hroot_out); nd; nd = rb_next(nd)) {
+		child = rb_entry(nd, struct hist_entry, rb_node);
+		percent = hist_entry__get_percent_limit(child);
+		if (!child->filtered && percent >= hb->min_pcnt)
+			n++;
+	}
+
+	return n;
+}
+
+static void hist_entry__set_folding(struct hist_entry *he,
+				    struct hist_browser *hb, bool unfold)
 {
 	hist_entry__init_have_children(he);
 	he->unfolded = unfold ? he->has_children : false;
 
 	if (he->has_children) {
-		int n = callchain__set_folding(&he->sorted_chain, unfold);
+		int n;
+
+		if (he->leaf)
+			n = callchain__set_folding(&he->sorted_chain, unfold);
+		else
+			n = hierarchy_set_folding(hb, he, unfold);
+
 		he->nr_rows = unfold ? n : 0;
 	} else
 		he->nr_rows = 0;
@@ -438,19 +536,38 @@
 __hist_browser__set_folding(struct hist_browser *browser, bool unfold)
 {
 	struct rb_node *nd;
-	struct hists *hists = browser->hists;
+	struct hist_entry *he;
+	double percent;
 
-	for (nd = rb_first(&hists->entries);
-	     (nd = hists__filter_entries(nd, browser->min_pcnt)) != NULL;
-	     nd = rb_next(nd)) {
-		struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node);
-		hist_entry__set_folding(he, unfold);
-		browser->nr_callchain_rows += he->nr_rows;
+	nd = rb_first(&browser->hists->entries);
+	while (nd) {
+		he = rb_entry(nd, struct hist_entry, rb_node);
+
+		/* set folding state even if it's currently folded */
+		nd = __rb_hierarchy_next(nd, HMD_FORCE_CHILD);
+
+		hist_entry__set_folding(he, browser, unfold);
+
+		percent = hist_entry__get_percent_limit(he);
+		if (he->filtered || percent < browser->min_pcnt)
+			continue;
+
+		if (!he->depth || unfold)
+			browser->nr_hierarchy_entries++;
+		if (he->leaf)
+			browser->nr_callchain_rows += he->nr_rows;
+		else if (unfold && !hist_entry__has_hierarchy_children(he, browser->min_pcnt)) {
+			browser->nr_hierarchy_entries++;
+			he->has_no_entry = true;
+			he->nr_rows = 1;
+		} else
+			he->has_no_entry = false;
 	}
 }
 
 static void hist_browser__set_folding(struct hist_browser *browser, bool unfold)
 {
+	browser->nr_hierarchy_entries = 0;
 	browser->nr_callchain_rows = 0;
 	__hist_browser__set_folding(browser, unfold);
 
@@ -657,9 +774,24 @@
 	return 1;
 }
 
+static bool check_percent_display(struct rb_node *node, u64 parent_total)
+{
+	struct callchain_node *child;
+
+	if (node == NULL)
+		return false;
+
+	if (rb_next(node))
+		return true;
+
+	child = rb_entry(node, struct callchain_node, rb_node);
+	return callchain_cumul_hits(child) != parent_total;
+}
+
 static int hist_browser__show_callchain_flat(struct hist_browser *browser,
 					     struct rb_root *root,
 					     unsigned short row, u64 total,
+					     u64 parent_total,
 					     print_callchain_entry_fn print,
 					     struct callchain_print_arg *arg,
 					     check_output_full_fn is_output_full)
@@ -669,7 +801,7 @@
 	bool need_percent;
 
 	node = rb_first(root);
-	need_percent = node && rb_next(node);
+	need_percent = check_percent_display(node, parent_total);
 
 	while (node) {
 		struct callchain_node *child = rb_entry(node, struct callchain_node, rb_node);
@@ -763,6 +895,7 @@
 static int hist_browser__show_callchain_folded(struct hist_browser *browser,
 					       struct rb_root *root,
 					       unsigned short row, u64 total,
+					       u64 parent_total,
 					       print_callchain_entry_fn print,
 					       struct callchain_print_arg *arg,
 					       check_output_full_fn is_output_full)
@@ -772,7 +905,7 @@
 	bool need_percent;
 
 	node = rb_first(root);
-	need_percent = node && rb_next(node);
+	need_percent = check_percent_display(node, parent_total);
 
 	while (node) {
 		struct callchain_node *child = rb_entry(node, struct callchain_node, rb_node);
@@ -844,20 +977,24 @@
 	return row - first_row;
 }
 
-static int hist_browser__show_callchain(struct hist_browser *browser,
+static int hist_browser__show_callchain_graph(struct hist_browser *browser,
 					struct rb_root *root, int level,
 					unsigned short row, u64 total,
+					u64 parent_total,
 					print_callchain_entry_fn print,
 					struct callchain_print_arg *arg,
 					check_output_full_fn is_output_full)
 {
 	struct rb_node *node;
 	int first_row = row, offset = level * LEVEL_OFFSET_STEP;
-	u64 new_total;
 	bool need_percent;
+	u64 percent_total = total;
+
+	if (callchain_param.mode == CHAIN_GRAPH_REL)
+		percent_total = parent_total;
 
 	node = rb_first(root);
-	need_percent = node && rb_next(node);
+	need_percent = check_percent_display(node, parent_total);
 
 	while (node) {
 		struct callchain_node *child = rb_entry(node, struct callchain_node, rb_node);
@@ -878,7 +1015,7 @@
 			folded_sign = callchain_list__folded(chain);
 
 			row += hist_browser__show_callchain_list(browser, child,
-							chain, row, total,
+							chain, row, percent_total,
 							was_first && need_percent,
 							offset + extra_offset,
 							print, arg);
@@ -893,13 +1030,9 @@
 		if (folded_sign == '-') {
 			const int new_level = level + (extra_offset ? 2 : 1);
 
-			if (callchain_param.mode == CHAIN_GRAPH_REL)
-				new_total = child->children_hit;
-			else
-				new_total = total;
-
-			row += hist_browser__show_callchain(browser, &child->rb_root,
-							    new_level, row, new_total,
+			row += hist_browser__show_callchain_graph(browser, &child->rb_root,
+							    new_level, row, total,
+							    child->children_hit,
 							    print, arg, is_output_full);
 		}
 		if (is_output_full(browser, row))
@@ -910,6 +1043,45 @@
 	return row - first_row;
 }
 
+static int hist_browser__show_callchain(struct hist_browser *browser,
+					struct hist_entry *entry, int level,
+					unsigned short row,
+					print_callchain_entry_fn print,
+					struct callchain_print_arg *arg,
+					check_output_full_fn is_output_full)
+{
+	u64 total = hists__total_period(entry->hists);
+	u64 parent_total;
+	int printed;
+
+	if (symbol_conf.cumulate_callchain)
+		parent_total = entry->stat_acc->period;
+	else
+		parent_total = entry->stat.period;
+
+	if (callchain_param.mode == CHAIN_FLAT) {
+		printed = hist_browser__show_callchain_flat(browser,
+						&entry->sorted_chain, row,
+						total, parent_total, print, arg,
+						is_output_full);
+	} else if (callchain_param.mode == CHAIN_FOLDED) {
+		printed = hist_browser__show_callchain_folded(browser,
+						&entry->sorted_chain, row,
+						total, parent_total, print, arg,
+						is_output_full);
+	} else {
+		printed = hist_browser__show_callchain_graph(browser,
+						&entry->sorted_chain, level, row,
+						total, parent_total, print, arg,
+						is_output_full);
+	}
+
+	if (arg->is_current_entry)
+		browser->he_selection = entry;
+
+	return printed;
+}
+
 struct hpp_arg {
 	struct ui_browser *b;
 	char folded_sign;
@@ -1006,7 +1178,6 @@
 				    struct hist_entry *entry,
 				    unsigned short row)
 {
-	char s[256];
 	int printed = 0;
 	int width = browser->b.width;
 	char folded_sign = ' ';
@@ -1031,16 +1202,18 @@
 			.folded_sign	= folded_sign,
 			.current_entry	= current_entry,
 		};
-		struct perf_hpp hpp = {
-			.buf		= s,
-			.size		= sizeof(s),
-			.ptr		= &arg,
-		};
 		int column = 0;
 
 		hist_browser__gotorc(browser, row, 0);
 
-		perf_hpp__for_each_format(fmt) {
+		hists__for_each_format(browser->hists, fmt) {
+			char s[2048];
+			struct perf_hpp hpp = {
+				.buf	= s,
+				.size	= sizeof(s),
+				.ptr	= &arg,
+			};
+
 			if (perf_hpp__should_skip(fmt, entry->hists) ||
 			    column++ < browser->b.horiz_scroll)
 				continue;
@@ -1065,11 +1238,18 @@
 			}
 
 			if (fmt->color) {
-				width -= fmt->color(fmt, &hpp, entry);
+				int ret = fmt->color(fmt, &hpp, entry);
+				hist_entry__snprintf_alignment(entry, &hpp, fmt, ret);
+				/*
+				 * fmt->color() already used ui_browser to
+				 * print the non alignment bits, skip it (+ret):
+				 */
+				ui_browser__printf(&browser->b, "%s", s + ret);
 			} else {
-				width -= fmt->entry(fmt, &hpp, entry);
+				hist_entry__snprintf_alignment(entry, &hpp, fmt, fmt->entry(fmt, &hpp, entry));
 				ui_browser__printf(&browser->b, "%s", s);
 			}
+			width -= hpp.buf - s;
 		}
 
 		/* The scroll bar isn't being used */
@@ -1084,43 +1264,246 @@
 		--row_offset;
 
 	if (folded_sign == '-' && row != browser->b.rows) {
-		u64 total = hists__total_period(entry->hists);
 		struct callchain_print_arg arg = {
 			.row_offset = row_offset,
 			.is_current_entry = current_entry,
 		};
 
-		if (callchain_param.mode == CHAIN_GRAPH_REL) {
-			if (symbol_conf.cumulate_callchain)
-				total = entry->stat_acc->period;
-			else
-				total = entry->stat.period;
-		}
-
-		if (callchain_param.mode == CHAIN_FLAT) {
-			printed += hist_browser__show_callchain_flat(browser,
-					&entry->sorted_chain, row, total,
+		printed += hist_browser__show_callchain(browser, entry, 1, row,
 					hist_browser__show_callchain_entry, &arg,
 					hist_browser__check_output_full);
-		} else if (callchain_param.mode == CHAIN_FOLDED) {
-			printed += hist_browser__show_callchain_folded(browser,
-					&entry->sorted_chain, row, total,
-					hist_browser__show_callchain_entry, &arg,
-					hist_browser__check_output_full);
-		} else {
-			printed += hist_browser__show_callchain(browser,
-					&entry->sorted_chain, 1, row, total,
-					hist_browser__show_callchain_entry, &arg,
-					hist_browser__check_output_full);
-		}
-
-		if (arg.is_current_entry)
-			browser->he_selection = entry;
 	}
 
 	return printed;
 }
 
+static int hist_browser__show_hierarchy_entry(struct hist_browser *browser,
+					      struct hist_entry *entry,
+					      unsigned short row,
+					      int level)
+{
+	int printed = 0;
+	int width = browser->b.width;
+	char folded_sign = ' ';
+	bool current_entry = ui_browser__is_current_entry(&browser->b, row);
+	off_t row_offset = entry->row_offset;
+	bool first = true;
+	struct perf_hpp_fmt *fmt;
+	struct perf_hpp_list_node *fmt_node;
+	struct hpp_arg arg = {
+		.b		= &browser->b,
+		.current_entry	= current_entry,
+	};
+	int column = 0;
+	int hierarchy_indent = (entry->hists->nr_hpp_node - 2) * HIERARCHY_INDENT;
+
+	if (current_entry) {
+		browser->he_selection = entry;
+		browser->selection = &entry->ms;
+	}
+
+	hist_entry__init_have_children(entry);
+	folded_sign = hist_entry__folded(entry);
+	arg.folded_sign = folded_sign;
+
+	if (entry->leaf && row_offset) {
+		row_offset--;
+		goto show_callchain;
+	}
+
+	hist_browser__gotorc(browser, row, 0);
+
+	if (current_entry && browser->b.navkeypressed)
+		ui_browser__set_color(&browser->b, HE_COLORSET_SELECTED);
+	else
+		ui_browser__set_color(&browser->b, HE_COLORSET_NORMAL);
+
+	ui_browser__write_nstring(&browser->b, "", level * HIERARCHY_INDENT);
+	width -= level * HIERARCHY_INDENT;
+
+	/* the first hpp_list_node is for overhead columns */
+	fmt_node = list_first_entry(&entry->hists->hpp_formats,
+				    struct perf_hpp_list_node, list);
+	perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) {
+		char s[2048];
+		struct perf_hpp hpp = {
+			.buf		= s,
+			.size		= sizeof(s),
+			.ptr		= &arg,
+		};
+
+		if (perf_hpp__should_skip(fmt, entry->hists) ||
+		    column++ < browser->b.horiz_scroll)
+			continue;
+
+		if (current_entry && browser->b.navkeypressed) {
+			ui_browser__set_color(&browser->b,
+					      HE_COLORSET_SELECTED);
+		} else {
+			ui_browser__set_color(&browser->b,
+					      HE_COLORSET_NORMAL);
+		}
+
+		if (first) {
+			ui_browser__printf(&browser->b, "%c", folded_sign);
+			width--;
+			first = false;
+		} else {
+			ui_browser__printf(&browser->b, "  ");
+			width -= 2;
+		}
+
+		if (fmt->color) {
+			int ret = fmt->color(fmt, &hpp, entry);
+			hist_entry__snprintf_alignment(entry, &hpp, fmt, ret);
+			/*
+			 * fmt->color() already used ui_browser to
+			 * print the non alignment bits, skip it (+ret):
+			 */
+			ui_browser__printf(&browser->b, "%s", s + ret);
+		} else {
+			int ret = fmt->entry(fmt, &hpp, entry);
+			hist_entry__snprintf_alignment(entry, &hpp, fmt, ret);
+			ui_browser__printf(&browser->b, "%s", s);
+		}
+		width -= hpp.buf - s;
+	}
+
+	ui_browser__write_nstring(&browser->b, "", hierarchy_indent);
+	width -= hierarchy_indent;
+
+	if (column >= browser->b.horiz_scroll) {
+		char s[2048];
+		struct perf_hpp hpp = {
+			.buf		= s,
+			.size		= sizeof(s),
+			.ptr		= &arg,
+		};
+
+		if (current_entry && browser->b.navkeypressed) {
+			ui_browser__set_color(&browser->b,
+					      HE_COLORSET_SELECTED);
+		} else {
+			ui_browser__set_color(&browser->b,
+					      HE_COLORSET_NORMAL);
+		}
+
+		perf_hpp_list__for_each_format(entry->hpp_list, fmt) {
+			ui_browser__write_nstring(&browser->b, "", 2);
+			width -= 2;
+
+			/*
+			 * No need to call hist_entry__snprintf_alignment()
+			 * since this fmt is always the last column in the
+			 * hierarchy mode.
+			 */
+			if (fmt->color) {
+				width -= fmt->color(fmt, &hpp, entry);
+			} else {
+				int i = 0;
+
+				width -= fmt->entry(fmt, &hpp, entry);
+				ui_browser__printf(&browser->b, "%s", ltrim(s));
+
+				while (isspace(s[i++]))
+					width++;
+			}
+		}
+	}
+
+	/* The scroll bar isn't being used */
+	if (!browser->b.navkeypressed)
+		width += 1;
+
+	ui_browser__write_nstring(&browser->b, "", width);
+
+	++row;
+	++printed;
+
+show_callchain:
+	if (entry->leaf && folded_sign == '-' && row != browser->b.rows) {
+		struct callchain_print_arg carg = {
+			.row_offset = row_offset,
+		};
+
+		printed += hist_browser__show_callchain(browser, entry,
+					level + 1, row,
+					hist_browser__show_callchain_entry, &carg,
+					hist_browser__check_output_full);
+	}
+
+	return printed;
+}
+
+static int hist_browser__show_no_entry(struct hist_browser *browser,
+				       unsigned short row, int level)
+{
+	int width = browser->b.width;
+	bool current_entry = ui_browser__is_current_entry(&browser->b, row);
+	bool first = true;
+	int column = 0;
+	int ret;
+	struct perf_hpp_fmt *fmt;
+	struct perf_hpp_list_node *fmt_node;
+	int indent = browser->hists->nr_hpp_node - 2;
+
+	if (current_entry) {
+		browser->he_selection = NULL;
+		browser->selection = NULL;
+	}
+
+	hist_browser__gotorc(browser, row, 0);
+
+	if (current_entry && browser->b.navkeypressed)
+		ui_browser__set_color(&browser->b, HE_COLORSET_SELECTED);
+	else
+		ui_browser__set_color(&browser->b, HE_COLORSET_NORMAL);
+
+	ui_browser__write_nstring(&browser->b, "", level * HIERARCHY_INDENT);
+	width -= level * HIERARCHY_INDENT;
+
+	/* the first hpp_list_node is for overhead columns */
+	fmt_node = list_first_entry(&browser->hists->hpp_formats,
+				    struct perf_hpp_list_node, list);
+	perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) {
+		if (perf_hpp__should_skip(fmt, browser->hists) ||
+		    column++ < browser->b.horiz_scroll)
+			continue;
+
+		ret = fmt->width(fmt, NULL, hists_to_evsel(browser->hists));
+
+		if (first) {
+			/* for folded sign */
+			first = false;
+			ret++;
+		} else {
+			/* space between columns */
+			ret += 2;
+		}
+
+		ui_browser__write_nstring(&browser->b, "", ret);
+		width -= ret;
+	}
+
+	ui_browser__write_nstring(&browser->b, "", indent * HIERARCHY_INDENT);
+	width -= indent * HIERARCHY_INDENT;
+
+	if (column >= browser->b.horiz_scroll) {
+		char buf[32];
+
+		ret = snprintf(buf, sizeof(buf), "no entry >= %.2f%%", browser->min_pcnt);
+		ui_browser__printf(&browser->b, "  %s", buf);
+		width -= ret + 2;
+	}
+
+	/* The scroll bar isn't being used */
+	if (!browser->b.navkeypressed)
+		width += 1;
+
+	ui_browser__write_nstring(&browser->b, "", width);
+	return 1;
+}
+
 static int advance_hpp_check(struct perf_hpp *hpp, int inc)
 {
 	advance_hpp(hpp, inc);
@@ -1144,7 +1527,7 @@
 			return ret;
 	}
 
-	perf_hpp__for_each_format(fmt) {
+	hists__for_each_format(browser->hists, fmt) {
 		if (perf_hpp__should_skip(fmt, hists)  || column++ < browser->b.horiz_scroll)
 			continue;
 
@@ -1160,11 +1543,96 @@
 	return ret;
 }
 
+static int hists_browser__scnprintf_hierarchy_headers(struct hist_browser *browser, char *buf, size_t size)
+{
+	struct hists *hists = browser->hists;
+	struct perf_hpp dummy_hpp = {
+		.buf    = buf,
+		.size   = size,
+	};
+	struct perf_hpp_fmt *fmt;
+	struct perf_hpp_list_node *fmt_node;
+	size_t ret = 0;
+	int column = 0;
+	int indent = hists->nr_hpp_node - 2;
+	bool first_node, first_col;
+
+	ret = scnprintf(buf, size, " ");
+	if (advance_hpp_check(&dummy_hpp, ret))
+		return ret;
+
+	/* the first hpp_list_node is for overhead columns */
+	fmt_node = list_first_entry(&hists->hpp_formats,
+				    struct perf_hpp_list_node, list);
+	perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) {
+		if (column++ < browser->b.horiz_scroll)
+			continue;
+
+		ret = fmt->header(fmt, &dummy_hpp, hists_to_evsel(hists));
+		if (advance_hpp_check(&dummy_hpp, ret))
+			break;
+
+		ret = scnprintf(dummy_hpp.buf, dummy_hpp.size, "  ");
+		if (advance_hpp_check(&dummy_hpp, ret))
+			break;
+	}
+
+	ret = scnprintf(dummy_hpp.buf, dummy_hpp.size, "%*s",
+			indent * HIERARCHY_INDENT, "");
+	if (advance_hpp_check(&dummy_hpp, ret))
+		return ret;
+
+	first_node = true;
+	list_for_each_entry_continue(fmt_node, &hists->hpp_formats, list) {
+		if (!first_node) {
+			ret = scnprintf(dummy_hpp.buf, dummy_hpp.size, " / ");
+			if (advance_hpp_check(&dummy_hpp, ret))
+				break;
+		}
+		first_node = false;
+
+		first_col = true;
+		perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) {
+			char *start;
+
+			if (perf_hpp__should_skip(fmt, hists))
+				continue;
+
+			if (!first_col) {
+				ret = scnprintf(dummy_hpp.buf, dummy_hpp.size, "+");
+				if (advance_hpp_check(&dummy_hpp, ret))
+					break;
+			}
+			first_col = false;
+
+			ret = fmt->header(fmt, &dummy_hpp, hists_to_evsel(hists));
+			dummy_hpp.buf[ret] = '\0';
+			rtrim(dummy_hpp.buf);
+
+			start = ltrim(dummy_hpp.buf);
+			ret = strlen(start);
+
+			if (start != dummy_hpp.buf)
+				memmove(dummy_hpp.buf, start, ret + 1);
+
+			if (advance_hpp_check(&dummy_hpp, ret))
+				break;
+		}
+	}
+
+	return ret;
+}
+
 static void hist_browser__show_headers(struct hist_browser *browser)
 {
 	char headers[1024];
 
-	hists_browser__scnprintf_headers(browser, headers, sizeof(headers));
+	if (symbol_conf.report_hierarchy)
+		hists_browser__scnprintf_hierarchy_headers(browser, headers,
+							   sizeof(headers));
+	else
+		hists_browser__scnprintf_headers(browser, headers,
+						 sizeof(headers));
 	ui_browser__gotorc(&browser->b, 0, 0);
 	ui_browser__set_color(&browser->b, HE_COLORSET_ROOT);
 	ui_browser__write_nstring(&browser->b, headers, browser->b.width + 1);
@@ -1196,18 +1664,34 @@
 	hb->he_selection = NULL;
 	hb->selection = NULL;
 
-	for (nd = browser->top; nd; nd = rb_next(nd)) {
+	for (nd = browser->top; nd; nd = rb_hierarchy_next(nd)) {
 		struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
 		float percent;
 
-		if (h->filtered)
+		if (h->filtered) {
+			/* let it move to sibling */
+			h->unfolded = false;
 			continue;
+		}
 
 		percent = hist_entry__get_percent_limit(h);
 		if (percent < hb->min_pcnt)
 			continue;
 
-		row += hist_browser__show_entry(hb, h, row);
+		if (symbol_conf.report_hierarchy) {
+			row += hist_browser__show_hierarchy_entry(hb, h, row,
+								  h->depth);
+			if (row == browser->rows)
+				break;
+
+			if (h->has_no_entry) {
+				hist_browser__show_no_entry(hb, row, h->depth + 1);
+				row++;
+			}
+		} else {
+			row += hist_browser__show_entry(hb, h, row);
+		}
+
 		if (row == browser->rows)
 			break;
 	}
@@ -1225,7 +1709,14 @@
 		if (!h->filtered && percent >= min_pcnt)
 			return nd;
 
-		nd = rb_next(nd);
+		/*
+		 * If it's filtered, its all children also were filtered.
+		 * So move to sibling node.
+		 */
+		if (rb_next(nd))
+			nd = rb_next(nd);
+		else
+			nd = rb_hierarchy_next(nd);
 	}
 
 	return NULL;
@@ -1241,7 +1732,7 @@
 		if (!h->filtered && percent >= min_pcnt)
 			return nd;
 
-		nd = rb_prev(nd);
+		nd = rb_hierarchy_prev(nd);
 	}
 
 	return NULL;
@@ -1271,8 +1762,8 @@
 		nd = browser->top;
 		goto do_offset;
 	case SEEK_END:
-		nd = hists__filter_prev_entries(rb_last(browser->entries),
-						hb->min_pcnt);
+		nd = rb_hierarchy_last(rb_last(browser->entries));
+		nd = hists__filter_prev_entries(nd, hb->min_pcnt);
 		first = false;
 		break;
 	default:
@@ -1306,7 +1797,7 @@
 	if (offset > 0) {
 		do {
 			h = rb_entry(nd, struct hist_entry, rb_node);
-			if (h->unfolded) {
+			if (h->unfolded && h->leaf) {
 				u16 remaining = h->nr_rows - h->row_offset;
 				if (offset > remaining) {
 					offset -= remaining;
@@ -1318,7 +1809,8 @@
 					break;
 				}
 			}
-			nd = hists__filter_entries(rb_next(nd), hb->min_pcnt);
+			nd = hists__filter_entries(rb_hierarchy_next(nd),
+						   hb->min_pcnt);
 			if (nd == NULL)
 				break;
 			--offset;
@@ -1327,7 +1819,7 @@
 	} else if (offset < 0) {
 		while (1) {
 			h = rb_entry(nd, struct hist_entry, rb_node);
-			if (h->unfolded) {
+			if (h->unfolded && h->leaf) {
 				if (first) {
 					if (-offset > h->row_offset) {
 						offset += h->row_offset;
@@ -1351,7 +1843,7 @@
 				}
 			}
 
-			nd = hists__filter_prev_entries(rb_prev(nd),
+			nd = hists__filter_prev_entries(rb_hierarchy_prev(nd),
 							hb->min_pcnt);
 			if (nd == NULL)
 				break;
@@ -1364,7 +1856,7 @@
 				 * row_offset at its last entry.
 				 */
 				h = rb_entry(nd, struct hist_entry, rb_node);
-				if (h->unfolded)
+				if (h->unfolded && h->leaf)
 					h->row_offset = h->nr_rows;
 				break;
 			}
@@ -1378,17 +1870,14 @@
 }
 
 static int hist_browser__fprintf_callchain(struct hist_browser *browser,
-					   struct hist_entry *he, FILE *fp)
+					   struct hist_entry *he, FILE *fp,
+					   int level)
 {
-	u64 total = hists__total_period(he->hists);
 	struct callchain_print_arg arg  = {
 		.fp = fp,
 	};
 
-	if (symbol_conf.cumulate_callchain)
-		total = he->stat_acc->period;
-
-	hist_browser__show_callchain(browser, &he->sorted_chain, 1, 0, total,
+	hist_browser__show_callchain(browser, he, level, 0,
 				     hist_browser__fprintf_callchain_entry, &arg,
 				     hist_browser__check_dump_full);
 	return arg.printed;
@@ -1414,7 +1903,7 @@
 	if (symbol_conf.use_callchain)
 		printed += fprintf(fp, "%c ", folded_sign);
 
-	perf_hpp__for_each_format(fmt) {
+	hists__for_each_format(browser->hists, fmt) {
 		if (perf_hpp__should_skip(fmt, he->hists))
 			continue;
 
@@ -1425,12 +1914,71 @@
 			first = false;
 
 		ret = fmt->entry(fmt, &hpp, he);
+		ret = hist_entry__snprintf_alignment(he, &hpp, fmt, ret);
 		advance_hpp(&hpp, ret);
 	}
-	printed += fprintf(fp, "%s\n", rtrim(s));
+	printed += fprintf(fp, "%s\n", s);
 
 	if (folded_sign == '-')
-		printed += hist_browser__fprintf_callchain(browser, he, fp);
+		printed += hist_browser__fprintf_callchain(browser, he, fp, 1);
+
+	return printed;
+}
+
+
+static int hist_browser__fprintf_hierarchy_entry(struct hist_browser *browser,
+						 struct hist_entry *he,
+						 FILE *fp, int level)
+{
+	char s[8192];
+	int printed = 0;
+	char folded_sign = ' ';
+	struct perf_hpp hpp = {
+		.buf = s,
+		.size = sizeof(s),
+	};
+	struct perf_hpp_fmt *fmt;
+	struct perf_hpp_list_node *fmt_node;
+	bool first = true;
+	int ret;
+	int hierarchy_indent = (he->hists->nr_hpp_node - 2) * HIERARCHY_INDENT;
+
+	printed = fprintf(fp, "%*s", level * HIERARCHY_INDENT, "");
+
+	folded_sign = hist_entry__folded(he);
+	printed += fprintf(fp, "%c", folded_sign);
+
+	/* the first hpp_list_node is for overhead columns */
+	fmt_node = list_first_entry(&he->hists->hpp_formats,
+				    struct perf_hpp_list_node, list);
+	perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) {
+		if (!first) {
+			ret = scnprintf(hpp.buf, hpp.size, "  ");
+			advance_hpp(&hpp, ret);
+		} else
+			first = false;
+
+		ret = fmt->entry(fmt, &hpp, he);
+		advance_hpp(&hpp, ret);
+	}
+
+	ret = scnprintf(hpp.buf, hpp.size, "%*s", hierarchy_indent, "");
+	advance_hpp(&hpp, ret);
+
+	perf_hpp_list__for_each_format(he->hpp_list, fmt) {
+		ret = scnprintf(hpp.buf, hpp.size, "  ");
+		advance_hpp(&hpp, ret);
+
+		ret = fmt->entry(fmt, &hpp, he);
+		advance_hpp(&hpp, ret);
+	}
+
+	printed += fprintf(fp, "%s\n", rtrim(s));
+
+	if (he->leaf && folded_sign == '-') {
+		printed += hist_browser__fprintf_callchain(browser, he, fp,
+							   he->depth + 1);
+	}
 
 	return printed;
 }
@@ -1444,8 +1992,16 @@
 	while (nd) {
 		struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
 
-		printed += hist_browser__fprintf_entry(browser, h, fp);
-		nd = hists__filter_entries(rb_next(nd), browser->min_pcnt);
+		if (symbol_conf.report_hierarchy) {
+			printed += hist_browser__fprintf_hierarchy_entry(browser,
+									 h, fp,
+									 h->depth);
+		} else {
+			printed += hist_browser__fprintf_entry(browser, h, fp);
+		}
+
+		nd = hists__filter_entries(rb_hierarchy_next(nd),
+					   browser->min_pcnt);
 	}
 
 	return printed;
@@ -1580,11 +2136,18 @@
 	if (hists->uid_filter_str)
 		printed += snprintf(bf + printed, size - printed,
 				    ", UID: %s", hists->uid_filter_str);
-	if (thread)
-		printed += scnprintf(bf + printed, size - printed,
+	if (thread) {
+		if (sort__has_thread) {
+			printed += scnprintf(bf + printed, size - printed,
 				    ", Thread: %s(%d)",
 				     (thread->comm_set ? thread__comm_str(thread) : ""),
 				    thread->tid);
+		} else {
+			printed += scnprintf(bf + printed, size - printed,
+				    ", Thread: %s",
+				     (thread->comm_set ? thread__comm_str(thread) : ""));
+		}
+	}
 	if (dso)
 		printed += scnprintf(bf + printed, size - printed,
 				    ", DSO: %s", dso->short_name);
@@ -1759,15 +2322,24 @@
 {
 	struct thread *thread = act->thread;
 
+	if ((!sort__has_thread && !sort__has_comm) || thread == NULL)
+		return 0;
+
 	if (browser->hists->thread_filter) {
 		pstack__remove(browser->pstack, &browser->hists->thread_filter);
 		perf_hpp__set_elide(HISTC_THREAD, false);
 		thread__zput(browser->hists->thread_filter);
 		ui_helpline__pop();
 	} else {
-		ui_helpline__fpush("To zoom out press ESC or ENTER + \"Zoom out of %s(%d) thread\"",
-				   thread->comm_set ? thread__comm_str(thread) : "",
-				   thread->tid);
+		if (sort__has_thread) {
+			ui_helpline__fpush("To zoom out press ESC or ENTER + \"Zoom out of %s(%d) thread\"",
+					   thread->comm_set ? thread__comm_str(thread) : "",
+					   thread->tid);
+		} else {
+			ui_helpline__fpush("To zoom out press ESC or ENTER + \"Zoom out of %s thread\"",
+					   thread->comm_set ? thread__comm_str(thread) : "");
+		}
+
 		browser->hists->thread_filter = thread__get(thread);
 		perf_hpp__set_elide(HISTC_THREAD, false);
 		pstack__push(browser->pstack, &browser->hists->thread_filter);
@@ -1782,13 +2354,22 @@
 add_thread_opt(struct hist_browser *browser, struct popup_action *act,
 	       char **optstr, struct thread *thread)
 {
-	if (thread == NULL)
+	int ret;
+
+	if ((!sort__has_thread && !sort__has_comm) || thread == NULL)
 		return 0;
 
-	if (asprintf(optstr, "Zoom %s %s(%d) thread",
-		     browser->hists->thread_filter ? "out of" : "into",
-		     thread->comm_set ? thread__comm_str(thread) : "",
-		     thread->tid) < 0)
+	if (sort__has_thread) {
+		ret = asprintf(optstr, "Zoom %s %s(%d) thread",
+			       browser->hists->thread_filter ? "out of" : "into",
+			       thread->comm_set ? thread__comm_str(thread) : "",
+			       thread->tid);
+	} else {
+		ret = asprintf(optstr, "Zoom %s %s thread",
+			       browser->hists->thread_filter ? "out of" : "into",
+			       thread->comm_set ? thread__comm_str(thread) : "");
+	}
+	if (ret < 0)
 		return 0;
 
 	act->thread = thread;
@@ -1801,6 +2382,9 @@
 {
 	struct map *map = act->ms.map;
 
+	if (!sort__has_dso || map == NULL)
+		return 0;
+
 	if (browser->hists->dso_filter) {
 		pstack__remove(browser->pstack, &browser->hists->dso_filter);
 		perf_hpp__set_elide(HISTC_DSO, false);
@@ -1825,7 +2409,7 @@
 add_dso_opt(struct hist_browser *browser, struct popup_action *act,
 	    char **optstr, struct map *map)
 {
-	if (map == NULL)
+	if (!sort__has_dso || map == NULL)
 		return 0;
 
 	if (asprintf(optstr, "Zoom %s %s DSO",
@@ -1850,7 +2434,7 @@
 add_map_opt(struct hist_browser *browser __maybe_unused,
 	    struct popup_action *act, char **optstr, struct map *map)
 {
-	if (map == NULL)
+	if (!sort__has_dso || map == NULL)
 		return 0;
 
 	if (asprintf(optstr, "Browse map details") < 0)
@@ -1952,6 +2536,9 @@
 static int
 do_zoom_socket(struct hist_browser *browser, struct popup_action *act)
 {
+	if (!sort__has_socket || act->socket < 0)
+		return 0;
+
 	if (browser->hists->socket_filter > -1) {
 		pstack__remove(browser->pstack, &browser->hists->socket_filter);
 		browser->hists->socket_filter = -1;
@@ -1971,7 +2558,7 @@
 add_socket_opt(struct hist_browser *browser, struct popup_action *act,
 	       char **optstr, int socket_id)
 {
-	if (socket_id < 0)
+	if (!sort__has_socket || socket_id < 0)
 		return 0;
 
 	if (asprintf(optstr, "Zoom %s Processor Socket %d",
@@ -1989,17 +2576,60 @@
 	u64 nr_entries = 0;
 	struct rb_node *nd = rb_first(&hb->hists->entries);
 
-	if (hb->min_pcnt == 0) {
+	if (hb->min_pcnt == 0 && !symbol_conf.report_hierarchy) {
 		hb->nr_non_filtered_entries = hb->hists->nr_non_filtered_entries;
 		return;
 	}
 
 	while ((nd = hists__filter_entries(nd, hb->min_pcnt)) != NULL) {
 		nr_entries++;
-		nd = rb_next(nd);
+		nd = rb_hierarchy_next(nd);
 	}
 
 	hb->nr_non_filtered_entries = nr_entries;
+	hb->nr_hierarchy_entries = nr_entries;
+}
+
+static void hist_browser__update_percent_limit(struct hist_browser *hb,
+					       double percent)
+{
+	struct hist_entry *he;
+	struct rb_node *nd = rb_first(&hb->hists->entries);
+	u64 total = hists__total_period(hb->hists);
+	u64 min_callchain_hits = total * (percent / 100);
+
+	hb->min_pcnt = callchain_param.min_percent = percent;
+
+	while ((nd = hists__filter_entries(nd, hb->min_pcnt)) != NULL) {
+		he = rb_entry(nd, struct hist_entry, rb_node);
+
+		if (he->has_no_entry) {
+			he->has_no_entry = false;
+			he->nr_rows = 0;
+		}
+
+		if (!he->leaf || !symbol_conf.use_callchain)
+			goto next;
+
+		if (callchain_param.mode == CHAIN_GRAPH_REL) {
+			total = he->stat.period;
+
+			if (symbol_conf.cumulate_callchain)
+				total = he->stat_acc->period;
+
+			min_callchain_hits = total * (percent / 100);
+		}
+
+		callchain_param.sort(&he->sorted_chain, he->callchain,
+				     min_callchain_hits, &callchain_param);
+
+next:
+		nd = __rb_hierarchy_next(nd, HMD_FORCE_CHILD);
+
+		/* force to re-evaluate folding state of callchains */
+		he->init_have_children = false;
+		hist_entry__set_folding(he, hb, false);
+	}
 }
 
 static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
@@ -2037,6 +2667,7 @@
 	"E             Expand all callchains\n"				\
 	"F             Toggle percentage of filtered entries\n"		\
 	"H             Display column headers\n"			\
+	"L             Change percent limit\n"				\
 	"m             Display context menu\n"				\
 	"S             Zoom into current Processor Socket\n"		\
 
@@ -2077,7 +2708,7 @@
 	memset(options, 0, sizeof(options));
 	memset(actions, 0, sizeof(actions));
 
-	perf_hpp__for_each_format(fmt) {
+	hists__for_each_format(browser->hists, fmt) {
 		perf_hpp__reset_width(fmt, hists);
 		/*
 		 * This is done just once, and activates the horizontal scrolling
@@ -2192,6 +2823,24 @@
 				top->zero = !top->zero;
 			}
 			continue;
+		case 'L':
+			if (ui_browser__input_window("Percent Limit",
+					"Please enter the value you want to hide entries under that percent.",
+					buf, "ENTER: OK, ESC: Cancel",
+					delay_secs * 2) == K_ENTER) {
+				char *end;
+				double new_percent = strtod(buf, &end);
+
+				if (new_percent < 0 || new_percent > 100) {
+					ui_browser__warning(&browser->b, delay_secs * 2,
+						"Invalid percent: %.2f", new_percent);
+					continue;
+				}
+
+				hist_browser__update_percent_limit(browser, new_percent);
+				hist_browser__reset(browser);
+			}
+			continue;
 		case K_F1:
 		case 'h':
 		case '?':
@@ -2263,10 +2912,7 @@
 			continue;
 		}
 
-		if (!sort__has_sym)
-			goto add_exit_option;
-
-		if (browser->selection == NULL)
+		if (!sort__has_sym || browser->selection == NULL)
 			goto skip_annotation;
 
 		if (sort__mode == SORT_MODE__BRANCH) {
@@ -2306,11 +2952,16 @@
 					     &options[nr_options],
 					     socked_id);
 		/* perf script support */
+		if (!is_report_browser(hbt))
+			goto skip_scripting;
+
 		if (browser->he_selection) {
-			nr_options += add_script_opt(browser,
-						     &actions[nr_options],
-						     &options[nr_options],
-						     thread, NULL);
+			if (sort__has_thread && thread) {
+				nr_options += add_script_opt(browser,
+							     &actions[nr_options],
+							     &options[nr_options],
+							     thread, NULL);
+			}
 			/*
 			 * Note that browser->selection != NULL
 			 * when browser->he_selection is not NULL,
@@ -2320,16 +2971,18 @@
 			 *
 			 * See hist_browser__show_entry.
 			 */
-			nr_options += add_script_opt(browser,
-						     &actions[nr_options],
-						     &options[nr_options],
-						     NULL, browser->selection->sym);
+			if (sort__has_sym && browser->selection->sym) {
+				nr_options += add_script_opt(browser,
+							     &actions[nr_options],
+							     &options[nr_options],
+							     NULL, browser->selection->sym);
+			}
 		}
 		nr_options += add_script_opt(browser, &actions[nr_options],
 					     &options[nr_options], NULL, NULL);
 		nr_options += add_switch_opt(browser, &actions[nr_options],
 					     &options[nr_options]);
-add_exit_option:
+skip_scripting:
 		nr_options += add_exit_opt(browser, &actions[nr_options],
 					   &options[nr_options]);
 
diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c
index 0f8dcfd..bd9bf7e 100644
--- a/tools/perf/ui/gtk/hists.c
+++ b/tools/perf/ui/gtk/hists.c
@@ -306,7 +306,7 @@
 
 	nr_cols = 0;
 
-	perf_hpp__for_each_format(fmt)
+	hists__for_each_format(hists, fmt)
 		col_types[nr_cols++] = G_TYPE_STRING;
 
 	store = gtk_tree_store_newv(nr_cols, col_types);
@@ -317,7 +317,7 @@
 
 	col_idx = 0;
 
-	perf_hpp__for_each_format(fmt) {
+	hists__for_each_format(hists, fmt) {
 		if (perf_hpp__should_skip(fmt, hists))
 			continue;
 
@@ -367,7 +367,7 @@
 
 		col_idx = 0;
 
-		perf_hpp__for_each_format(fmt) {
+		hists__for_each_format(hists, fmt) {
 			if (perf_hpp__should_skip(fmt, h->hists))
 				continue;
 
@@ -396,6 +396,194 @@
 	gtk_container_add(GTK_CONTAINER(window), view);
 }
 
+static void perf_gtk__add_hierarchy_entries(struct hists *hists,
+					    struct rb_root *root,
+					    GtkTreeStore *store,
+					    GtkTreeIter *parent,
+					    struct perf_hpp *hpp,
+					    float min_pcnt)
+{
+	int col_idx = 0;
+	struct rb_node *node;
+	struct hist_entry *he;
+	struct perf_hpp_fmt *fmt;
+	struct perf_hpp_list_node *fmt_node;
+	u64 total = hists__total_period(hists);
+	int size;
+
+	for (node = rb_first(root); node; node = rb_next(node)) {
+		GtkTreeIter iter;
+		float percent;
+		char *bf;
+
+		he = rb_entry(node, struct hist_entry, rb_node);
+		if (he->filtered)
+			continue;
+
+		percent = hist_entry__get_percent_limit(he);
+		if (percent < min_pcnt)
+			continue;
+
+		gtk_tree_store_append(store, &iter, parent);
+
+		col_idx = 0;
+
+		/* the first hpp_list_node is for overhead columns */
+		fmt_node = list_first_entry(&hists->hpp_formats,
+					    struct perf_hpp_list_node, list);
+		perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) {
+			if (fmt->color)
+				fmt->color(fmt, hpp, he);
+			else
+				fmt->entry(fmt, hpp, he);
+
+			gtk_tree_store_set(store, &iter, col_idx++, hpp->buf, -1);
+		}
+
+		bf = hpp->buf;
+		size = hpp->size;
+		perf_hpp_list__for_each_format(he->hpp_list, fmt) {
+			int ret;
+
+			if (fmt->color)
+				ret = fmt->color(fmt, hpp, he);
+			else
+				ret = fmt->entry(fmt, hpp, he);
+
+			snprintf(hpp->buf + ret, hpp->size - ret, "  ");
+			advance_hpp(hpp, ret + 2);
+		}
+
+		gtk_tree_store_set(store, &iter, col_idx, ltrim(rtrim(bf)), -1);
+
+		if (!he->leaf) {
+			hpp->buf = bf;
+			hpp->size = size;
+
+			perf_gtk__add_hierarchy_entries(hists, &he->hroot_out,
+							store, &iter, hpp,
+							min_pcnt);
+
+			if (!hist_entry__has_hierarchy_children(he, min_pcnt)) {
+				char buf[32];
+				GtkTreeIter child;
+
+				snprintf(buf, sizeof(buf), "no entry >= %.2f%%",
+					 min_pcnt);
+
+				gtk_tree_store_append(store, &child, &iter);
+				gtk_tree_store_set(store, &child, col_idx, buf, -1);
+			}
+		}
+
+		if (symbol_conf.use_callchain && he->leaf) {
+			if (callchain_param.mode == CHAIN_GRAPH_REL)
+				total = symbol_conf.cumulate_callchain ?
+					he->stat_acc->period : he->stat.period;
+
+			perf_gtk__add_callchain(&he->sorted_chain, store, &iter,
+						col_idx, total);
+		}
+	}
+
+}
+
+static void perf_gtk__show_hierarchy(GtkWidget *window, struct hists *hists,
+				     float min_pcnt)
+{
+	struct perf_hpp_fmt *fmt;
+	struct perf_hpp_list_node *fmt_node;
+	GType col_types[MAX_COLUMNS];
+	GtkCellRenderer *renderer;
+	GtkTreeStore *store;
+	GtkWidget *view;
+	int col_idx;
+	int nr_cols = 0;
+	char s[512];
+	char buf[512];
+	bool first_node, first_col;
+	struct perf_hpp hpp = {
+		.buf		= s,
+		.size		= sizeof(s),
+	};
+
+	hists__for_each_format(hists, fmt) {
+		if (perf_hpp__is_sort_entry(fmt) ||
+		    perf_hpp__is_dynamic_entry(fmt))
+			break;
+
+		col_types[nr_cols++] = G_TYPE_STRING;
+	}
+	col_types[nr_cols++] = G_TYPE_STRING;
+
+	store = gtk_tree_store_newv(nr_cols, col_types);
+	view = gtk_tree_view_new();
+	renderer = gtk_cell_renderer_text_new();
+
+	col_idx = 0;
+
+	/* the first hpp_list_node is for overhead columns */
+	fmt_node = list_first_entry(&hists->hpp_formats,
+				    struct perf_hpp_list_node, list);
+	perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) {
+		gtk_tree_view_insert_column_with_attributes(GTK_TREE_VIEW(view),
+							    -1, fmt->name,
+							    renderer, "markup",
+							    col_idx++, NULL);
+	}
+
+	/* construct merged column header since sort keys share single column */
+	buf[0] = '\0';
+	first_node = true;
+	list_for_each_entry_continue(fmt_node, &hists->hpp_formats, list) {
+		if (!first_node)
+			strcat(buf, " / ");
+		first_node = false;
+
+		first_col = true;
+		perf_hpp_list__for_each_format(&fmt_node->hpp ,fmt) {
+			if (perf_hpp__should_skip(fmt, hists))
+				continue;
+
+			if (!first_col)
+				strcat(buf, "+");
+			first_col = false;
+
+			fmt->header(fmt, &hpp, hists_to_evsel(hists));
+			strcat(buf, ltrim(rtrim(hpp.buf)));
+		}
+	}
+
+	gtk_tree_view_insert_column_with_attributes(GTK_TREE_VIEW(view),
+						    -1, buf,
+						    renderer, "markup",
+						    col_idx++, NULL);
+
+	for (col_idx = 0; col_idx < nr_cols; col_idx++) {
+		GtkTreeViewColumn *column;
+
+		column = gtk_tree_view_get_column(GTK_TREE_VIEW(view), col_idx);
+		gtk_tree_view_column_set_resizable(column, TRUE);
+
+		if (col_idx == 0) {
+			gtk_tree_view_set_expander_column(GTK_TREE_VIEW(view),
+							  column);
+		}
+	}
+
+	gtk_tree_view_set_model(GTK_TREE_VIEW(view), GTK_TREE_MODEL(store));
+	g_object_unref(GTK_TREE_MODEL(store));
+
+	perf_gtk__add_hierarchy_entries(hists, &hists->entries, store,
+					NULL, &hpp, min_pcnt);
+
+	gtk_tree_view_set_rules_hint(GTK_TREE_VIEW(view), TRUE);
+
+	g_signal_connect(view, "row-activated",
+			 G_CALLBACK(on_row_activated), NULL);
+	gtk_container_add(GTK_CONTAINER(window), view);
+}
+
 int perf_evlist__gtk_browse_hists(struct perf_evlist *evlist,
 				  const char *help,
 				  struct hist_browser_timer *hbt __maybe_unused,
@@ -463,7 +651,10 @@
 							GTK_POLICY_AUTOMATIC,
 							GTK_POLICY_AUTOMATIC);
 
-		perf_gtk__show_hists(scrolled_window, hists, min_pcnt);
+		if (symbol_conf.report_hierarchy)
+			perf_gtk__show_hierarchy(scrolled_window, hists, min_pcnt);
+		else
+			perf_gtk__show_hists(scrolled_window, hists, min_pcnt);
 
 		tab_label = gtk_label_new(evname);
 
diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c
index bf2a66e..3baeaa6 100644
--- a/tools/perf/ui/hist.c
+++ b/tools/perf/ui/hist.c
@@ -5,6 +5,7 @@
 #include "../util/util.h"
 #include "../util/sort.h"
 #include "../util/evsel.h"
+#include "../util/evlist.h"
 
 /* hist period print (hpp) functions */
 
@@ -371,7 +372,20 @@
 	return 0;
 }
 
-#define HPP__COLOR_PRINT_FNS(_name, _fn)		\
+static bool perf_hpp__is_hpp_entry(struct perf_hpp_fmt *a)
+{
+	return a->header == hpp__header_fn;
+}
+
+static bool hpp__equal(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b)
+{
+	if (!perf_hpp__is_hpp_entry(a) || !perf_hpp__is_hpp_entry(b))
+		return false;
+
+	return a->idx == b->idx;
+}
+
+#define HPP__COLOR_PRINT_FNS(_name, _fn, _idx)		\
 	{						\
 		.name   = _name,			\
 		.header	= hpp__header_fn,		\
@@ -381,9 +395,11 @@
 		.cmp	= hpp__nop_cmp,			\
 		.collapse = hpp__nop_cmp,		\
 		.sort	= hpp__sort_ ## _fn,		\
+		.idx	= PERF_HPP__ ## _idx,		\
+		.equal	= hpp__equal,			\
 	}
 
-#define HPP__COLOR_ACC_PRINT_FNS(_name, _fn)		\
+#define HPP__COLOR_ACC_PRINT_FNS(_name, _fn, _idx)	\
 	{						\
 		.name   = _name,			\
 		.header	= hpp__header_fn,		\
@@ -393,9 +409,11 @@
 		.cmp	= hpp__nop_cmp,			\
 		.collapse = hpp__nop_cmp,		\
 		.sort	= hpp__sort_ ## _fn,		\
+		.idx	= PERF_HPP__ ## _idx,		\
+		.equal	= hpp__equal,			\
 	}
 
-#define HPP__PRINT_FNS(_name, _fn)			\
+#define HPP__PRINT_FNS(_name, _fn, _idx)		\
 	{						\
 		.name   = _name,			\
 		.header	= hpp__header_fn,		\
@@ -404,22 +422,25 @@
 		.cmp	= hpp__nop_cmp,			\
 		.collapse = hpp__nop_cmp,		\
 		.sort	= hpp__sort_ ## _fn,		\
+		.idx	= PERF_HPP__ ## _idx,		\
+		.equal	= hpp__equal,			\
 	}
 
 struct perf_hpp_fmt perf_hpp__format[] = {
-	HPP__COLOR_PRINT_FNS("Overhead", overhead),
-	HPP__COLOR_PRINT_FNS("sys", overhead_sys),
-	HPP__COLOR_PRINT_FNS("usr", overhead_us),
-	HPP__COLOR_PRINT_FNS("guest sys", overhead_guest_sys),
-	HPP__COLOR_PRINT_FNS("guest usr", overhead_guest_us),
-	HPP__COLOR_ACC_PRINT_FNS("Children", overhead_acc),
-	HPP__PRINT_FNS("Samples", samples),
-	HPP__PRINT_FNS("Period", period)
+	HPP__COLOR_PRINT_FNS("Overhead", overhead, OVERHEAD),
+	HPP__COLOR_PRINT_FNS("sys", overhead_sys, OVERHEAD_SYS),
+	HPP__COLOR_PRINT_FNS("usr", overhead_us, OVERHEAD_US),
+	HPP__COLOR_PRINT_FNS("guest sys", overhead_guest_sys, OVERHEAD_GUEST_SYS),
+	HPP__COLOR_PRINT_FNS("guest usr", overhead_guest_us, OVERHEAD_GUEST_US),
+	HPP__COLOR_ACC_PRINT_FNS("Children", overhead_acc, OVERHEAD_ACC),
+	HPP__PRINT_FNS("Samples", samples, SAMPLES),
+	HPP__PRINT_FNS("Period", period, PERIOD)
 };
 
-LIST_HEAD(perf_hpp__list);
-LIST_HEAD(perf_hpp__sort_list);
-
+struct perf_hpp_list perf_hpp_list = {
+	.fields	= LIST_HEAD_INIT(perf_hpp_list.fields),
+	.sorts	= LIST_HEAD_INIT(perf_hpp_list.sorts),
+};
 
 #undef HPP__COLOR_PRINT_FNS
 #undef HPP__COLOR_ACC_PRINT_FNS
@@ -485,9 +506,16 @@
 		hpp_dimension__add_output(PERF_HPP__PERIOD);
 }
 
-void perf_hpp__column_register(struct perf_hpp_fmt *format)
+void perf_hpp_list__column_register(struct perf_hpp_list *list,
+				    struct perf_hpp_fmt *format)
 {
-	list_add_tail(&format->list, &perf_hpp__list);
+	list_add_tail(&format->list, &list->fields);
+}
+
+void perf_hpp_list__register_sort_field(struct perf_hpp_list *list,
+					struct perf_hpp_fmt *format)
+{
+	list_add_tail(&format->sort_list, &list->sorts);
 }
 
 void perf_hpp__column_unregister(struct perf_hpp_fmt *format)
@@ -495,53 +523,43 @@
 	list_del(&format->list);
 }
 
-void perf_hpp__register_sort_field(struct perf_hpp_fmt *format)
-{
-	list_add_tail(&format->sort_list, &perf_hpp__sort_list);
-}
-
-void perf_hpp__column_enable(unsigned col)
-{
-	BUG_ON(col >= PERF_HPP__MAX_INDEX);
-	perf_hpp__column_register(&perf_hpp__format[col]);
-}
-
-void perf_hpp__column_disable(unsigned col)
-{
-	BUG_ON(col >= PERF_HPP__MAX_INDEX);
-	perf_hpp__column_unregister(&perf_hpp__format[col]);
-}
-
 void perf_hpp__cancel_cumulate(void)
 {
+	struct perf_hpp_fmt *fmt, *acc, *ovh, *tmp;
+
 	if (is_strict_order(field_order))
 		return;
 
-	perf_hpp__column_disable(PERF_HPP__OVERHEAD_ACC);
-	perf_hpp__format[PERF_HPP__OVERHEAD].name = "Overhead";
+	ovh = &perf_hpp__format[PERF_HPP__OVERHEAD];
+	acc = &perf_hpp__format[PERF_HPP__OVERHEAD_ACC];
+
+	perf_hpp_list__for_each_format_safe(&perf_hpp_list, fmt, tmp) {
+		if (acc->equal(acc, fmt)) {
+			perf_hpp__column_unregister(fmt);
+			continue;
+		}
+
+		if (ovh->equal(ovh, fmt))
+			fmt->name = "Overhead";
+	}
 }
 
-void perf_hpp__setup_output_field(void)
+static bool fmt_equal(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b)
+{
+	return a->equal && a->equal(a, b);
+}
+
+void perf_hpp__setup_output_field(struct perf_hpp_list *list)
 {
 	struct perf_hpp_fmt *fmt;
 
 	/* append sort keys to output field */
-	perf_hpp__for_each_sort_list(fmt) {
-		if (!list_empty(&fmt->list))
-			continue;
+	perf_hpp_list__for_each_sort_list(list, fmt) {
+		struct perf_hpp_fmt *pos;
 
-		/*
-		 * sort entry fields are dynamically created,
-		 * so they can share a same sort key even though
-		 * the list is empty.
-		 */
-		if (perf_hpp__is_sort_entry(fmt)) {
-			struct perf_hpp_fmt *pos;
-
-			perf_hpp__for_each_format(pos) {
-				if (perf_hpp__same_sort_entry(pos, fmt))
-					goto next;
-			}
+		perf_hpp_list__for_each_format(list, pos) {
+			if (fmt_equal(fmt, pos))
+				goto next;
 		}
 
 		perf_hpp__column_register(fmt);
@@ -550,27 +568,17 @@
 	}
 }
 
-void perf_hpp__append_sort_keys(void)
+void perf_hpp__append_sort_keys(struct perf_hpp_list *list)
 {
 	struct perf_hpp_fmt *fmt;
 
 	/* append output fields to sort keys */
-	perf_hpp__for_each_format(fmt) {
-		if (!list_empty(&fmt->sort_list))
-			continue;
+	perf_hpp_list__for_each_format(list, fmt) {
+		struct perf_hpp_fmt *pos;
 
-		/*
-		 * sort entry fields are dynamically created,
-		 * so they can share a same sort key even though
-		 * the list is empty.
-		 */
-		if (perf_hpp__is_sort_entry(fmt)) {
-			struct perf_hpp_fmt *pos;
-
-			perf_hpp__for_each_sort_list(pos) {
-				if (perf_hpp__same_sort_entry(pos, fmt))
-					goto next;
-			}
+		perf_hpp_list__for_each_sort_list(list, pos) {
+			if (fmt_equal(fmt, pos))
+				goto next;
 		}
 
 		perf_hpp__register_sort_field(fmt);
@@ -579,20 +587,29 @@
 	}
 }
 
-void perf_hpp__reset_output_field(void)
+
+static void fmt_free(struct perf_hpp_fmt *fmt)
+{
+	if (fmt->free)
+		fmt->free(fmt);
+}
+
+void perf_hpp__reset_output_field(struct perf_hpp_list *list)
 {
 	struct perf_hpp_fmt *fmt, *tmp;
 
 	/* reset output fields */
-	perf_hpp__for_each_format_safe(fmt, tmp) {
+	perf_hpp_list__for_each_format_safe(list, fmt, tmp) {
 		list_del_init(&fmt->list);
 		list_del_init(&fmt->sort_list);
+		fmt_free(fmt);
 	}
 
 	/* reset sort keys */
-	perf_hpp__for_each_sort_list_safe(fmt, tmp) {
+	perf_hpp_list__for_each_sort_list_safe(list, fmt, tmp) {
 		list_del_init(&fmt->list);
 		list_del_init(&fmt->sort_list);
+		fmt_free(fmt);
 	}
 }
 
@@ -606,7 +623,7 @@
 	bool first = true;
 	struct perf_hpp dummy_hpp;
 
-	perf_hpp__for_each_format(fmt) {
+	hists__for_each_format(hists, fmt) {
 		if (perf_hpp__should_skip(fmt, hists))
 			continue;
 
@@ -624,22 +641,39 @@
 	return ret;
 }
 
+unsigned int hists__overhead_width(struct hists *hists)
+{
+	struct perf_hpp_fmt *fmt;
+	int ret = 0;
+	bool first = true;
+	struct perf_hpp dummy_hpp;
+
+	hists__for_each_format(hists, fmt) {
+		if (perf_hpp__is_sort_entry(fmt) || perf_hpp__is_dynamic_entry(fmt))
+			break;
+
+		if (first)
+			first = false;
+		else
+			ret += 2;
+
+		ret += fmt->width(fmt, &dummy_hpp, hists_to_evsel(hists));
+	}
+
+	return ret;
+}
+
 void perf_hpp__reset_width(struct perf_hpp_fmt *fmt, struct hists *hists)
 {
-	int idx;
-
 	if (perf_hpp__is_sort_entry(fmt))
 		return perf_hpp__reset_sort_width(fmt, hists);
 
-	for (idx = 0; idx < PERF_HPP__MAX_INDEX; idx++) {
-		if (fmt == &perf_hpp__format[idx])
-			break;
-	}
-
-	if (idx == PERF_HPP__MAX_INDEX)
+	if (perf_hpp__is_dynamic_entry(fmt))
 		return;
 
-	switch (idx) {
+	BUG_ON(fmt->idx >= PERF_HPP__MAX_INDEX);
+
+	switch (fmt->idx) {
 	case PERF_HPP__OVERHEAD:
 	case PERF_HPP__OVERHEAD_SYS:
 	case PERF_HPP__OVERHEAD_US:
@@ -667,7 +701,7 @@
 	struct perf_hpp_fmt *fmt;
 	const char *ptr = width_list_str;
 
-	perf_hpp__for_each_format(fmt) {
+	perf_hpp_list__for_each_format(&perf_hpp_list, fmt) {
 		char *p;
 
 		int len = strtol(ptr, &p, 10);
@@ -679,3 +713,71 @@
 			break;
 	}
 }
+
+static int add_hierarchy_fmt(struct hists *hists, struct perf_hpp_fmt *fmt)
+{
+	struct perf_hpp_list_node *node = NULL;
+	struct perf_hpp_fmt *fmt_copy;
+	bool found = false;
+	bool skip = perf_hpp__should_skip(fmt, hists);
+
+	list_for_each_entry(node, &hists->hpp_formats, list) {
+		if (node->level == fmt->level) {
+			found = true;
+			break;
+		}
+	}
+
+	if (!found) {
+		node = malloc(sizeof(*node));
+		if (node == NULL)
+			return -1;
+
+		node->skip = skip;
+		node->level = fmt->level;
+		perf_hpp_list__init(&node->hpp);
+
+		hists->nr_hpp_node++;
+		list_add_tail(&node->list, &hists->hpp_formats);
+	}
+
+	fmt_copy = perf_hpp_fmt__dup(fmt);
+	if (fmt_copy == NULL)
+		return -1;
+
+	if (!skip)
+		node->skip = false;
+
+	list_add_tail(&fmt_copy->list, &node->hpp.fields);
+	list_add_tail(&fmt_copy->sort_list, &node->hpp.sorts);
+
+	return 0;
+}
+
+int perf_hpp__setup_hists_formats(struct perf_hpp_list *list,
+				  struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel;
+	struct perf_hpp_fmt *fmt;
+	struct hists *hists;
+	int ret;
+
+	if (!symbol_conf.report_hierarchy)
+		return 0;
+
+	evlist__for_each(evlist, evsel) {
+		hists = evsel__hists(evsel);
+
+		perf_hpp_list__for_each_sort_list(list, fmt) {
+			if (perf_hpp__is_dynamic_entry(fmt) &&
+			    !perf_hpp__defined_dynamic_entry(fmt, hists))
+				continue;
+
+			ret = add_hierarchy_fmt(hists, fmt);
+			if (ret < 0)
+				return ret;
+		}
+	}
+
+	return 0;
+}
diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c
index 387110d..7aff5ac 100644
--- a/tools/perf/ui/stdio/hist.c
+++ b/tools/perf/ui/stdio/hist.c
@@ -165,8 +165,28 @@
 	return ret;
 }
 
+/*
+ * If have one single callchain root, don't bother printing
+ * its percentage (100 % in fractal mode and the same percentage
+ * than the hist in graph mode). This also avoid one level of column.
+ *
+ * However when percent-limit applied, it's possible that single callchain
+ * node have different (non-100% in fractal mode) percentage.
+ */
+static bool need_percent_display(struct rb_node *node, u64 parent_samples)
+{
+	struct callchain_node *cnode;
+
+	if (rb_next(node))
+		return true;
+
+	cnode = rb_entry(node, struct callchain_node, rb_node);
+	return callchain_cumul_hits(cnode) != parent_samples;
+}
+
 static size_t callchain__fprintf_graph(FILE *fp, struct rb_root *root,
-				       u64 total_samples, int left_margin)
+				       u64 total_samples, u64 parent_samples,
+				       int left_margin)
 {
 	struct callchain_node *cnode;
 	struct callchain_list *chain;
@@ -177,13 +197,8 @@
 	int ret = 0;
 	char bf[1024];
 
-	/*
-	 * If have one single callchain root, don't bother printing
-	 * its percentage (100 % in fractal mode and the same percentage
-	 * than the hist in graph mode). This also avoid one level of column.
-	 */
 	node = rb_first(root);
-	if (node && !rb_next(node)) {
+	if (node && !need_percent_display(node, parent_samples)) {
 		cnode = rb_entry(node, struct callchain_node, rb_node);
 		list_for_each_entry(chain, &cnode->val, list) {
 			/*
@@ -213,9 +228,15 @@
 		root = &cnode->rb_root;
 	}
 
+	if (callchain_param.mode == CHAIN_GRAPH_REL)
+		total_samples = parent_samples;
+
 	ret += __callchain__fprintf_graph(fp, root, total_samples,
 					  1, 1, left_margin);
-	ret += fprintf(fp, "\n");
+	if (ret) {
+		/* do not add a blank line if it printed nothing */
+		ret += fprintf(fp, "\n");
+	}
 
 	return ret;
 }
@@ -323,16 +344,19 @@
 					    u64 total_samples, int left_margin,
 					    FILE *fp)
 {
+	u64 parent_samples = he->stat.period;
+
+	if (symbol_conf.cumulate_callchain)
+		parent_samples = he->stat_acc->period;
+
 	switch (callchain_param.mode) {
 	case CHAIN_GRAPH_REL:
-		return callchain__fprintf_graph(fp, &he->sorted_chain,
-						symbol_conf.cumulate_callchain ?
-						he->stat_acc->period : he->stat.period,
-						left_margin);
+		return callchain__fprintf_graph(fp, &he->sorted_chain, total_samples,
+						parent_samples, left_margin);
 		break;
 	case CHAIN_GRAPH_ABS:
 		return callchain__fprintf_graph(fp, &he->sorted_chain, total_samples,
-						left_margin);
+						parent_samples, left_margin);
 		break;
 	case CHAIN_FLAT:
 		return callchain__fprintf_flat(fp, &he->sorted_chain, total_samples);
@@ -349,30 +373,6 @@
 	return 0;
 }
 
-static size_t hist_entry__callchain_fprintf(struct hist_entry *he,
-					    struct hists *hists,
-					    FILE *fp)
-{
-	int left_margin = 0;
-	u64 total_period = hists->stats.total_period;
-
-	if (field_order == NULL && (sort_order == NULL ||
-				    !prefixcmp(sort_order, "comm"))) {
-		struct perf_hpp_fmt *fmt;
-
-		perf_hpp__for_each_format(fmt) {
-			if (!perf_hpp__is_sort_entry(fmt))
-				continue;
-
-			/* must be 'comm' sort entry */
-			left_margin = fmt->width(fmt, NULL, hists_to_evsel(hists));
-			left_margin -= thread__comm_len(he->thread);
-			break;
-		}
-	}
-	return hist_entry_callchain__fprintf(he, total_period, left_margin, fp);
-}
-
 static int hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp)
 {
 	const char *sep = symbol_conf.field_sep;
@@ -384,7 +384,7 @@
 	if (symbol_conf.exclude_other && !he->parent)
 		return 0;
 
-	perf_hpp__for_each_format(fmt) {
+	hists__for_each_format(he->hists, fmt) {
 		if (perf_hpp__should_skip(fmt, he->hists))
 			continue;
 
@@ -403,12 +403,94 @@
 		else
 			ret = fmt->entry(fmt, hpp, he);
 
+		ret = hist_entry__snprintf_alignment(he, hpp, fmt, ret);
 		advance_hpp(hpp, ret);
 	}
 
 	return hpp->buf - start;
 }
 
+static int hist_entry__hierarchy_fprintf(struct hist_entry *he,
+					 struct perf_hpp *hpp,
+					 struct hists *hists,
+					 FILE *fp)
+{
+	const char *sep = symbol_conf.field_sep;
+	struct perf_hpp_fmt *fmt;
+	struct perf_hpp_list_node *fmt_node;
+	char *buf = hpp->buf;
+	size_t size = hpp->size;
+	int ret, printed = 0;
+	bool first = true;
+
+	if (symbol_conf.exclude_other && !he->parent)
+		return 0;
+
+	ret = scnprintf(hpp->buf, hpp->size, "%*s", he->depth * HIERARCHY_INDENT, "");
+	advance_hpp(hpp, ret);
+
+	/* the first hpp_list_node is for overhead columns */
+	fmt_node = list_first_entry(&hists->hpp_formats,
+				    struct perf_hpp_list_node, list);
+	perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) {
+		/*
+		 * If there's no field_sep, we still need
+		 * to display initial '  '.
+		 */
+		if (!sep || !first) {
+			ret = scnprintf(hpp->buf, hpp->size, "%s", sep ?: "  ");
+			advance_hpp(hpp, ret);
+		} else
+			first = false;
+
+		if (perf_hpp__use_color() && fmt->color)
+			ret = fmt->color(fmt, hpp, he);
+		else
+			ret = fmt->entry(fmt, hpp, he);
+
+		ret = hist_entry__snprintf_alignment(he, hpp, fmt, ret);
+		advance_hpp(hpp, ret);
+	}
+
+	if (!sep)
+		ret = scnprintf(hpp->buf, hpp->size, "%*s",
+				(hists->nr_hpp_node - 2) * HIERARCHY_INDENT, "");
+	advance_hpp(hpp, ret);
+
+	printed += fprintf(fp, "%s", buf);
+
+	perf_hpp_list__for_each_format(he->hpp_list, fmt) {
+		hpp->buf  = buf;
+		hpp->size = size;
+
+		/*
+		 * No need to call hist_entry__snprintf_alignment() since this
+		 * fmt is always the last column in the hierarchy mode.
+		 */
+		if (perf_hpp__use_color() && fmt->color)
+			fmt->color(fmt, hpp, he);
+		else
+			fmt->entry(fmt, hpp, he);
+
+		/*
+		 * dynamic entries are right-aligned but we want left-aligned
+		 * in the hierarchy mode
+		 */
+		printed += fprintf(fp, "%s%s", sep ?: "  ", ltrim(buf));
+	}
+	printed += putc('\n', fp);
+
+	if (symbol_conf.use_callchain && he->leaf) {
+		u64 total = hists__total_period(hists);
+
+		printed += hist_entry_callchain__fprintf(he, total, 0, fp);
+		goto out;
+	}
+
+out:
+	return printed;
+}
+
 static int hist_entry__fprintf(struct hist_entry *he, size_t size,
 			       struct hists *hists,
 			       char *bf, size_t bfsz, FILE *fp)
@@ -418,24 +500,134 @@
 		.buf		= bf,
 		.size		= size,
 	};
+	u64 total_period = hists->stats.total_period;
 
 	if (size == 0 || size > bfsz)
 		size = hpp.size = bfsz;
 
+	if (symbol_conf.report_hierarchy)
+		return hist_entry__hierarchy_fprintf(he, &hpp, hists, fp);
+
 	hist_entry__snprintf(he, &hpp);
 
 	ret = fprintf(fp, "%s\n", bf);
 
 	if (symbol_conf.use_callchain)
-		ret += hist_entry__callchain_fprintf(he, hists, fp);
+		ret += hist_entry_callchain__fprintf(he, total_period, 0, fp);
 
 	return ret;
 }
 
+static int print_hierarchy_indent(const char *sep, int indent,
+				  const char *line, FILE *fp)
+{
+	if (sep != NULL || indent < 2)
+		return 0;
+
+	return fprintf(fp, "%-.*s", (indent - 2) * HIERARCHY_INDENT, line);
+}
+
+static int print_hierarchy_header(struct hists *hists, struct perf_hpp *hpp,
+				  const char *sep, FILE *fp)
+{
+	bool first_node, first_col;
+	int indent;
+	int depth;
+	unsigned width = 0;
+	unsigned header_width = 0;
+	struct perf_hpp_fmt *fmt;
+	struct perf_hpp_list_node *fmt_node;
+
+	indent = hists->nr_hpp_node;
+
+	/* preserve max indent depth for column headers */
+	print_hierarchy_indent(sep, indent, spaces, fp);
+
+	/* the first hpp_list_node is for overhead columns */
+	fmt_node = list_first_entry(&hists->hpp_formats,
+				    struct perf_hpp_list_node, list);
+
+	perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) {
+		fmt->header(fmt, hpp, hists_to_evsel(hists));
+		fprintf(fp, "%s%s", hpp->buf, sep ?: "  ");
+	}
+
+	/* combine sort headers with ' / ' */
+	first_node = true;
+	list_for_each_entry_continue(fmt_node, &hists->hpp_formats, list) {
+		if (!first_node)
+			header_width += fprintf(fp, " / ");
+		first_node = false;
+
+		first_col = true;
+		perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) {
+			if (perf_hpp__should_skip(fmt, hists))
+				continue;
+
+			if (!first_col)
+				header_width += fprintf(fp, "+");
+			first_col = false;
+
+			fmt->header(fmt, hpp, hists_to_evsel(hists));
+			rtrim(hpp->buf);
+
+			header_width += fprintf(fp, "%s", ltrim(hpp->buf));
+		}
+	}
+
+	fprintf(fp, "\n# ");
+
+	/* preserve max indent depth for initial dots */
+	print_hierarchy_indent(sep, indent, dots, fp);
+
+	/* the first hpp_list_node is for overhead columns */
+	fmt_node = list_first_entry(&hists->hpp_formats,
+				    struct perf_hpp_list_node, list);
+
+	first_col = true;
+	perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) {
+		if (!first_col)
+			fprintf(fp, "%s", sep ?: "..");
+		first_col = false;
+
+		width = fmt->width(fmt, hpp, hists_to_evsel(hists));
+		fprintf(fp, "%.*s", width, dots);
+	}
+
+	depth = 0;
+	list_for_each_entry_continue(fmt_node, &hists->hpp_formats, list) {
+		first_col = true;
+		width = depth * HIERARCHY_INDENT;
+
+		perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) {
+			if (perf_hpp__should_skip(fmt, hists))
+				continue;
+
+			if (!first_col)
+				width++;  /* for '+' sign between column header */
+			first_col = false;
+
+			width += fmt->width(fmt, hpp, hists_to_evsel(hists));
+		}
+
+		if (width > header_width)
+			header_width = width;
+
+		depth++;
+	}
+
+	fprintf(fp, "%s%-.*s", sep ?: "  ", header_width, dots);
+
+	fprintf(fp, "\n#\n");
+
+	return 2;
+}
+
 size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows,
 		      int max_cols, float min_pcnt, FILE *fp)
 {
 	struct perf_hpp_fmt *fmt;
+	struct perf_hpp_list_node *fmt_node;
 	struct rb_node *nd;
 	size_t ret = 0;
 	unsigned int width;
@@ -449,10 +641,11 @@
 	bool first = true;
 	size_t linesz;
 	char *line = NULL;
+	unsigned indent;
 
 	init_rem_hits();
 
-	perf_hpp__for_each_format(fmt)
+	hists__for_each_format(hists, fmt)
 		perf_hpp__reset_width(fmt, hists);
 
 	if (symbol_conf.col_width_list_str)
@@ -463,7 +656,16 @@
 
 	fprintf(fp, "# ");
 
-	perf_hpp__for_each_format(fmt) {
+	if (symbol_conf.report_hierarchy) {
+		list_for_each_entry(fmt_node, &hists->hpp_formats, list) {
+			perf_hpp_list__for_each_format(&fmt_node->hpp, fmt)
+				perf_hpp__reset_width(fmt, hists);
+		}
+		nr_rows += print_hierarchy_header(hists, &dummy_hpp, sep, fp);
+		goto print_entries;
+	}
+
+	hists__for_each_format(hists, fmt) {
 		if (perf_hpp__should_skip(fmt, hists))
 			continue;
 
@@ -487,7 +689,7 @@
 
 	fprintf(fp, "# ");
 
-	perf_hpp__for_each_format(fmt) {
+	hists__for_each_format(hists, fmt) {
 		unsigned int i;
 
 		if (perf_hpp__should_skip(fmt, hists))
@@ -520,7 +722,9 @@
 		goto out;
 	}
 
-	for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) {
+	indent = hists__overhead_width(hists) + 4;
+
+	for (nd = rb_first(&hists->entries); nd; nd = __rb_hierarchy_next(nd, HMD_FORCE_CHILD)) {
 		struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
 		float percent;
 
@@ -536,6 +740,20 @@
 		if (max_rows && ++nr_rows >= max_rows)
 			break;
 
+		/*
+		 * If all children are filtered out or percent-limited,
+		 * display "no entry >= x.xx%" message.
+		 */
+		if (!h->leaf && !hist_entry__has_hierarchy_children(h, min_pcnt)) {
+			int depth = hists->nr_hpp_node + h->depth + 1;
+
+			print_hierarchy_indent(sep, depth, spaces, fp);
+			fprintf(fp, "%*sno entry >= %.2f%%\n", indent, "", min_pcnt);
+
+			if (max_rows && ++nr_rows >= max_rows)
+				break;
+		}
+
 		if (h->ms.map == NULL && verbose > 1) {
 			__map_groups__fprintf_maps(h->thread->mg,
 						   MAP__FUNCTION, fp);
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 5eec53a..eea25e2 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -82,6 +82,7 @@
 libperf-y += parse-regs-options.o
 libperf-y += term.o
 libperf-y += help-unknown-cmd.o
+libperf-y += mem-events.o
 
 libperf-$(CONFIG_LIBBPF) += bpf-loader.o
 libperf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o
@@ -105,8 +106,17 @@
 
 libperf-$(CONFIG_ZLIB) += zlib.o
 libperf-$(CONFIG_LZMA) += lzma.o
+libperf-y += demangle-java.o
+
+ifdef CONFIG_JITDUMP
+libperf-$(CONFIG_LIBELF) += jitdump.o
+libperf-$(CONFIG_LIBELF) += genelf.o
+libperf-$(CONFIG_LIBELF) += genelf_debug.o
+endif
 
 CFLAGS_config.o   += -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
+# avoid compiler warnings in 32-bit mode
+CFLAGS_genelf_debug.o  += -Wno-packed
 
 $(OUTPUT)util/parse-events-flex.c: util/parse-events.l $(OUTPUT)util/parse-events-bison.c
 	$(call rule_mkdir)
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index 360fda0..ec164fe 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -478,10 +478,11 @@
 			 heap_array[last].ordinal);
 }
 
-size_t auxtrace_record__info_priv_size(struct auxtrace_record *itr)
+size_t auxtrace_record__info_priv_size(struct auxtrace_record *itr,
+				       struct perf_evlist *evlist)
 {
 	if (itr)
-		return itr->info_priv_size(itr);
+		return itr->info_priv_size(itr, evlist);
 	return 0;
 }
 
@@ -852,7 +853,7 @@
 	int err;
 
 	pr_debug2("Synthesizing auxtrace information\n");
-	priv_size = auxtrace_record__info_priv_size(itr);
+	priv_size = auxtrace_record__info_priv_size(itr, session->evlist);
 	ev = zalloc(sizeof(struct auxtrace_info_event) + priv_size);
 	if (!ev)
 		return -ENOMEM;
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
index b86f90db..e5a8e2d 100644
--- a/tools/perf/util/auxtrace.h
+++ b/tools/perf/util/auxtrace.h
@@ -293,7 +293,8 @@
 	int (*recording_options)(struct auxtrace_record *itr,
 				 struct perf_evlist *evlist,
 				 struct record_opts *opts);
-	size_t (*info_priv_size)(struct auxtrace_record *itr);
+	size_t (*info_priv_size)(struct auxtrace_record *itr,
+				 struct perf_evlist *evlist);
 	int (*info_fill)(struct auxtrace_record *itr,
 			 struct perf_session *session,
 			 struct auxtrace_info_event *auxtrace_info,
@@ -429,7 +430,8 @@
 int auxtrace_record__options(struct auxtrace_record *itr,
 			     struct perf_evlist *evlist,
 			     struct record_opts *opts);
-size_t auxtrace_record__info_priv_size(struct auxtrace_record *itr);
+size_t auxtrace_record__info_priv_size(struct auxtrace_record *itr,
+				       struct perf_evlist *evlist);
 int auxtrace_record__info_fill(struct auxtrace_record *itr,
 			       struct perf_session *session,
 			       struct auxtrace_info_event *auxtrace_info,
diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c
index 540a7ef..0967ce6 100644
--- a/tools/perf/util/bpf-loader.c
+++ b/tools/perf/util/bpf-loader.c
@@ -7,6 +7,7 @@
 
 #include <linux/bpf.h>
 #include <bpf/libbpf.h>
+#include <bpf/bpf.h>
 #include <linux/err.h>
 #include <linux/string.h>
 #include "perf.h"
@@ -16,6 +17,7 @@
 #include "llvm-utils.h"
 #include "probe-event.h"
 #include "probe-finder.h" // for MAX_PROBES
+#include "parse-events.h"
 #include "llvm-utils.h"
 
 #define DEFINE_PRINT_FN(name, level) \
@@ -108,8 +110,8 @@
 }
 
 static void
-bpf_prog_priv__clear(struct bpf_program *prog __maybe_unused,
-		     void *_priv)
+clear_prog_priv(struct bpf_program *prog __maybe_unused,
+		void *_priv)
 {
 	struct bpf_prog_priv *priv = _priv;
 
@@ -337,7 +339,7 @@
 	}
 	pr_debug("bpf: config '%s' is ok\n", config_str);
 
-	err = bpf_program__set_private(prog, priv, bpf_prog_priv__clear);
+	err = bpf_program__set_private(prog, priv, clear_prog_priv);
 	if (err) {
 		pr_debug("Failed to set priv for program '%s'\n", config_str);
 		goto errout;
@@ -739,6 +741,682 @@
 	return 0;
 }
 
+enum bpf_map_op_type {
+	BPF_MAP_OP_SET_VALUE,
+	BPF_MAP_OP_SET_EVSEL,
+};
+
+enum bpf_map_key_type {
+	BPF_MAP_KEY_ALL,
+	BPF_MAP_KEY_RANGES,
+};
+
+struct bpf_map_op {
+	struct list_head list;
+	enum bpf_map_op_type op_type;
+	enum bpf_map_key_type key_type;
+	union {
+		struct parse_events_array array;
+	} k;
+	union {
+		u64 value;
+		struct perf_evsel *evsel;
+	} v;
+};
+
+struct bpf_map_priv {
+	struct list_head ops_list;
+};
+
+static void
+bpf_map_op__delete(struct bpf_map_op *op)
+{
+	if (!list_empty(&op->list))
+		list_del(&op->list);
+	if (op->key_type == BPF_MAP_KEY_RANGES)
+		parse_events__clear_array(&op->k.array);
+	free(op);
+}
+
+static void
+bpf_map_priv__purge(struct bpf_map_priv *priv)
+{
+	struct bpf_map_op *pos, *n;
+
+	list_for_each_entry_safe(pos, n, &priv->ops_list, list) {
+		list_del_init(&pos->list);
+		bpf_map_op__delete(pos);
+	}
+}
+
+static void
+bpf_map_priv__clear(struct bpf_map *map __maybe_unused,
+		    void *_priv)
+{
+	struct bpf_map_priv *priv = _priv;
+
+	bpf_map_priv__purge(priv);
+	free(priv);
+}
+
+static int
+bpf_map_op_setkey(struct bpf_map_op *op, struct parse_events_term *term)
+{
+	op->key_type = BPF_MAP_KEY_ALL;
+	if (!term)
+		return 0;
+
+	if (term->array.nr_ranges) {
+		size_t memsz = term->array.nr_ranges *
+				sizeof(op->k.array.ranges[0]);
+
+		op->k.array.ranges = memdup(term->array.ranges, memsz);
+		if (!op->k.array.ranges) {
+			pr_debug("No enough memory to alloc indices for map\n");
+			return -ENOMEM;
+		}
+		op->key_type = BPF_MAP_KEY_RANGES;
+		op->k.array.nr_ranges = term->array.nr_ranges;
+	}
+	return 0;
+}
+
+static struct bpf_map_op *
+bpf_map_op__new(struct parse_events_term *term)
+{
+	struct bpf_map_op *op;
+	int err;
+
+	op = zalloc(sizeof(*op));
+	if (!op) {
+		pr_debug("Failed to alloc bpf_map_op\n");
+		return ERR_PTR(-ENOMEM);
+	}
+	INIT_LIST_HEAD(&op->list);
+
+	err = bpf_map_op_setkey(op, term);
+	if (err) {
+		free(op);
+		return ERR_PTR(err);
+	}
+	return op;
+}
+
+static int
+bpf_map__add_op(struct bpf_map *map, struct bpf_map_op *op)
+{
+	struct bpf_map_priv *priv;
+	const char *map_name;
+	int err;
+
+	map_name = bpf_map__get_name(map);
+	err = bpf_map__get_private(map, (void **)&priv);
+	if (err) {
+		pr_debug("Failed to get private from map %s\n", map_name);
+		return err;
+	}
+
+	if (!priv) {
+		priv = zalloc(sizeof(*priv));
+		if (!priv) {
+			pr_debug("No enough memory to alloc map private\n");
+			return -ENOMEM;
+		}
+		INIT_LIST_HEAD(&priv->ops_list);
+
+		if (bpf_map__set_private(map, priv, bpf_map_priv__clear)) {
+			free(priv);
+			return -BPF_LOADER_ERRNO__INTERNAL;
+		}
+	}
+
+	list_add_tail(&op->list, &priv->ops_list);
+	return 0;
+}
+
+static struct bpf_map_op *
+bpf_map__add_newop(struct bpf_map *map, struct parse_events_term *term)
+{
+	struct bpf_map_op *op;
+	int err;
+
+	op = bpf_map_op__new(term);
+	if (IS_ERR(op))
+		return op;
+
+	err = bpf_map__add_op(map, op);
+	if (err) {
+		bpf_map_op__delete(op);
+		return ERR_PTR(err);
+	}
+	return op;
+}
+
+static int
+__bpf_map__config_value(struct bpf_map *map,
+			struct parse_events_term *term)
+{
+	struct bpf_map_def def;
+	struct bpf_map_op *op;
+	const char *map_name;
+	int err;
+
+	map_name = bpf_map__get_name(map);
+
+	err = bpf_map__get_def(map, &def);
+	if (err) {
+		pr_debug("Unable to get map definition from '%s'\n",
+			 map_name);
+		return -BPF_LOADER_ERRNO__INTERNAL;
+	}
+
+	if (def.type != BPF_MAP_TYPE_ARRAY) {
+		pr_debug("Map %s type is not BPF_MAP_TYPE_ARRAY\n",
+			 map_name);
+		return -BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE;
+	}
+	if (def.key_size < sizeof(unsigned int)) {
+		pr_debug("Map %s has incorrect key size\n", map_name);
+		return -BPF_LOADER_ERRNO__OBJCONF_MAP_KEYSIZE;
+	}
+	switch (def.value_size) {
+	case 1:
+	case 2:
+	case 4:
+	case 8:
+		break;
+	default:
+		pr_debug("Map %s has incorrect value size\n", map_name);
+		return -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUESIZE;
+	}
+
+	op = bpf_map__add_newop(map, term);
+	if (IS_ERR(op))
+		return PTR_ERR(op);
+	op->op_type = BPF_MAP_OP_SET_VALUE;
+	op->v.value = term->val.num;
+	return 0;
+}
+
+static int
+bpf_map__config_value(struct bpf_map *map,
+		      struct parse_events_term *term,
+		      struct perf_evlist *evlist __maybe_unused)
+{
+	if (!term->err_val) {
+		pr_debug("Config value not set\n");
+		return -BPF_LOADER_ERRNO__OBJCONF_CONF;
+	}
+
+	if (term->type_val != PARSE_EVENTS__TERM_TYPE_NUM) {
+		pr_debug("ERROR: wrong value type for 'value'\n");
+		return -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUE;
+	}
+
+	return __bpf_map__config_value(map, term);
+}
+
+static int
+__bpf_map__config_event(struct bpf_map *map,
+			struct parse_events_term *term,
+			struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel;
+	struct bpf_map_def def;
+	struct bpf_map_op *op;
+	const char *map_name;
+	int err;
+
+	map_name = bpf_map__get_name(map);
+	evsel = perf_evlist__find_evsel_by_str(evlist, term->val.str);
+	if (!evsel) {
+		pr_debug("Event (for '%s') '%s' doesn't exist\n",
+			 map_name, term->val.str);
+		return -BPF_LOADER_ERRNO__OBJCONF_MAP_NOEVT;
+	}
+
+	err = bpf_map__get_def(map, &def);
+	if (err) {
+		pr_debug("Unable to get map definition from '%s'\n",
+			 map_name);
+		return err;
+	}
+
+	/*
+	 * No need to check key_size and value_size:
+	 * kernel has already checked them.
+	 */
+	if (def.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
+		pr_debug("Map %s type is not BPF_MAP_TYPE_PERF_EVENT_ARRAY\n",
+			 map_name);
+		return -BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE;
+	}
+
+	op = bpf_map__add_newop(map, term);
+	if (IS_ERR(op))
+		return PTR_ERR(op);
+	op->op_type = BPF_MAP_OP_SET_EVSEL;
+	op->v.evsel = evsel;
+	return 0;
+}
+
+static int
+bpf_map__config_event(struct bpf_map *map,
+		      struct parse_events_term *term,
+		      struct perf_evlist *evlist)
+{
+	if (!term->err_val) {
+		pr_debug("Config value not set\n");
+		return -BPF_LOADER_ERRNO__OBJCONF_CONF;
+	}
+
+	if (term->type_val != PARSE_EVENTS__TERM_TYPE_STR) {
+		pr_debug("ERROR: wrong value type for 'event'\n");
+		return -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUE;
+	}
+
+	return __bpf_map__config_event(map, term, evlist);
+}
+
+struct bpf_obj_config__map_func {
+	const char *config_opt;
+	int (*config_func)(struct bpf_map *, struct parse_events_term *,
+			   struct perf_evlist *);
+};
+
+struct bpf_obj_config__map_func bpf_obj_config__map_funcs[] = {
+	{"value", bpf_map__config_value},
+	{"event", bpf_map__config_event},
+};
+
+static int
+config_map_indices_range_check(struct parse_events_term *term,
+			       struct bpf_map *map,
+			       const char *map_name)
+{
+	struct parse_events_array *array = &term->array;
+	struct bpf_map_def def;
+	unsigned int i;
+	int err;
+
+	if (!array->nr_ranges)
+		return 0;
+	if (!array->ranges) {
+		pr_debug("ERROR: map %s: array->nr_ranges is %d but range array is NULL\n",
+			 map_name, (int)array->nr_ranges);
+		return -BPF_LOADER_ERRNO__INTERNAL;
+	}
+
+	err = bpf_map__get_def(map, &def);
+	if (err) {
+		pr_debug("ERROR: Unable to get map definition from '%s'\n",
+			 map_name);
+		return -BPF_LOADER_ERRNO__INTERNAL;
+	}
+
+	for (i = 0; i < array->nr_ranges; i++) {
+		unsigned int start = array->ranges[i].start;
+		size_t length = array->ranges[i].length;
+		unsigned int idx = start + length - 1;
+
+		if (idx >= def.max_entries) {
+			pr_debug("ERROR: index %d too large\n", idx);
+			return -BPF_LOADER_ERRNO__OBJCONF_MAP_IDX2BIG;
+		}
+	}
+	return 0;
+}
+
+static int
+bpf__obj_config_map(struct bpf_object *obj,
+		    struct parse_events_term *term,
+		    struct perf_evlist *evlist,
+		    int *key_scan_pos)
+{
+	/* key is "map:<mapname>.<config opt>" */
+	char *map_name = strdup(term->config + sizeof("map:") - 1);
+	struct bpf_map *map;
+	int err = -BPF_LOADER_ERRNO__OBJCONF_OPT;
+	char *map_opt;
+	size_t i;
+
+	if (!map_name)
+		return -ENOMEM;
+
+	map_opt = strchr(map_name, '.');
+	if (!map_opt) {
+		pr_debug("ERROR: Invalid map config: %s\n", map_name);
+		goto out;
+	}
+
+	*map_opt++ = '\0';
+	if (*map_opt == '\0') {
+		pr_debug("ERROR: Invalid map option: %s\n", term->config);
+		goto out;
+	}
+
+	map = bpf_object__get_map_by_name(obj, map_name);
+	if (!map) {
+		pr_debug("ERROR: Map %s doesn't exist\n", map_name);
+		err = -BPF_LOADER_ERRNO__OBJCONF_MAP_NOTEXIST;
+		goto out;
+	}
+
+	*key_scan_pos += strlen(map_opt);
+	err = config_map_indices_range_check(term, map, map_name);
+	if (err)
+		goto out;
+	*key_scan_pos -= strlen(map_opt);
+
+	for (i = 0; i < ARRAY_SIZE(bpf_obj_config__map_funcs); i++) {
+		struct bpf_obj_config__map_func *func =
+				&bpf_obj_config__map_funcs[i];
+
+		if (strcmp(map_opt, func->config_opt) == 0) {
+			err = func->config_func(map, term, evlist);
+			goto out;
+		}
+	}
+
+	pr_debug("ERROR: Invalid map config option '%s'\n", map_opt);
+	err = -BPF_LOADER_ERRNO__OBJCONF_MAP_OPT;
+out:
+	free(map_name);
+	if (!err)
+		key_scan_pos += strlen(map_opt);
+	return err;
+}
+
+int bpf__config_obj(struct bpf_object *obj,
+		    struct parse_events_term *term,
+		    struct perf_evlist *evlist,
+		    int *error_pos)
+{
+	int key_scan_pos = 0;
+	int err;
+
+	if (!obj || !term || !term->config)
+		return -EINVAL;
+
+	if (!prefixcmp(term->config, "map:")) {
+		key_scan_pos = sizeof("map:") - 1;
+		err = bpf__obj_config_map(obj, term, evlist, &key_scan_pos);
+		goto out;
+	}
+	err = -BPF_LOADER_ERRNO__OBJCONF_OPT;
+out:
+	if (error_pos)
+		*error_pos = key_scan_pos;
+	return err;
+
+}
+
+typedef int (*map_config_func_t)(const char *name, int map_fd,
+				 struct bpf_map_def *pdef,
+				 struct bpf_map_op *op,
+				 void *pkey, void *arg);
+
+static int
+foreach_key_array_all(map_config_func_t func,
+		      void *arg, const char *name,
+		      int map_fd, struct bpf_map_def *pdef,
+		      struct bpf_map_op *op)
+{
+	unsigned int i;
+	int err;
+
+	for (i = 0; i < pdef->max_entries; i++) {
+		err = func(name, map_fd, pdef, op, &i, arg);
+		if (err) {
+			pr_debug("ERROR: failed to insert value to %s[%u]\n",
+				 name, i);
+			return err;
+		}
+	}
+	return 0;
+}
+
+static int
+foreach_key_array_ranges(map_config_func_t func, void *arg,
+			 const char *name, int map_fd,
+			 struct bpf_map_def *pdef,
+			 struct bpf_map_op *op)
+{
+	unsigned int i, j;
+	int err;
+
+	for (i = 0; i < op->k.array.nr_ranges; i++) {
+		unsigned int start = op->k.array.ranges[i].start;
+		size_t length = op->k.array.ranges[i].length;
+
+		for (j = 0; j < length; j++) {
+			unsigned int idx = start + j;
+
+			err = func(name, map_fd, pdef, op, &idx, arg);
+			if (err) {
+				pr_debug("ERROR: failed to insert value to %s[%u]\n",
+					 name, idx);
+				return err;
+			}
+		}
+	}
+	return 0;
+}
+
+static int
+bpf_map_config_foreach_key(struct bpf_map *map,
+			   map_config_func_t func,
+			   void *arg)
+{
+	int err, map_fd;
+	const char *name;
+	struct bpf_map_op *op;
+	struct bpf_map_def def;
+	struct bpf_map_priv *priv;
+
+	name = bpf_map__get_name(map);
+
+	err = bpf_map__get_private(map, (void **)&priv);
+	if (err) {
+		pr_debug("ERROR: failed to get private from map %s\n", name);
+		return -BPF_LOADER_ERRNO__INTERNAL;
+	}
+	if (!priv || list_empty(&priv->ops_list)) {
+		pr_debug("INFO: nothing to config for map %s\n", name);
+		return 0;
+	}
+
+	err = bpf_map__get_def(map, &def);
+	if (err) {
+		pr_debug("ERROR: failed to get definition from map %s\n", name);
+		return -BPF_LOADER_ERRNO__INTERNAL;
+	}
+	map_fd = bpf_map__get_fd(map);
+	if (map_fd < 0) {
+		pr_debug("ERROR: failed to get fd from map %s\n", name);
+		return map_fd;
+	}
+
+	list_for_each_entry(op, &priv->ops_list, list) {
+		switch (def.type) {
+		case BPF_MAP_TYPE_ARRAY:
+		case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
+			switch (op->key_type) {
+			case BPF_MAP_KEY_ALL:
+				err = foreach_key_array_all(func, arg, name,
+							    map_fd, &def, op);
+				break;
+			case BPF_MAP_KEY_RANGES:
+				err = foreach_key_array_ranges(func, arg, name,
+							       map_fd, &def,
+							       op);
+				break;
+			default:
+				pr_debug("ERROR: keytype for map '%s' invalid\n",
+					 name);
+				return -BPF_LOADER_ERRNO__INTERNAL;
+			}
+			if (err)
+				return err;
+			break;
+		default:
+			pr_debug("ERROR: type of '%s' incorrect\n", name);
+			return -BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE;
+		}
+	}
+
+	return 0;
+}
+
+static int
+apply_config_value_for_key(int map_fd, void *pkey,
+			   size_t val_size, u64 val)
+{
+	int err = 0;
+
+	switch (val_size) {
+	case 1: {
+		u8 _val = (u8)(val);
+		err = bpf_map_update_elem(map_fd, pkey, &_val, BPF_ANY);
+		break;
+	}
+	case 2: {
+		u16 _val = (u16)(val);
+		err = bpf_map_update_elem(map_fd, pkey, &_val, BPF_ANY);
+		break;
+	}
+	case 4: {
+		u32 _val = (u32)(val);
+		err = bpf_map_update_elem(map_fd, pkey, &_val, BPF_ANY);
+		break;
+	}
+	case 8: {
+		err = bpf_map_update_elem(map_fd, pkey, &val, BPF_ANY);
+		break;
+	}
+	default:
+		pr_debug("ERROR: invalid value size\n");
+		return -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUESIZE;
+	}
+	if (err && errno)
+		err = -errno;
+	return err;
+}
+
+static int
+apply_config_evsel_for_key(const char *name, int map_fd, void *pkey,
+			   struct perf_evsel *evsel)
+{
+	struct xyarray *xy = evsel->fd;
+	struct perf_event_attr *attr;
+	unsigned int key, events;
+	bool check_pass = false;
+	int *evt_fd;
+	int err;
+
+	if (!xy) {
+		pr_debug("ERROR: evsel not ready for map %s\n", name);
+		return -BPF_LOADER_ERRNO__INTERNAL;
+	}
+
+	if (xy->row_size / xy->entry_size != 1) {
+		pr_debug("ERROR: Dimension of target event is incorrect for map %s\n",
+			 name);
+		return -BPF_LOADER_ERRNO__OBJCONF_MAP_EVTDIM;
+	}
+
+	attr = &evsel->attr;
+	if (attr->inherit) {
+		pr_debug("ERROR: Can't put inherit event into map %s\n", name);
+		return -BPF_LOADER_ERRNO__OBJCONF_MAP_EVTINH;
+	}
+
+	if (perf_evsel__is_bpf_output(evsel))
+		check_pass = true;
+	if (attr->type == PERF_TYPE_RAW)
+		check_pass = true;
+	if (attr->type == PERF_TYPE_HARDWARE)
+		check_pass = true;
+	if (!check_pass) {
+		pr_debug("ERROR: Event type is wrong for map %s\n", name);
+		return -BPF_LOADER_ERRNO__OBJCONF_MAP_EVTTYPE;
+	}
+
+	events = xy->entries / (xy->row_size / xy->entry_size);
+	key = *((unsigned int *)pkey);
+	if (key >= events) {
+		pr_debug("ERROR: there is no event %d for map %s\n",
+			 key, name);
+		return -BPF_LOADER_ERRNO__OBJCONF_MAP_MAPSIZE;
+	}
+	evt_fd = xyarray__entry(xy, key, 0);
+	err = bpf_map_update_elem(map_fd, pkey, evt_fd, BPF_ANY);
+	if (err && errno)
+		err = -errno;
+	return err;
+}
+
+static int
+apply_obj_config_map_for_key(const char *name, int map_fd,
+			     struct bpf_map_def *pdef __maybe_unused,
+			     struct bpf_map_op *op,
+			     void *pkey, void *arg __maybe_unused)
+{
+	int err;
+
+	switch (op->op_type) {
+	case BPF_MAP_OP_SET_VALUE:
+		err = apply_config_value_for_key(map_fd, pkey,
+						 pdef->value_size,
+						 op->v.value);
+		break;
+	case BPF_MAP_OP_SET_EVSEL:
+		err = apply_config_evsel_for_key(name, map_fd, pkey,
+						 op->v.evsel);
+		break;
+	default:
+		pr_debug("ERROR: unknown value type for '%s'\n", name);
+		err = -BPF_LOADER_ERRNO__INTERNAL;
+	}
+	return err;
+}
+
+static int
+apply_obj_config_map(struct bpf_map *map)
+{
+	return bpf_map_config_foreach_key(map,
+					  apply_obj_config_map_for_key,
+					  NULL);
+}
+
+static int
+apply_obj_config_object(struct bpf_object *obj)
+{
+	struct bpf_map *map;
+	int err;
+
+	bpf_map__for_each(map, obj) {
+		err = apply_obj_config_map(map);
+		if (err)
+			return err;
+	}
+	return 0;
+}
+
+int bpf__apply_obj_config(void)
+{
+	struct bpf_object *obj, *tmp;
+	int err;
+
+	bpf_object__for_each_safe(obj, tmp) {
+		err = apply_obj_config_object(obj);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
 #define ERRNO_OFFSET(e)		((e) - __BPF_LOADER_ERRNO__START)
 #define ERRCODE_OFFSET(c)	ERRNO_OFFSET(BPF_LOADER_ERRNO__##c)
 #define NR_ERRNO	(__BPF_LOADER_ERRNO__END - __BPF_LOADER_ERRNO__START)
@@ -753,6 +1431,20 @@
 	[ERRCODE_OFFSET(PROLOGUE)]	= "Failed to generate prologue",
 	[ERRCODE_OFFSET(PROLOGUE2BIG)]	= "Prologue too big for program",
 	[ERRCODE_OFFSET(PROLOGUEOOB)]	= "Offset out of bound for prologue",
+	[ERRCODE_OFFSET(OBJCONF_OPT)]	= "Invalid object config option",
+	[ERRCODE_OFFSET(OBJCONF_CONF)]	= "Config value not set (missing '=')",
+	[ERRCODE_OFFSET(OBJCONF_MAP_OPT)]	= "Invalid object map config option",
+	[ERRCODE_OFFSET(OBJCONF_MAP_NOTEXIST)]	= "Target map doesn't exist",
+	[ERRCODE_OFFSET(OBJCONF_MAP_VALUE)]	= "Incorrect value type for map",
+	[ERRCODE_OFFSET(OBJCONF_MAP_TYPE)]	= "Incorrect map type",
+	[ERRCODE_OFFSET(OBJCONF_MAP_KEYSIZE)]	= "Incorrect map key size",
+	[ERRCODE_OFFSET(OBJCONF_MAP_VALUESIZE)]	= "Incorrect map value size",
+	[ERRCODE_OFFSET(OBJCONF_MAP_NOEVT)]	= "Event not found for map setting",
+	[ERRCODE_OFFSET(OBJCONF_MAP_MAPSIZE)]	= "Invalid map size for event setting",
+	[ERRCODE_OFFSET(OBJCONF_MAP_EVTDIM)]	= "Event dimension too large",
+	[ERRCODE_OFFSET(OBJCONF_MAP_EVTINH)]	= "Doesn't support inherit event",
+	[ERRCODE_OFFSET(OBJCONF_MAP_EVTTYPE)]	= "Wrong event type for map",
+	[ERRCODE_OFFSET(OBJCONF_MAP_IDX2BIG)]	= "Index too large",
 };
 
 static int
@@ -872,3 +1564,29 @@
 	bpf__strerror_end(buf, size);
 	return 0;
 }
+
+int bpf__strerror_config_obj(struct bpf_object *obj __maybe_unused,
+			     struct parse_events_term *term __maybe_unused,
+			     struct perf_evlist *evlist __maybe_unused,
+			     int *error_pos __maybe_unused, int err,
+			     char *buf, size_t size)
+{
+	bpf__strerror_head(err, buf, size);
+	bpf__strerror_entry(BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE,
+			    "Can't use this config term with this map type");
+	bpf__strerror_end(buf, size);
+	return 0;
+}
+
+int bpf__strerror_apply_obj_config(int err, char *buf, size_t size)
+{
+	bpf__strerror_head(err, buf, size);
+	bpf__strerror_entry(BPF_LOADER_ERRNO__OBJCONF_MAP_EVTDIM,
+			    "Cannot set event to BPF map in multi-thread tracing");
+	bpf__strerror_entry(BPF_LOADER_ERRNO__OBJCONF_MAP_EVTINH,
+			    "%s (Hint: use -i to turn off inherit)", emsg);
+	bpf__strerror_entry(BPF_LOADER_ERRNO__OBJCONF_MAP_EVTTYPE,
+			    "Can only put raw, hardware and BPF output event into a BPF map");
+	bpf__strerror_end(buf, size);
+	return 0;
+}
diff --git a/tools/perf/util/bpf-loader.h b/tools/perf/util/bpf-loader.h
index 6fdc045..be43119 100644
--- a/tools/perf/util/bpf-loader.h
+++ b/tools/perf/util/bpf-loader.h
@@ -10,6 +10,7 @@
 #include <string.h>
 #include <bpf/libbpf.h>
 #include "probe-event.h"
+#include "evlist.h"
 #include "debug.h"
 
 enum bpf_loader_errno {
@@ -24,10 +25,25 @@
 	BPF_LOADER_ERRNO__PROLOGUE,	/* Failed to generate prologue */
 	BPF_LOADER_ERRNO__PROLOGUE2BIG,	/* Prologue too big for program */
 	BPF_LOADER_ERRNO__PROLOGUEOOB,	/* Offset out of bound for prologue */
+	BPF_LOADER_ERRNO__OBJCONF_OPT,	/* Invalid object config option */
+	BPF_LOADER_ERRNO__OBJCONF_CONF,	/* Config value not set (lost '=')) */
+	BPF_LOADER_ERRNO__OBJCONF_MAP_OPT,	/* Invalid object map config option */
+	BPF_LOADER_ERRNO__OBJCONF_MAP_NOTEXIST,	/* Target map not exist */
+	BPF_LOADER_ERRNO__OBJCONF_MAP_VALUE,	/* Incorrect value type for map */
+	BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE,	/* Incorrect map type */
+	BPF_LOADER_ERRNO__OBJCONF_MAP_KEYSIZE,	/* Incorrect map key size */
+	BPF_LOADER_ERRNO__OBJCONF_MAP_VALUESIZE,/* Incorrect map value size */
+	BPF_LOADER_ERRNO__OBJCONF_MAP_NOEVT,	/* Event not found for map setting */
+	BPF_LOADER_ERRNO__OBJCONF_MAP_MAPSIZE,	/* Invalid map size for event setting */
+	BPF_LOADER_ERRNO__OBJCONF_MAP_EVTDIM,	/* Event dimension too large */
+	BPF_LOADER_ERRNO__OBJCONF_MAP_EVTINH,	/* Doesn't support inherit event */
+	BPF_LOADER_ERRNO__OBJCONF_MAP_EVTTYPE,	/* Wrong event type for map */
+	BPF_LOADER_ERRNO__OBJCONF_MAP_IDX2BIG,	/* Index too large */
 	__BPF_LOADER_ERRNO__END,
 };
 
 struct bpf_object;
+struct parse_events_term;
 #define PERF_BPF_PROBE_GROUP "perf_bpf_probe"
 
 typedef int (*bpf_prog_iter_callback_t)(struct probe_trace_event *tev,
@@ -53,6 +69,16 @@
 		       char *buf, size_t size);
 int bpf__foreach_tev(struct bpf_object *obj,
 		     bpf_prog_iter_callback_t func, void *arg);
+
+int bpf__config_obj(struct bpf_object *obj, struct parse_events_term *term,
+		    struct perf_evlist *evlist, int *error_pos);
+int bpf__strerror_config_obj(struct bpf_object *obj,
+			     struct parse_events_term *term,
+			     struct perf_evlist *evlist,
+			     int *error_pos, int err, char *buf,
+			     size_t size);
+int bpf__apply_obj_config(void);
+int bpf__strerror_apply_obj_config(int err, char *buf, size_t size);
 #else
 static inline struct bpf_object *
 bpf__prepare_load(const char *filename __maybe_unused,
@@ -84,6 +110,21 @@
 }
 
 static inline int
+bpf__config_obj(struct bpf_object *obj __maybe_unused,
+		struct parse_events_term *term __maybe_unused,
+		struct perf_evlist *evlist __maybe_unused,
+		int *error_pos __maybe_unused)
+{
+	return 0;
+}
+
+static inline int
+bpf__apply_obj_config(void)
+{
+	return 0;
+}
+
+static inline int
 __bpf_strerror(char *buf, size_t size)
 {
 	if (!size)
@@ -118,5 +159,23 @@
 {
 	return __bpf_strerror(buf, size);
 }
+
+static inline int
+bpf__strerror_config_obj(struct bpf_object *obj __maybe_unused,
+			 struct parse_events_term *term __maybe_unused,
+			 struct perf_evlist *evlist __maybe_unused,
+			 int *error_pos __maybe_unused,
+			 int err __maybe_unused,
+			 char *buf, size_t size)
+{
+	return __bpf_strerror(buf, size);
+}
+
+static inline int
+bpf__strerror_apply_obj_config(int err __maybe_unused,
+			       char *buf, size_t size)
+{
+	return __bpf_strerror(buf, size);
+}
 #endif
 #endif
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index 6a7e273..f1479ee 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -166,6 +166,50 @@
 	return build_id__filename(build_id_hex, bf, size);
 }
 
+bool dso__build_id_is_kmod(const struct dso *dso, char *bf, size_t size)
+{
+	char *id_name, *ch;
+	struct stat sb;
+
+	id_name = dso__build_id_filename(dso, bf, size);
+	if (!id_name)
+		goto err;
+	if (access(id_name, F_OK))
+		goto err;
+	if (lstat(id_name, &sb) == -1)
+		goto err;
+	if ((size_t)sb.st_size > size - 1)
+		goto err;
+	if (readlink(id_name, bf, size - 1) < 0)
+		goto err;
+
+	bf[sb.st_size] = '\0';
+
+	/*
+	 * link should be:
+	 * ../../lib/modules/4.4.0-rc4/kernel/net/ipv4/netfilter/nf_nat_ipv4.ko/a09fe3eb3147dafa4e3b31dbd6257e4d696bdc92
+	 */
+	ch = strrchr(bf, '/');
+	if (!ch)
+		goto err;
+	if (ch - 3 < bf)
+		goto err;
+
+	return strncmp(".ko", ch - 3, 3) == 0;
+err:
+	/*
+	 * If dso__build_id_filename work, get id_name again,
+	 * because id_name points to bf and is broken.
+	 */
+	if (id_name)
+		id_name = dso__build_id_filename(dso, bf, size);
+	pr_err("Invalid build id: %s\n", id_name ? :
+					 dso->long_name ? :
+					 dso->short_name ? :
+					 "[unknown]");
+	return false;
+}
+
 #define dsos__for_each_with_build_id(pos, head)	\
 	list_for_each_entry(pos, head, node)	\
 		if (!pos->has_build_id)		\
@@ -211,6 +255,7 @@
 	dsos__for_each_with_build_id(pos, &machine->dsos.head) {
 		const char *name;
 		size_t name_len;
+		bool in_kernel = false;
 
 		if (!pos->hit)
 			continue;
@@ -227,8 +272,11 @@
 			name_len = pos->long_name_len + 1;
 		}
 
+		in_kernel = pos->kernel ||
+				is_kernel_module(name,
+					PERF_RECORD_MISC_CPUMODE_UNKNOWN);
 		err = write_buildid(name, name_len, pos->build_id, machine->pid,
-				    pos->kernel ? kmisc : umisc, fd);
+				    in_kernel ? kmisc : umisc, fd);
 		if (err)
 			break;
 	}
diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h
index 27a14a8..64af3e2 100644
--- a/tools/perf/util/build-id.h
+++ b/tools/perf/util/build-id.h
@@ -16,6 +16,7 @@
 int filename__sprintf_build_id(const char *pathname, char *sbuild_id);
 
 char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size);
+bool dso__build_id_is_kmod(const struct dso *dso, char *bf, size_t size);
 
 int build_id__mark_dso_hit(struct perf_tool *tool, union perf_event *event,
 			   struct perf_sample *sample, struct perf_evsel *evsel,
diff --git a/tools/perf/util/cache.h b/tools/perf/util/cache.h
index 07b5d63..3ca453f 100644
--- a/tools/perf/util/cache.h
+++ b/tools/perf/util/cache.h
@@ -23,6 +23,8 @@
 #define PERF_TRACEFS_ENVIRONMENT "PERF_TRACEFS_DIR"
 #define PERF_PAGER_ENVIRONMENT "PERF_PAGER"
 
+extern const char *config_exclusive_filename;
+
 typedef int (*config_fn_t)(const char *, const char *, void *);
 extern int perf_default_config(const char *, const char *, void *);
 extern int perf_config(config_fn_t fn, void *);
@@ -31,6 +33,7 @@
 extern int perf_config_bool(const char *, const char *);
 extern int config_error_nonbool(const char *);
 extern const char *perf_config_dirname(const char *, const char *);
+extern const char *perf_etc_perfconfig(void);
 
 char *alias_lookup(const char *alias);
 int split_cmdline(char *cmdline, const char ***argv);
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 53c43eb..24b4bd0 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -416,7 +416,7 @@
 /*
  * Fill the node with callchain values
  */
-static void
+static int
 fill_node(struct callchain_node *node, struct callchain_cursor *cursor)
 {
 	struct callchain_cursor_node *cursor_node;
@@ -433,7 +433,7 @@
 		call = zalloc(sizeof(*call));
 		if (!call) {
 			perror("not enough memory for the code path tree");
-			return;
+			return -1;
 		}
 		call->ip = cursor_node->ip;
 		call->ms.sym = cursor_node->sym;
@@ -443,6 +443,7 @@
 		callchain_cursor_advance(cursor);
 		cursor_node = callchain_cursor_current(cursor);
 	}
+	return 0;
 }
 
 static struct callchain_node *
@@ -453,7 +454,19 @@
 	struct callchain_node *new;
 
 	new = create_child(parent, false);
-	fill_node(new, cursor);
+	if (new == NULL)
+		return NULL;
+
+	if (fill_node(new, cursor) < 0) {
+		struct callchain_list *call, *tmp;
+
+		list_for_each_entry_safe(call, tmp, &new->val, list) {
+			list_del(&call->list);
+			free(call);
+		}
+		free(new);
+		return NULL;
+	}
 
 	new->children_hit = 0;
 	new->hit = period;
@@ -462,16 +475,32 @@
 	return new;
 }
 
-static s64 match_chain(struct callchain_cursor_node *node,
-		      struct callchain_list *cnode)
+enum match_result {
+	MATCH_ERROR  = -1,
+	MATCH_EQ,
+	MATCH_LT,
+	MATCH_GT,
+};
+
+static enum match_result match_chain(struct callchain_cursor_node *node,
+				     struct callchain_list *cnode)
 {
 	struct symbol *sym = node->sym;
+	u64 left, right;
 
 	if (cnode->ms.sym && sym &&
-	    callchain_param.key == CCKEY_FUNCTION)
-		return cnode->ms.sym->start - sym->start;
-	else
-		return cnode->ip - node->ip;
+	    callchain_param.key == CCKEY_FUNCTION) {
+		left = cnode->ms.sym->start;
+		right = sym->start;
+	} else {
+		left = cnode->ip;
+		right = node->ip;
+	}
+
+	if (left == right)
+		return MATCH_EQ;
+
+	return left > right ? MATCH_GT : MATCH_LT;
 }
 
 /*
@@ -479,7 +508,7 @@
  * give a part of its callchain to the created child.
  * Then create another child to host the given callchain of new branch
  */
-static void
+static int
 split_add_child(struct callchain_node *parent,
 		struct callchain_cursor *cursor,
 		struct callchain_list *to_split,
@@ -491,6 +520,8 @@
 
 	/* split */
 	new = create_child(parent, true);
+	if (new == NULL)
+		return -1;
 
 	/* split the callchain and move a part to the new child */
 	old_tail = parent->val.prev;
@@ -524,6 +555,8 @@
 
 		node = callchain_cursor_current(cursor);
 		new = add_child(parent, cursor, period);
+		if (new == NULL)
+			return -1;
 
 		/*
 		 * This is second child since we moved parent's children
@@ -534,7 +567,7 @@
 		cnode = list_first_entry(&first->val, struct callchain_list,
 					 list);
 
-		if (match_chain(node, cnode) < 0)
+		if (match_chain(node, cnode) == MATCH_LT)
 			pp = &p->rb_left;
 		else
 			pp = &p->rb_right;
@@ -545,14 +578,15 @@
 		parent->hit = period;
 		parent->count = 1;
 	}
+	return 0;
 }
 
-static int
+static enum match_result
 append_chain(struct callchain_node *root,
 	     struct callchain_cursor *cursor,
 	     u64 period);
 
-static void
+static int
 append_chain_children(struct callchain_node *root,
 		      struct callchain_cursor *cursor,
 		      u64 period)
@@ -564,36 +598,42 @@
 
 	node = callchain_cursor_current(cursor);
 	if (!node)
-		return;
+		return -1;
 
 	/* lookup in childrens */
 	while (*p) {
-		s64 ret;
+		enum match_result ret;
 
 		parent = *p;
 		rnode = rb_entry(parent, struct callchain_node, rb_node_in);
 
 		/* If at least first entry matches, rely to children */
 		ret = append_chain(rnode, cursor, period);
-		if (ret == 0)
+		if (ret == MATCH_EQ)
 			goto inc_children_hit;
+		if (ret == MATCH_ERROR)
+			return -1;
 
-		if (ret < 0)
+		if (ret == MATCH_LT)
 			p = &parent->rb_left;
 		else
 			p = &parent->rb_right;
 	}
 	/* nothing in children, add to the current node */
 	rnode = add_child(root, cursor, period);
+	if (rnode == NULL)
+		return -1;
+
 	rb_link_node(&rnode->rb_node_in, parent, p);
 	rb_insert_color(&rnode->rb_node_in, &root->rb_root_in);
 
 inc_children_hit:
 	root->children_hit += period;
 	root->children_count++;
+	return 0;
 }
 
-static int
+static enum match_result
 append_chain(struct callchain_node *root,
 	     struct callchain_cursor *cursor,
 	     u64 period)
@@ -602,7 +642,7 @@
 	u64 start = cursor->pos;
 	bool found = false;
 	u64 matches;
-	int cmp = 0;
+	enum match_result cmp = MATCH_ERROR;
 
 	/*
 	 * Lookup in the current node
@@ -618,7 +658,7 @@
 			break;
 
 		cmp = match_chain(node, cnode);
-		if (cmp)
+		if (cmp != MATCH_EQ)
 			break;
 
 		found = true;
@@ -628,7 +668,7 @@
 
 	/* matches not, relay no the parent */
 	if (!found) {
-		WARN_ONCE(!cmp, "Chain comparison error\n");
+		WARN_ONCE(cmp == MATCH_ERROR, "Chain comparison error\n");
 		return cmp;
 	}
 
@@ -636,21 +676,25 @@
 
 	/* we match only a part of the node. Split it and add the new chain */
 	if (matches < root->val_nr) {
-		split_add_child(root, cursor, cnode, start, matches, period);
-		return 0;
+		if (split_add_child(root, cursor, cnode, start, matches,
+				    period) < 0)
+			return MATCH_ERROR;
+
+		return MATCH_EQ;
 	}
 
 	/* we match 100% of the path, increment the hit */
 	if (matches == root->val_nr && cursor->pos == cursor->nr) {
 		root->hit += period;
 		root->count++;
-		return 0;
+		return MATCH_EQ;
 	}
 
 	/* We match the node and still have a part remaining */
-	append_chain_children(root, cursor, period);
+	if (append_chain_children(root, cursor, period) < 0)
+		return MATCH_ERROR;
 
-	return 0;
+	return MATCH_EQ;
 }
 
 int callchain_append(struct callchain_root *root,
@@ -662,7 +706,8 @@
 
 	callchain_cursor_commit(cursor);
 
-	append_chain_children(&root->node, cursor, period);
+	if (append_chain_children(&root->node, cursor, period) < 0)
+		return -1;
 
 	if (cursor->nr > root->max_depth)
 		root->max_depth = cursor->nr;
@@ -690,7 +735,8 @@
 
 	if (src->hit) {
 		callchain_cursor_commit(cursor);
-		append_chain_children(dst, cursor, src->hit);
+		if (append_chain_children(dst, cursor, src->hit) < 0)
+			return -1;
 	}
 
 	n = rb_first(&src->rb_root_in);
diff --git a/tools/perf/util/color.c b/tools/perf/util/color.c
index e5fb88b..43e84aa 100644
--- a/tools/perf/util/color.c
+++ b/tools/perf/util/color.c
@@ -32,14 +32,15 @@
 	return 0;
 }
 
-int perf_color_default_config(const char *var, const char *value, void *cb)
+int perf_color_default_config(const char *var, const char *value,
+			      void *cb __maybe_unused)
 {
 	if (!strcmp(var, "color.ui")) {
 		perf_use_color_default = perf_config_colorbool(var, value, -1);
 		return 0;
 	}
 
-	return perf_default_config(var, value, cb);
+	return 0;
 }
 
 static int __color_vsnprintf(char *bf, size_t size, const char *color,
diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c
index d3e12e3..4e72763 100644
--- a/tools/perf/util/config.c
+++ b/tools/perf/util/config.c
@@ -26,7 +26,7 @@
 static int config_linenr;
 static int config_file_eof;
 
-static const char *config_exclusive_filename;
+const char *config_exclusive_filename;
 
 static int get_next_char(void)
 {
@@ -434,7 +434,7 @@
 	return ret;
 }
 
-static const char *perf_etc_perfconfig(void)
+const char *perf_etc_perfconfig(void)
 {
 	static const char *system_wide;
 	if (!system_wide)
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
index fa93509..9bcf2be 100644
--- a/tools/perf/util/cpumap.c
+++ b/tools/perf/util/cpumap.c
@@ -8,6 +8,10 @@
 #include <linux/bitmap.h>
 #include "asm/bug.h"
 
+static int max_cpu_num;
+static int max_node_num;
+static int *cpunode_map;
+
 static struct cpu_map *cpu_map__default_new(void)
 {
 	struct cpu_map *cpus;
@@ -486,6 +490,32 @@
 		pr_err("Failed to read max nodes, using default of %d\n", max_node_num);
 }
 
+int cpu__max_node(void)
+{
+	if (unlikely(!max_node_num))
+		set_max_node_num();
+
+	return max_node_num;
+}
+
+int cpu__max_cpu(void)
+{
+	if (unlikely(!max_cpu_num))
+		set_max_cpu_num();
+
+	return max_cpu_num;
+}
+
+int cpu__get_node(int cpu)
+{
+	if (unlikely(cpunode_map == NULL)) {
+		pr_debug("cpu_map not initialized\n");
+		return -1;
+	}
+
+	return cpunode_map[cpu];
+}
+
 static int init_cpunode_map(void)
 {
 	int i;
diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h
index 71c41b9..81a2562 100644
--- a/tools/perf/util/cpumap.h
+++ b/tools/perf/util/cpumap.h
@@ -57,37 +57,11 @@
 	return map ? map->map[0] == -1 : true;
 }
 
-int max_cpu_num;
-int max_node_num;
-int *cpunode_map;
-
 int cpu__setup_cpunode_map(void);
 
-static inline int cpu__max_node(void)
-{
-	if (unlikely(!max_node_num))
-		pr_debug("cpu_map not initialized\n");
-
-	return max_node_num;
-}
-
-static inline int cpu__max_cpu(void)
-{
-	if (unlikely(!max_cpu_num))
-		pr_debug("cpu_map not initialized\n");
-
-	return max_cpu_num;
-}
-
-static inline int cpu__get_node(int cpu)
-{
-	if (unlikely(cpunode_map == NULL)) {
-		pr_debug("cpu_map not initialized\n");
-		return -1;
-	}
-
-	return cpunode_map[cpu];
-}
+int cpu__max_node(void);
+int cpu__max_cpu(void);
+int cpu__get_node(int cpu);
 
 int cpu_map__build_map(struct cpu_map *cpus, struct cpu_map **res,
 		       int (*f)(struct cpu_map *map, int cpu, void *data),
diff --git a/tools/perf/util/ctype.c b/tools/perf/util/ctype.c
index aada3ac..d4a5a21 100644
--- a/tools/perf/util/ctype.c
+++ b/tools/perf/util/ctype.c
@@ -32,8 +32,17 @@
 
 const char *graph_line =
 	"_____________________________________________________________________"
+	"_____________________________________________________________________"
 	"_____________________________________________________________________";
 const char *graph_dotted_line =
 	"---------------------------------------------------------------------"
 	"---------------------------------------------------------------------"
 	"---------------------------------------------------------------------";
+const char *spaces =
+	"                                                                     "
+	"                                                                     "
+	"                                                                     ";
+const char *dots =
+	"....................................................................."
+	"....................................................................."
+	".....................................................................";
diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c
index 34cd1e4..811af89 100644
--- a/tools/perf/util/data-convert-bt.c
+++ b/tools/perf/util/data-convert-bt.c
@@ -352,6 +352,84 @@
 	return ret;
 }
 
+static int
+add_bpf_output_values(struct bt_ctf_event_class *event_class,
+		      struct bt_ctf_event *event,
+		      struct perf_sample *sample)
+{
+	struct bt_ctf_field_type *len_type, *seq_type;
+	struct bt_ctf_field *len_field, *seq_field;
+	unsigned int raw_size = sample->raw_size;
+	unsigned int nr_elements = raw_size / sizeof(u32);
+	unsigned int i;
+	int ret;
+
+	if (nr_elements * sizeof(u32) != raw_size)
+		pr_warning("Incorrect raw_size (%u) in bpf output event, skip %lu bytes\n",
+			   raw_size, nr_elements * sizeof(u32) - raw_size);
+
+	len_type = bt_ctf_event_class_get_field_by_name(event_class, "raw_len");
+	len_field = bt_ctf_field_create(len_type);
+	if (!len_field) {
+		pr_err("failed to create 'raw_len' for bpf output event\n");
+		ret = -1;
+		goto put_len_type;
+	}
+
+	ret = bt_ctf_field_unsigned_integer_set_value(len_field, nr_elements);
+	if (ret) {
+		pr_err("failed to set field value for raw_len\n");
+		goto put_len_field;
+	}
+	ret = bt_ctf_event_set_payload(event, "raw_len", len_field);
+	if (ret) {
+		pr_err("failed to set payload to raw_len\n");
+		goto put_len_field;
+	}
+
+	seq_type = bt_ctf_event_class_get_field_by_name(event_class, "raw_data");
+	seq_field = bt_ctf_field_create(seq_type);
+	if (!seq_field) {
+		pr_err("failed to create 'raw_data' for bpf output event\n");
+		ret = -1;
+		goto put_seq_type;
+	}
+
+	ret = bt_ctf_field_sequence_set_length(seq_field, len_field);
+	if (ret) {
+		pr_err("failed to set length of 'raw_data'\n");
+		goto put_seq_field;
+	}
+
+	for (i = 0; i < nr_elements; i++) {
+		struct bt_ctf_field *elem_field =
+			bt_ctf_field_sequence_get_field(seq_field, i);
+
+		ret = bt_ctf_field_unsigned_integer_set_value(elem_field,
+				((u32 *)(sample->raw_data))[i]);
+
+		bt_ctf_field_put(elem_field);
+		if (ret) {
+			pr_err("failed to set raw_data[%d]\n", i);
+			goto put_seq_field;
+		}
+	}
+
+	ret = bt_ctf_event_set_payload(event, "raw_data", seq_field);
+	if (ret)
+		pr_err("failed to set payload for raw_data\n");
+
+put_seq_field:
+	bt_ctf_field_put(seq_field);
+put_seq_type:
+	bt_ctf_field_type_put(seq_type);
+put_len_field:
+	bt_ctf_field_put(len_field);
+put_len_type:
+	bt_ctf_field_type_put(len_type);
+	return ret;
+}
+
 static int add_generic_values(struct ctf_writer *cw,
 			      struct bt_ctf_event *event,
 			      struct perf_evsel *evsel,
@@ -597,6 +675,12 @@
 			return -1;
 	}
 
+	if (perf_evsel__is_bpf_output(evsel)) {
+		ret = add_bpf_output_values(event_class, event, sample);
+		if (ret)
+			return -1;
+	}
+
 	cs = ctf_stream(cw, get_sample_cpu(cw, sample, evsel));
 	if (cs) {
 		if (is_flush_needed(cs))
@@ -744,6 +828,25 @@
 	return ret;
 }
 
+static int add_bpf_output_types(struct ctf_writer *cw,
+				struct bt_ctf_event_class *class)
+{
+	struct bt_ctf_field_type *len_type = cw->data.u32;
+	struct bt_ctf_field_type *seq_base_type = cw->data.u32_hex;
+	struct bt_ctf_field_type *seq_type;
+	int ret;
+
+	ret = bt_ctf_event_class_add_field(class, len_type, "raw_len");
+	if (ret)
+		return ret;
+
+	seq_type = bt_ctf_field_type_sequence_create(seq_base_type, "raw_len");
+	if (!seq_type)
+		return -1;
+
+	return bt_ctf_event_class_add_field(class, seq_type, "raw_data");
+}
+
 static int add_generic_types(struct ctf_writer *cw, struct perf_evsel *evsel,
 			     struct bt_ctf_event_class *event_class)
 {
@@ -755,7 +858,8 @@
 	 *                              ctf event header
 	 *   PERF_SAMPLE_READ         - TODO
 	 *   PERF_SAMPLE_CALLCHAIN    - TODO
-	 *   PERF_SAMPLE_RAW          - tracepoint fields are handled separately
+	 *   PERF_SAMPLE_RAW          - tracepoint fields and BPF output
+	 *                              are handled separately
 	 *   PERF_SAMPLE_BRANCH_STACK - TODO
 	 *   PERF_SAMPLE_REGS_USER    - TODO
 	 *   PERF_SAMPLE_STACK_USER   - TODO
@@ -824,6 +928,12 @@
 			goto err;
 	}
 
+	if (perf_evsel__is_bpf_output(evsel)) {
+		ret = add_bpf_output_types(cw, event_class);
+		if (ret)
+			goto err;
+	}
+
 	ret = bt_ctf_stream_class_add_event_class(cw->stream_class, event_class);
 	if (ret) {
 		pr("Failed to add event class into stream.\n");
@@ -858,6 +968,23 @@
 	return 0;
 }
 
+static void cleanup_events(struct perf_session *session)
+{
+	struct perf_evlist *evlist = session->evlist;
+	struct perf_evsel *evsel;
+
+	evlist__for_each(evlist, evsel) {
+		struct evsel_priv *priv;
+
+		priv = evsel->priv;
+		bt_ctf_event_class_put(priv->event_class);
+		zfree(&evsel->priv);
+	}
+
+	perf_evlist__delete(evlist);
+	session->evlist = NULL;
+}
+
 static int setup_streams(struct ctf_writer *cw, struct perf_session *session)
 {
 	struct ctf_stream **stream;
@@ -953,6 +1080,12 @@
 	    bt_ctf_field_type_integer_set_base(type, BT_CTF_INTEGER_BASE_HEXADECIMAL))
 		goto err;
 
+#if __BYTE_ORDER == __BIG_ENDIAN
+	bt_ctf_field_type_set_byte_order(type, BT_CTF_BYTE_ORDER_BIG_ENDIAN);
+#else
+	bt_ctf_field_type_set_byte_order(type, BT_CTF_BYTE_ORDER_LITTLE_ENDIAN);
+#endif
+
 	pr2("Created type: INTEGER %d-bit %ssigned %s\n",
 	    size, sign ? "un" : "", hex ? "hex" : "");
 	return type;
@@ -1100,7 +1233,7 @@
 		return 0;
 	}
 
-	return perf_default_config(var, value, cb);
+	return 0;
 }
 
 int bt_convert__perf2ctf(const char *input, const char *path, bool force)
@@ -1171,6 +1304,7 @@
 		(double) c.events_size / 1024.0 / 1024.0,
 		c.events_count);
 
+	cleanup_events(session);
 	perf_session__delete(session);
 	ctf_writer__cleanup(cw);
 
diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c
index 86d9c73..8c4212a 100644
--- a/tools/perf/util/debug.c
+++ b/tools/perf/util/debug.c
@@ -5,6 +5,7 @@
 #include <string.h>
 #include <stdarg.h>
 #include <stdio.h>
+#include <api/debug.h>
 
 #include "cache.h"
 #include "color.h"
@@ -22,7 +23,7 @@
 static int redirect_to_stderr;
 int debug_data_convert;
 
-static int _eprintf(int level, int var, const char *fmt, va_list args)
+int veprintf(int level, int var, const char *fmt, va_list args)
 {
 	int ret = 0;
 
@@ -36,24 +37,19 @@
 	return ret;
 }
 
-int veprintf(int level, int var, const char *fmt, va_list args)
-{
-	return _eprintf(level, var, fmt, args);
-}
-
 int eprintf(int level, int var, const char *fmt, ...)
 {
 	va_list args;
 	int ret;
 
 	va_start(args, fmt);
-	ret = _eprintf(level, var, fmt, args);
+	ret = veprintf(level, var, fmt, args);
 	va_end(args);
 
 	return ret;
 }
 
-static int __eprintf_time(u64 t, const char *fmt, va_list args)
+static int veprintf_time(u64 t, const char *fmt, va_list args)
 {
 	int ret = 0;
 	u64 secs, usecs, nsecs = t;
@@ -75,7 +71,7 @@
 
 	if (var >= level) {
 		va_start(args, fmt);
-		ret = __eprintf_time(t, fmt, args);
+		ret = veprintf_time(t, fmt, args);
 		va_end(args);
 	}
 
@@ -91,7 +87,7 @@
 	va_list args;
 
 	va_start(args, fmt);
-	_eprintf(1, verbose, fmt, args);
+	veprintf(1, verbose, fmt, args);
 	va_end(args);
 	eprintf(1, verbose, "\n");
 }
@@ -110,40 +106,61 @@
 	return ret;
 }
 
+static void trace_event_printer(enum binary_printer_ops op,
+				unsigned int val, void *extra)
+{
+	const char *color = PERF_COLOR_BLUE;
+	union perf_event *event = (union perf_event *)extra;
+	unsigned char ch = (unsigned char)val;
+
+	switch (op) {
+	case BINARY_PRINT_DATA_BEGIN:
+		printf(".");
+		color_fprintf(stdout, color, "\n. ... raw event: size %d bytes\n",
+				event->header.size);
+		break;
+	case BINARY_PRINT_LINE_BEGIN:
+		printf(".");
+		break;
+	case BINARY_PRINT_ADDR:
+		color_fprintf(stdout, color, "  %04x: ", val);
+		break;
+	case BINARY_PRINT_NUM_DATA:
+		color_fprintf(stdout, color, " %02x", val);
+		break;
+	case BINARY_PRINT_NUM_PAD:
+		color_fprintf(stdout, color, "   ");
+		break;
+	case BINARY_PRINT_SEP:
+		color_fprintf(stdout, color, "  ");
+		break;
+	case BINARY_PRINT_CHAR_DATA:
+		color_fprintf(stdout, color, "%c",
+			      isprint(ch) ? ch : '.');
+		break;
+	case BINARY_PRINT_CHAR_PAD:
+		color_fprintf(stdout, color, " ");
+		break;
+	case BINARY_PRINT_LINE_END:
+		color_fprintf(stdout, color, "\n");
+		break;
+	case BINARY_PRINT_DATA_END:
+		printf("\n");
+		break;
+	default:
+		break;
+	}
+}
+
 void trace_event(union perf_event *event)
 {
 	unsigned char *raw_event = (void *)event;
-	const char *color = PERF_COLOR_BLUE;
-	int i, j;
 
 	if (!dump_trace)
 		return;
 
-	printf(".");
-	color_fprintf(stdout, color, "\n. ... raw event: size %d bytes\n",
-		      event->header.size);
-
-	for (i = 0; i < event->header.size; i++) {
-		if ((i & 15) == 0) {
-			printf(".");
-			color_fprintf(stdout, color, "  %04x: ", i);
-		}
-
-		color_fprintf(stdout, color, " %02x", raw_event[i]);
-
-		if (((i & 15) == 15) || i == event->header.size-1) {
-			color_fprintf(stdout, color, "  ");
-			for (j = 0; j < 15-(i & 15); j++)
-				color_fprintf(stdout, color, "   ");
-			for (j = i & ~15; j <= i; j++) {
-				color_fprintf(stdout, color, "%c",
-					      isprint(raw_event[j]) ?
-					      raw_event[j] : '.');
-			}
-			color_fprintf(stdout, color, "\n");
-		}
-	}
-	printf(".\n");
+	print_binary(raw_event, event->header.size, 16,
+		     trace_event_printer, event);
 }
 
 static struct debug_variable {
@@ -192,3 +209,23 @@
 	free(s);
 	return 0;
 }
+
+#define DEBUG_WRAPPER(__n, __l)				\
+static int pr_ ## __n ## _wrapper(const char *fmt, ...)	\
+{							\
+	va_list args;					\
+	int ret;					\
+							\
+	va_start(args, fmt);				\
+	ret = veprintf(__l, verbose, fmt, args);	\
+	va_end(args);					\
+	return ret;					\
+}
+
+DEBUG_WRAPPER(warning, 0);
+DEBUG_WRAPPER(debug, 1);
+
+void perf_debug_setup(void)
+{
+	libapi_set_print(pr_warning_wrapper, pr_warning_wrapper, pr_debug_wrapper);
+}
diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h
index 8b9a088..14bafda 100644
--- a/tools/perf/util/debug.h
+++ b/tools/perf/util/debug.h
@@ -53,5 +53,6 @@
 int veprintf(int level, int var, const char *fmt, va_list args);
 
 int perf_debug_option(const char *str);
+void perf_debug_setup(void);
 
 #endif	/* __PERF_DEBUG_H */
diff --git a/tools/perf/util/demangle-java.c b/tools/perf/util/demangle-java.c
new file mode 100644
index 0000000..3e6062a
--- /dev/null
+++ b/tools/perf/util/demangle-java.c
@@ -0,0 +1,199 @@
+#include <sys/types.h>
+#include <stdio.h>
+#include <string.h>
+#include "util.h"
+#include "debug.h"
+#include "symbol.h"
+
+#include "demangle-java.h"
+
+enum {
+	MODE_PREFIX = 0,
+	MODE_CLASS  = 1,
+	MODE_FUNC   = 2,
+	MODE_TYPE   = 3,
+	MODE_CTYPE  = 3, /* class arg */
+};
+
+#define BASE_ENT(c, n)	[c - 'A']=n
+static const char *base_types['Z' - 'A' + 1] = {
+	BASE_ENT('B', "byte" ),
+	BASE_ENT('C', "char" ),
+	BASE_ENT('D', "double" ),
+	BASE_ENT('F', "float" ),
+	BASE_ENT('I', "int" ),
+	BASE_ENT('J', "long" ),
+	BASE_ENT('S', "short" ),
+	BASE_ENT('Z', "bool" ),
+};
+
+/*
+ * demangle Java symbol between str and end positions and stores
+ * up to maxlen characters into buf. The parser starts in mode.
+ *
+ * Use MODE_PREFIX to process entire prototype till end position
+ * Use MODE_TYPE to process return type if str starts on return type char
+ *
+ *  Return:
+ *	success: buf
+ *	error  : NULL
+ */
+static char *
+__demangle_java_sym(const char *str, const char *end, char *buf, int maxlen, int mode)
+{
+	int rlen = 0;
+	int array = 0;
+	int narg = 0;
+	const char *q;
+
+	if (!end)
+		end = str + strlen(str);
+
+	for (q = str; q != end; q++) {
+
+		if (rlen == (maxlen - 1))
+			break;
+
+		switch (*q) {
+		case 'L':
+			if (mode == MODE_PREFIX || mode == MODE_CTYPE) {
+				if (mode == MODE_CTYPE) {
+					if (narg)
+						rlen += scnprintf(buf + rlen, maxlen - rlen, ", ");
+					narg++;
+				}
+				rlen += scnprintf(buf + rlen, maxlen - rlen, "class ");
+				if (mode == MODE_PREFIX)
+					mode = MODE_CLASS;
+			} else
+				buf[rlen++] = *q;
+			break;
+		case 'B':
+		case 'C':
+		case 'D':
+		case 'F':
+		case 'I':
+		case 'J':
+		case 'S':
+		case 'Z':
+			if (mode == MODE_TYPE) {
+				if (narg)
+					rlen += scnprintf(buf + rlen, maxlen - rlen, ", ");
+				rlen += scnprintf(buf + rlen, maxlen - rlen, "%s", base_types[*q - 'A']);
+				while (array--)
+					rlen += scnprintf(buf + rlen, maxlen - rlen, "[]");
+				array = 0;
+				narg++;
+			} else
+				buf[rlen++] = *q;
+			break;
+		case 'V':
+			if (mode == MODE_TYPE) {
+				rlen += scnprintf(buf + rlen, maxlen - rlen, "void");
+				while (array--)
+					rlen += scnprintf(buf + rlen, maxlen - rlen, "[]");
+				array = 0;
+			} else
+				buf[rlen++] = *q;
+			break;
+		case '[':
+			if (mode != MODE_TYPE)
+				goto error;
+			array++;
+			break;
+		case '(':
+			if (mode != MODE_FUNC)
+				goto error;
+			buf[rlen++] = *q;
+			mode = MODE_TYPE;
+			break;
+		case ')':
+			if (mode != MODE_TYPE)
+				goto error;
+			buf[rlen++] = *q;
+			narg = 0;
+			break;
+		case ';':
+			if (mode != MODE_CLASS && mode != MODE_CTYPE)
+				goto error;
+			/* safe because at least one other char to process */
+			if (isalpha(*(q + 1)))
+				rlen += scnprintf(buf + rlen, maxlen - rlen, ".");
+			if (mode == MODE_CLASS)
+				mode = MODE_FUNC;
+			else if (mode == MODE_CTYPE)
+				mode = MODE_TYPE;
+			break;
+		case '/':
+			if (mode != MODE_CLASS && mode != MODE_CTYPE)
+				goto error;
+			rlen += scnprintf(buf + rlen, maxlen - rlen, ".");
+			break;
+		default :
+			buf[rlen++] = *q;
+		}
+	}
+	buf[rlen] = '\0';
+	return buf;
+error:
+	return NULL;
+}
+
+/*
+ * Demangle Java function signature (openJDK, not GCJ)
+ * input:
+ * 	str: string to parse. String is not modified
+ *    flags: comobination of JAVA_DEMANGLE_* flags to modify demangling
+ * return:
+ *	if input can be demangled, then a newly allocated string is returned.
+ *	if input cannot be demangled, then NULL is returned
+ *
+ * Note: caller is responsible for freeing demangled string
+ */
+char *
+java_demangle_sym(const char *str, int flags)
+{
+	char *buf, *ptr;
+	char *p;
+	size_t len, l1 = 0;
+
+	if (!str)
+		return NULL;
+
+	/* find start of retunr type */
+	p = strrchr(str, ')');
+	if (!p)
+		return NULL;
+
+	/*
+	 * expansion factor estimated to 3x
+	 */
+	len = strlen(str) * 3 + 1;
+	buf = malloc(len);
+	if (!buf)
+		return NULL;
+
+	buf[0] = '\0';
+	if (!(flags & JAVA_DEMANGLE_NORET)) {
+		/*
+		 * get return type first
+		 */
+		ptr = __demangle_java_sym(p + 1, NULL, buf, len, MODE_TYPE);
+		if (!ptr)
+			goto error;
+
+		/* add space between return type and function prototype */
+		l1 = strlen(buf);
+		buf[l1++] = ' ';
+	}
+
+	/* process function up to return type */
+	ptr = __demangle_java_sym(str, p + 1, buf + l1, len - l1, MODE_PREFIX);
+	if (!ptr)
+		goto error;
+
+	return buf;
+error:
+	free(buf);
+	return NULL;
+}
diff --git a/tools/perf/util/demangle-java.h b/tools/perf/util/demangle-java.h
new file mode 100644
index 0000000..a981c1f
--- /dev/null
+++ b/tools/perf/util/demangle-java.h
@@ -0,0 +1,10 @@
+#ifndef __PERF_DEMANGLE_JAVA
+#define __PERF_DEMANGLE_JAVA 1
+/*
+ * demangle function flags
+ */
+#define JAVA_DEMANGLE_NORET	0x1 /* do not process return type */
+
+char * java_demangle_sym(const char *str, int flags);
+
+#endif /* __PERF_DEMANGLE_JAVA */
diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index e8e9a9d..8e639543 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -52,6 +52,11 @@
 			debuglink--;
 		if (*debuglink == '/')
 			debuglink++;
+
+		ret = -1;
+		if (!is_regular_file(filename))
+			break;
+
 		ret = filename__read_debuglink(filename, debuglink,
 					       size - (debuglink - filename));
 		}
diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c
index 7dd5939..49a11d9 100644
--- a/tools/perf/util/env.c
+++ b/tools/perf/util/env.c
@@ -6,6 +6,8 @@
 
 void perf_env__exit(struct perf_env *env)
 {
+	int i;
+
 	zfree(&env->hostname);
 	zfree(&env->os_release);
 	zfree(&env->version);
@@ -19,6 +21,10 @@
 	zfree(&env->numa_nodes);
 	zfree(&env->pmu_mappings);
 	zfree(&env->cpu);
+
+	for (i = 0; i < env->caches_cnt; i++)
+		cpu_cache_level__free(&env->caches[i]);
+	zfree(&env->caches);
 }
 
 int perf_env__set_cmdline(struct perf_env *env, int argc, const char *argv[])
@@ -75,3 +81,10 @@
 	env->nr_cpus_avail = nr_cpus;
 	return 0;
 }
+
+void cpu_cache_level__free(struct cpu_cache_level *cache)
+{
+	free(cache->type);
+	free(cache->map);
+	free(cache->size);
+}
diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
index 0132b95..56cffb6 100644
--- a/tools/perf/util/env.h
+++ b/tools/perf/util/env.h
@@ -1,11 +1,23 @@
 #ifndef __PERF_ENV_H
 #define __PERF_ENV_H
 
+#include <linux/types.h>
+
 struct cpu_topology_map {
 	int	socket_id;
 	int	core_id;
 };
 
+struct cpu_cache_level {
+	u32	level;
+	u32	line_size;
+	u32	sets;
+	u32	ways;
+	char	*type;
+	char	*size;
+	char	*map;
+};
+
 struct perf_env {
 	char			*hostname;
 	char			*os_release;
@@ -31,6 +43,8 @@
 	char			*numa_nodes;
 	char			*pmu_mappings;
 	struct cpu_topology_map	*cpu;
+	struct cpu_cache_level	*caches;
+	int			 caches_cnt;
 };
 
 extern struct perf_env perf_env;
@@ -41,4 +55,5 @@
 
 int perf_env__read_cpu_topology_map(struct perf_env *env);
 
+void cpu_cache_level__free(struct cpu_cache_level *cache);
 #endif /* __PERF_ENV_H */
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 85155e9..7bad5c3 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -282,7 +282,7 @@
 		strcpy(execname, "");
 
 		/* 00400000-0040c000 r-xp 00000000 fd:01 41038  /bin/cat */
-		n = sscanf(bf, "%"PRIx64"-%"PRIx64" %s %"PRIx64" %x:%x %u %s\n",
+		n = sscanf(bf, "%"PRIx64"-%"PRIx64" %s %"PRIx64" %x:%x %u %[^\n]\n",
 		       &event->mmap2.start, &event->mmap2.len, prot,
 		       &event->mmap2.pgoff, &event->mmap2.maj,
 		       &event->mmap2.min,
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index d81f13d..86a0383 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -1181,12 +1181,12 @@
 	 */
 	if (cpus != evlist->cpus) {
 		cpu_map__put(evlist->cpus);
-		evlist->cpus = cpus;
+		evlist->cpus = cpu_map__get(cpus);
 	}
 
 	if (threads != evlist->threads) {
 		thread_map__put(evlist->threads);
-		evlist->threads = threads;
+		evlist->threads = thread_map__get(threads);
 	}
 
 	perf_evlist__propagate_maps(evlist);
@@ -1223,6 +1223,9 @@
 	int err = 0;
 
 	evlist__for_each(evlist, evsel) {
+		if (evsel->attr.type != PERF_TYPE_TRACEPOINT)
+			continue;
+
 		err = perf_evsel__set_filter(evsel, filter);
 		if (err)
 			break;
@@ -1624,7 +1627,7 @@
 	return printed + fprintf(fp, "\n");
 }
 
-int perf_evlist__strerror_open(struct perf_evlist *evlist __maybe_unused,
+int perf_evlist__strerror_open(struct perf_evlist *evlist,
 			       int err, char *buf, size_t size)
 {
 	int printed, value;
@@ -1652,7 +1655,25 @@
 				    "Hint:\tTry: 'sudo sh -c \"echo -1 > /proc/sys/kernel/perf_event_paranoid\"'\n"
 				    "Hint:\tThe current value is %d.", value);
 		break;
+	case EINVAL: {
+		struct perf_evsel *first = perf_evlist__first(evlist);
+		int max_freq;
+
+		if (sysctl__read_int("kernel/perf_event_max_sample_rate", &max_freq) < 0)
+			goto out_default;
+
+		if (first->attr.sample_freq < (u64)max_freq)
+			goto out_default;
+
+		printed = scnprintf(buf, size,
+				    "Error:\t%s.\n"
+				    "Hint:\tCheck /proc/sys/kernel/perf_event_max_sample_rate.\n"
+				    "Hint:\tThe current value is %d and %" PRIu64 " is being requested.",
+				    emsg, max_freq, first->attr.sample_freq);
+		break;
+	}
 	default:
+out_default:
 		scnprintf(buf, size, "%s", emsg);
 		break;
 	}
@@ -1723,3 +1744,19 @@
 
 	tracking_evsel->tracking = true;
 }
+
+struct perf_evsel *
+perf_evlist__find_evsel_by_str(struct perf_evlist *evlist,
+			       const char *str)
+{
+	struct perf_evsel *evsel;
+
+	evlist__for_each(evlist, evsel) {
+		if (!evsel->name)
+			continue;
+		if (strcmp(str, evsel->name) == 0)
+			return evsel;
+	}
+
+	return NULL;
+}
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 7c4d9a2..a0d1522 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -294,4 +294,7 @@
 				     struct perf_evsel *tracking_evsel);
 
 void perf_event_attr__set_max_precise_ip(struct perf_event_attr *attr);
+
+struct perf_evsel *
+perf_evlist__find_evsel_by_str(struct perf_evlist *evlist, const char *str);
 #endif /* __PERF_EVLIST_H */
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index cdbaf9b..0902fe4 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -225,6 +225,11 @@
 	if (evsel != NULL)
 		perf_evsel__init(evsel, attr, idx);
 
+	if (perf_evsel__is_bpf_output(evsel)) {
+		evsel->attr.sample_type |= PERF_SAMPLE_RAW;
+		evsel->attr.sample_period = 1;
+	}
+
 	return evsel;
 }
 
@@ -898,6 +903,16 @@
 	if (evsel->precise_max)
 		perf_event_attr__set_max_precise_ip(attr);
 
+	if (opts->all_user) {
+		attr->exclude_kernel = 1;
+		attr->exclude_user   = 0;
+	}
+
+	if (opts->all_kernel) {
+		attr->exclude_kernel = 0;
+		attr->exclude_user   = 1;
+	}
+
 	/*
 	 * Apply event specific term settings,
 	 * it overloads any global configuration.
@@ -2362,12 +2377,15 @@
 	case EPERM:
 	case EACCES:
 		return scnprintf(msg, size,
-		 "You may not have permission to collect %sstats.\n"
-		 "Consider tweaking /proc/sys/kernel/perf_event_paranoid:\n"
-		 " -1 - Not paranoid at all\n"
-		 "  0 - Disallow raw tracepoint access for unpriv\n"
-		 "  1 - Disallow cpu events for unpriv\n"
-		 "  2 - Disallow kernel profiling for unpriv",
+		 "You may not have permission to collect %sstats.\n\n"
+		 "Consider tweaking /proc/sys/kernel/perf_event_paranoid,\n"
+		 "which controls use of the performance events system by\n"
+		 "unprivileged users (without CAP_SYS_ADMIN).\n\n"
+		 "The default value is 1:\n\n"
+		 "  -1: Allow use of (almost) all events by all users\n"
+		 ">= 0: Disallow raw tracepoint access by users without CAP_IOC_LOCK\n"
+		 ">= 1: Disallow CPU event access by users without CAP_SYS_ADMIN\n"
+		 ">= 2: Disallow kernel profiling by users without CAP_SYS_ADMIN",
 				 target->system_wide ? "system-wide " : "");
 	case ENOENT:
 		return scnprintf(msg, size, "The %s event is not supported.",
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 8e75434..501ea6e 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -93,10 +93,8 @@
 	const char		*unit;
 	struct event_format	*tp_format;
 	off_t			id_offset;
-	union {
-		void		*priv;
-		u64		db_id;
-	};
+	void			*priv;
+	u64			db_id;
 	struct cgroup_sel	*cgrp;
 	void			*handler;
 	struct cpu_map		*cpus;
@@ -364,6 +362,14 @@
 #undef FUNCTION_EVENT
 }
 
+static inline bool perf_evsel__is_bpf_output(struct perf_evsel *evsel)
+{
+	struct perf_event_attr *attr = &evsel->attr;
+
+	return (attr->config == PERF_COUNT_SW_BPF_OUTPUT) &&
+		(attr->type == PERF_TYPE_SOFTWARE);
+}
+
 struct perf_attr_details {
 	bool freq;
 	bool verbose;
diff --git a/tools/perf/util/genelf.c b/tools/perf/util/genelf.c
new file mode 100644
index 0000000..c1ef805
--- /dev/null
+++ b/tools/perf/util/genelf.c
@@ -0,0 +1,449 @@
+/*
+ * genelf.c
+ * Copyright (C) 2014, Google, Inc
+ *
+ * Contributed by:
+ * 	Stephane Eranian <eranian@gmail.com>
+ *
+ * Released under the GPL v2. (and only v2, not any later version)
+ */
+
+#include <sys/types.h>
+#include <stdio.h>
+#include <getopt.h>
+#include <stddef.h>
+#include <libelf.h>
+#include <string.h>
+#include <stdlib.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <fcntl.h>
+#include <err.h>
+#include <dwarf.h>
+
+#include "perf.h"
+#include "genelf.h"
+#include "../util/jitdump.h"
+
+#define JVMTI
+
+#define BUILD_ID_URANDOM /* different uuid for each run */
+
+#ifdef HAVE_LIBCRYPTO
+
+#define BUILD_ID_MD5
+#undef BUILD_ID_SHA	/* does not seem to work well when linked with Java */
+#undef BUILD_ID_URANDOM /* different uuid for each run */
+
+#ifdef BUILD_ID_SHA
+#include <openssl/sha.h>
+#endif
+
+#ifdef BUILD_ID_MD5
+#include <openssl/md5.h>
+#endif
+#endif
+
+
+typedef struct {
+  unsigned int namesz;  /* Size of entry's owner string */
+  unsigned int descsz;  /* Size of the note descriptor */
+  unsigned int type;    /* Interpretation of the descriptor */
+  char         name[0]; /* Start of the name+desc data */
+} Elf_Note;
+
+struct options {
+	char *output;
+	int fd;
+};
+
+static char shd_string_table[] = {
+	0,
+	'.', 't', 'e', 'x', 't', 0,			/*  1 */
+	'.', 's', 'h', 's', 't', 'r', 't', 'a', 'b', 0, /*  7 */
+	'.', 's', 'y', 'm', 't', 'a', 'b', 0,		/* 17 */
+	'.', 's', 't', 'r', 't', 'a', 'b', 0,		/* 25 */
+	'.', 'n', 'o', 't', 'e', '.', 'g', 'n', 'u', '.', 'b', 'u', 'i', 'l', 'd', '-', 'i', 'd', 0, /* 33 */
+	'.', 'd', 'e', 'b', 'u', 'g', '_', 'l', 'i', 'n', 'e', 0, /* 52 */
+	'.', 'd', 'e', 'b', 'u', 'g', '_', 'i', 'n', 'f', 'o', 0, /* 64 */
+	'.', 'd', 'e', 'b', 'u', 'g', '_', 'a', 'b', 'b', 'r', 'e', 'v', 0, /* 76 */
+};
+
+static struct buildid_note {
+	Elf_Note desc;		/* descsz: size of build-id, must be multiple of 4 */
+	char	 name[4];	/* GNU\0 */
+	char	 build_id[20];
+} bnote;
+
+static Elf_Sym symtab[]={
+	/* symbol 0 MUST be the undefined symbol */
+	{ .st_name  = 0, /* index in sym_string table */
+	  .st_info  = ELF_ST_TYPE(STT_NOTYPE),
+	  .st_shndx = 0, /* for now */
+	  .st_value = 0x0,
+	  .st_other = ELF_ST_VIS(STV_DEFAULT),
+	  .st_size  = 0,
+	},
+	{ .st_name  = 1, /* index in sym_string table */
+	  .st_info  = ELF_ST_BIND(STB_LOCAL) | ELF_ST_TYPE(STT_FUNC),
+	  .st_shndx = 1,
+	  .st_value = 0, /* for now */
+	  .st_other = ELF_ST_VIS(STV_DEFAULT),
+	  .st_size  = 0, /* for now */
+	}
+};
+
+#ifdef BUILD_ID_URANDOM
+static void
+gen_build_id(struct buildid_note *note,
+	     unsigned long load_addr __maybe_unused,
+	     const void *code __maybe_unused,
+	     size_t csize __maybe_unused)
+{
+	int fd;
+	size_t sz = sizeof(note->build_id);
+	ssize_t sret;
+
+	fd = open("/dev/urandom", O_RDONLY);
+	if (fd == -1)
+		err(1, "cannot access /dev/urandom for builid");
+
+	sret = read(fd, note->build_id, sz);
+
+	close(fd);
+
+	if (sret != (ssize_t)sz)
+		memset(note->build_id, 0, sz);
+}
+#endif
+
+#ifdef BUILD_ID_SHA
+static void
+gen_build_id(struct buildid_note *note,
+	     unsigned long load_addr __maybe_unused,
+	     const void *code,
+	     size_t csize)
+{
+	if (sizeof(note->build_id) < SHA_DIGEST_LENGTH)
+		errx(1, "build_id too small for SHA1");
+
+	SHA1(code, csize, (unsigned char *)note->build_id);
+}
+#endif
+
+#ifdef BUILD_ID_MD5
+static void
+gen_build_id(struct buildid_note *note, unsigned long load_addr, const void *code, size_t csize)
+{
+	MD5_CTX context;
+
+	if (sizeof(note->build_id) < 16)
+		errx(1, "build_id too small for MD5");
+
+	MD5_Init(&context);
+	MD5_Update(&context, &load_addr, sizeof(load_addr));
+	MD5_Update(&context, code, csize);
+	MD5_Final((unsigned char *)note->build_id, &context);
+}
+#endif
+
+/*
+ * fd: file descriptor open for writing for the output file
+ * load_addr: code load address (could be zero, just used for buildid)
+ * sym: function name (for native code - used as the symbol)
+ * code: the native code
+ * csize: the code size in bytes
+ */
+int
+jit_write_elf(int fd, uint64_t load_addr, const char *sym,
+	      const void *code, int csize,
+	      void *debug, int nr_debug_entries)
+{
+	Elf *e;
+	Elf_Data *d;
+	Elf_Scn *scn;
+	Elf_Ehdr *ehdr;
+	Elf_Shdr *shdr;
+	char *strsym = NULL;
+	int symlen;
+	int retval = -1;
+
+	if (elf_version(EV_CURRENT) == EV_NONE) {
+		warnx("ELF initialization failed");
+		return -1;
+	}
+
+	e = elf_begin(fd, ELF_C_WRITE, NULL);
+	if (!e) {
+		warnx("elf_begin failed");
+		goto error;
+	}
+
+	/*
+	 * setup ELF header
+	 */
+	ehdr = elf_newehdr(e);
+	if (!ehdr) {
+		warnx("cannot get ehdr");
+		goto error;
+	}
+
+	ehdr->e_ident[EI_DATA] = GEN_ELF_ENDIAN;
+	ehdr->e_ident[EI_CLASS] = GEN_ELF_CLASS;
+	ehdr->e_machine = GEN_ELF_ARCH;
+	ehdr->e_type = ET_DYN;
+	ehdr->e_entry = GEN_ELF_TEXT_OFFSET;
+	ehdr->e_version = EV_CURRENT;
+	ehdr->e_shstrndx= 2; /* shdr index for section name */
+
+	/*
+	 * setup text section
+	 */
+	scn = elf_newscn(e);
+	if (!scn) {
+		warnx("cannot create section");
+		goto error;
+	}
+
+	d = elf_newdata(scn);
+	if (!d) {
+		warnx("cannot get new data");
+		goto error;
+	}
+
+	d->d_align = 16;
+	d->d_off = 0LL;
+	d->d_buf = (void *)code;
+	d->d_type = ELF_T_BYTE;
+	d->d_size = csize;
+	d->d_version = EV_CURRENT;
+
+	shdr = elf_getshdr(scn);
+	if (!shdr) {
+		warnx("cannot get section header");
+		goto error;
+	}
+
+	shdr->sh_name = 1;
+	shdr->sh_type = SHT_PROGBITS;
+	shdr->sh_addr = GEN_ELF_TEXT_OFFSET;
+	shdr->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
+	shdr->sh_entsize = 0;
+
+	/*
+	 * setup section headers string table
+	 */
+	scn = elf_newscn(e);
+	if (!scn) {
+		warnx("cannot create section");
+		goto error;
+	}
+
+	d = elf_newdata(scn);
+	if (!d) {
+		warnx("cannot get new data");
+		goto error;
+	}
+
+	d->d_align = 1;
+	d->d_off = 0LL;
+	d->d_buf = shd_string_table;
+	d->d_type = ELF_T_BYTE;
+	d->d_size = sizeof(shd_string_table);
+	d->d_version = EV_CURRENT;
+
+	shdr = elf_getshdr(scn);
+	if (!shdr) {
+		warnx("cannot get section header");
+		goto error;
+	}
+
+	shdr->sh_name = 7; /* offset of '.shstrtab' in shd_string_table */
+	shdr->sh_type = SHT_STRTAB;
+	shdr->sh_flags = 0;
+	shdr->sh_entsize = 0;
+
+	/*
+	 * setup symtab section
+	 */
+	symtab[1].st_size  = csize;
+	symtab[1].st_value = GEN_ELF_TEXT_OFFSET;
+
+	scn = elf_newscn(e);
+	if (!scn) {
+		warnx("cannot create section");
+		goto error;
+	}
+
+	d = elf_newdata(scn);
+	if (!d) {
+		warnx("cannot get new data");
+		goto error;
+	}
+
+	d->d_align = 8;
+	d->d_off = 0LL;
+	d->d_buf = symtab;
+	d->d_type = ELF_T_SYM;
+	d->d_size = sizeof(symtab);
+	d->d_version = EV_CURRENT;
+
+	shdr = elf_getshdr(scn);
+	if (!shdr) {
+		warnx("cannot get section header");
+		goto error;
+	}
+
+	shdr->sh_name = 17; /* offset of '.symtab' in shd_string_table */
+	shdr->sh_type = SHT_SYMTAB;
+	shdr->sh_flags = 0;
+	shdr->sh_entsize = sizeof(Elf_Sym);
+	shdr->sh_link = 4; /* index of .strtab section */
+
+	/*
+	 * setup symbols string table
+	 * 2 = 1 for 0 in 1st entry, 1 for the 0 at end of symbol for 2nd entry
+	 */
+	symlen = 2 + strlen(sym);
+	strsym = calloc(1, symlen);
+	if (!strsym) {
+		warnx("cannot allocate strsym");
+		goto error;
+	}
+	strcpy(strsym + 1, sym);
+
+	scn = elf_newscn(e);
+	if (!scn) {
+		warnx("cannot create section");
+		goto error;
+	}
+
+	d = elf_newdata(scn);
+	if (!d) {
+		warnx("cannot get new data");
+		goto error;
+	}
+
+	d->d_align = 1;
+	d->d_off = 0LL;
+	d->d_buf = strsym;
+	d->d_type = ELF_T_BYTE;
+	d->d_size = symlen;
+	d->d_version = EV_CURRENT;
+
+	shdr = elf_getshdr(scn);
+	if (!shdr) {
+		warnx("cannot get section header");
+		goto error;
+	}
+
+	shdr->sh_name = 25; /* offset in shd_string_table */
+	shdr->sh_type = SHT_STRTAB;
+	shdr->sh_flags = 0;
+	shdr->sh_entsize = 0;
+
+	/*
+	 * setup build-id section
+	 */
+	scn = elf_newscn(e);
+	if (!scn) {
+		warnx("cannot create section");
+		goto error;
+	}
+
+	d = elf_newdata(scn);
+	if (!d) {
+		warnx("cannot get new data");
+		goto error;
+	}
+
+	/*
+	 * build-id generation
+	 */
+	gen_build_id(&bnote, load_addr, code, csize);
+	bnote.desc.namesz = sizeof(bnote.name); /* must include 0 termination */
+	bnote.desc.descsz = sizeof(bnote.build_id);
+	bnote.desc.type   = NT_GNU_BUILD_ID;
+	strcpy(bnote.name, "GNU");
+
+	d->d_align = 4;
+	d->d_off = 0LL;
+	d->d_buf = &bnote;
+	d->d_type = ELF_T_BYTE;
+	d->d_size = sizeof(bnote);
+	d->d_version = EV_CURRENT;
+
+	shdr = elf_getshdr(scn);
+	if (!shdr) {
+		warnx("cannot get section header");
+		goto error;
+	}
+
+	shdr->sh_name = 33; /* offset in shd_string_table */
+	shdr->sh_type = SHT_NOTE;
+	shdr->sh_addr = 0x0;
+	shdr->sh_flags = SHF_ALLOC;
+	shdr->sh_size = sizeof(bnote);
+	shdr->sh_entsize = 0;
+
+	if (debug && nr_debug_entries) {
+		retval = jit_add_debug_info(e, load_addr, debug, nr_debug_entries);
+		if (retval)
+			goto error;
+	} else {
+		if (elf_update(e, ELF_C_WRITE) < 0) {
+			warnx("elf_update 4 failed");
+			goto error;
+		}
+	}
+
+	retval = 0;
+error:
+	(void)elf_end(e);
+
+	free(strsym);
+
+
+	return retval;
+}
+
+#ifndef JVMTI
+
+static unsigned char x86_code[] = {
+    0xBB, 0x2A, 0x00, 0x00, 0x00, /* movl $42, %ebx */
+    0xB8, 0x01, 0x00, 0x00, 0x00, /* movl $1, %eax */
+    0xCD, 0x80            /* int $0x80 */
+};
+
+static struct options options;
+
+int main(int argc, char **argv)
+{
+	int c, fd, ret;
+
+	while ((c = getopt(argc, argv, "o:h")) != -1) {
+		switch (c) {
+		case 'o':
+			options.output = optarg;
+			break;
+		case 'h':
+			printf("Usage: genelf -o output_file [-h]\n");
+			return 0;
+		default:
+			errx(1, "unknown option");
+		}
+	}
+
+	fd = open(options.output, O_CREAT|O_TRUNC|O_RDWR, 0666);
+	if (fd == -1)
+		err(1, "cannot create file %s", options.output);
+
+	ret = jit_write_elf(fd, "main", x86_code, sizeof(x86_code));
+	close(fd);
+
+	if (ret != 0)
+		unlink(options.output);
+
+	return ret;
+}
+#endif
diff --git a/tools/perf/util/genelf.h b/tools/perf/util/genelf.h
new file mode 100644
index 0000000..45bf9c6
--- /dev/null
+++ b/tools/perf/util/genelf.h
@@ -0,0 +1,67 @@
+#ifndef __GENELF_H__
+#define __GENELF_H__
+
+/* genelf.c */
+extern int jit_write_elf(int fd, uint64_t code_addr, const char *sym,
+			 const void *code, int csize,
+			 void *debug, int nr_debug_entries);
+/* genelf_debug.c */
+extern int jit_add_debug_info(Elf *e, uint64_t code_addr,
+			      void *debug, int nr_debug_entries);
+
+#if   defined(__arm__)
+#define GEN_ELF_ARCH	EM_ARM
+#define GEN_ELF_ENDIAN	ELFDATA2LSB
+#define GEN_ELF_CLASS	ELFCLASS32
+#elif defined(__aarch64__)
+#define GEN_ELF_ARCH	EM_AARCH64
+#define GEN_ELF_ENDIAN	ELFDATA2LSB
+#define GEN_ELF_CLASS	ELFCLASS64
+#elif defined(__x86_64__)
+#define GEN_ELF_ARCH	EM_X86_64
+#define GEN_ELF_ENDIAN	ELFDATA2LSB
+#define GEN_ELF_CLASS	ELFCLASS64
+#elif defined(__i386__)
+#define GEN_ELF_ARCH	EM_386
+#define GEN_ELF_ENDIAN	ELFDATA2LSB
+#define GEN_ELF_CLASS	ELFCLASS32
+#elif defined(__ppcle__)
+#define GEN_ELF_ARCH	EM_PPC
+#define GEN_ELF_ENDIAN	ELFDATA2LSB
+#define GEN_ELF_CLASS	ELFCLASS64
+#elif defined(__powerpc__)
+#define GEN_ELF_ARCH	EM_PPC64
+#define GEN_ELF_ENDIAN	ELFDATA2MSB
+#define GEN_ELF_CLASS	ELFCLASS64
+#elif defined(__powerpcle__)
+#define GEN_ELF_ARCH	EM_PPC64
+#define GEN_ELF_ENDIAN	ELFDATA2LSB
+#define GEN_ELF_CLASS	ELFCLASS64
+#else
+#error "unsupported architecture"
+#endif
+
+#if GEN_ELF_CLASS == ELFCLASS64
+#define elf_newehdr	elf64_newehdr
+#define elf_getshdr	elf64_getshdr
+#define Elf_Ehdr	Elf64_Ehdr
+#define Elf_Shdr	Elf64_Shdr
+#define Elf_Sym		Elf64_Sym
+#define ELF_ST_TYPE(a)	ELF64_ST_TYPE(a)
+#define ELF_ST_BIND(a)	ELF64_ST_BIND(a)
+#define ELF_ST_VIS(a)	ELF64_ST_VISIBILITY(a)
+#else
+#define elf_newehdr	elf32_newehdr
+#define elf_getshdr	elf32_getshdr
+#define Elf_Ehdr	Elf32_Ehdr
+#define Elf_Shdr	Elf32_Shdr
+#define Elf_Sym		Elf32_Sym
+#define ELF_ST_TYPE(a)	ELF32_ST_TYPE(a)
+#define ELF_ST_BIND(a)	ELF32_ST_BIND(a)
+#define ELF_ST_VIS(a)	ELF32_ST_VISIBILITY(a)
+#endif
+
+/* The .text section is directly after the ELF header */
+#define GEN_ELF_TEXT_OFFSET sizeof(Elf_Ehdr)
+
+#endif
diff --git a/tools/perf/util/genelf_debug.c b/tools/perf/util/genelf_debug.c
new file mode 100644
index 0000000..5980f7d
--- /dev/null
+++ b/tools/perf/util/genelf_debug.c
@@ -0,0 +1,610 @@
+/*
+ * genelf_debug.c
+ * Copyright (C) 2015, Google, Inc
+ *
+ * Contributed by:
+ * 	Stephane Eranian <eranian@google.com>
+ *
+ * Released under the GPL v2.
+ *
+ * based on GPLv2 source code from Oprofile
+ * @remark Copyright 2007 OProfile authors
+ * @author Philippe Elie
+ */
+#include <sys/types.h>
+#include <stdio.h>
+#include <getopt.h>
+#include <stddef.h>
+#include <libelf.h>
+#include <string.h>
+#include <stdlib.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <fcntl.h>
+#include <err.h>
+#include <dwarf.h>
+
+#include "perf.h"
+#include "genelf.h"
+#include "../util/jitdump.h"
+
+#define BUFFER_EXT_DFL_SIZE	(4 * 1024)
+
+typedef uint32_t uword;
+typedef uint16_t uhalf;
+typedef int32_t  sword;
+typedef int16_t  shalf;
+typedef uint8_t  ubyte;
+typedef int8_t   sbyte;
+
+struct buffer_ext {
+	size_t cur_pos;
+	size_t max_sz;
+	void *data;
+};
+
+static void
+buffer_ext_dump(struct buffer_ext *be, const char *msg)
+{
+	size_t i;
+	warnx("DUMP for %s", msg);
+	for (i = 0 ; i < be->cur_pos; i++)
+		warnx("%4zu 0x%02x", i, (((char *)be->data)[i]) & 0xff);
+}
+
+static inline int
+buffer_ext_add(struct buffer_ext *be, void *addr, size_t sz)
+{
+	void *tmp;
+	size_t be_sz = be->max_sz;
+
+retry:
+	if ((be->cur_pos + sz) < be_sz) {
+		memcpy(be->data + be->cur_pos, addr, sz);
+		be->cur_pos += sz;
+		return 0;
+	}
+
+	if (!be_sz)
+		be_sz = BUFFER_EXT_DFL_SIZE;
+	else
+		be_sz <<= 1;
+
+	tmp = realloc(be->data, be_sz);
+	if (!tmp)
+		return -1;
+
+	be->data   = tmp;
+	be->max_sz = be_sz;
+
+	goto retry;
+}
+
+static void
+buffer_ext_init(struct buffer_ext *be)
+{
+	be->data = NULL;
+	be->cur_pos = 0;
+	be->max_sz = 0;
+}
+
+static inline size_t
+buffer_ext_size(struct buffer_ext *be)
+{
+	return be->cur_pos;
+}
+
+static inline void *
+buffer_ext_addr(struct buffer_ext *be)
+{
+	return be->data;
+}
+
+struct debug_line_header {
+	// Not counting this field
+	uword total_length;
+	// version number (2 currently)
+	uhalf version;
+	// relative offset from next field to
+	// program statement
+	uword prolog_length;
+	ubyte minimum_instruction_length;
+	ubyte default_is_stmt;
+	// line_base - see DWARF 2 specs
+	sbyte line_base;
+	// line_range - see DWARF 2 specs
+	ubyte line_range;
+	// number of opcode + 1
+	ubyte opcode_base;
+	/* follow the array of opcode args nr: ubytes [nr_opcode_base] */
+	/* follow the search directories index, zero terminated string
+	 * terminated by an empty string.
+	 */
+	/* follow an array of { filename, LEB128, LEB128, LEB128 }, first is
+	 * the directory index entry, 0 means current directory, then mtime
+	 * and filesize, last entry is followed by en empty string.
+	 */
+	/* follow the first program statement */
+} __attribute__((packed));
+
+/* DWARF 2 spec talk only about one possible compilation unit header while
+ * binutils can handle two flavours of dwarf 2, 32 and 64 bits, this is not
+ * related to the used arch, an ELF 32 can hold more than 4 Go of debug
+ * information. For now we handle only DWARF 2 32 bits comp unit. It'll only
+ * become a problem if we generate more than 4GB of debug information.
+ */
+struct compilation_unit_header {
+	uword total_length;
+	uhalf version;
+	uword debug_abbrev_offset;
+	ubyte pointer_size;
+} __attribute__((packed));
+
+#define DW_LNS_num_opcode (DW_LNS_set_isa + 1)
+
+/* field filled at run time are marked with -1 */
+static struct debug_line_header const default_debug_line_header = {
+	.total_length = -1,
+	.version = 2,
+	.prolog_length = -1,
+	.minimum_instruction_length = 1,	/* could be better when min instruction size != 1 */
+	.default_is_stmt = 1,	/* we don't take care about basic block */
+	.line_base = -5,	/* sensible value for line base ... */
+	.line_range = -14,     /* ... and line range are guessed statically */
+	.opcode_base = DW_LNS_num_opcode
+};
+
+static ubyte standard_opcode_length[] =
+{
+	0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1
+};
+#if 0
+{
+	[DW_LNS_advance_pc]   = 1,
+	[DW_LNS_advance_line] = 1,
+	[DW_LNS_set_file] =  1,
+	[DW_LNS_set_column] = 1,
+	[DW_LNS_fixed_advance_pc] = 1,
+	[DW_LNS_set_isa] = 1,
+};
+#endif
+
+/* field filled at run time are marked with -1 */
+static struct compilation_unit_header default_comp_unit_header = {
+	.total_length = -1,
+	.version = 2,
+	.debug_abbrev_offset = 0,     /* we reuse the same abbrev entries for all comp unit */
+	.pointer_size = sizeof(void *)
+};
+
+static void emit_uword(struct buffer_ext *be, uword data)
+{
+	buffer_ext_add(be, &data, sizeof(uword));
+}
+
+static void emit_string(struct buffer_ext *be, const char *s)
+{
+	buffer_ext_add(be, (void *)s, strlen(s) + 1);
+}
+
+static void emit_unsigned_LEB128(struct buffer_ext *be,
+				 unsigned long data)
+{
+	do {
+		ubyte cur = data & 0x7F;
+		data >>= 7;
+		if (data)
+			cur |= 0x80;
+		buffer_ext_add(be, &cur, 1);
+	} while (data);
+}
+
+static void emit_signed_LEB128(struct buffer_ext *be, long data)
+{
+	int more = 1;
+	int negative = data < 0;
+	int size = sizeof(long) * CHAR_BIT;
+	while (more) {
+		ubyte cur = data & 0x7F;
+		data >>= 7;
+		if (negative)
+			data |= - (1 << (size - 7));
+		if ((data == 0 && !(cur & 0x40)) ||
+		    (data == -1l && (cur & 0x40)))
+			more = 0;
+		else
+			cur |= 0x80;
+		buffer_ext_add(be, &cur, 1);
+	}
+}
+
+static void emit_extended_opcode(struct buffer_ext *be, ubyte opcode,
+				 void *data, size_t data_len)
+{
+	buffer_ext_add(be, (char *)"", 1);
+
+	emit_unsigned_LEB128(be, data_len + 1);
+
+	buffer_ext_add(be, &opcode, 1);
+	buffer_ext_add(be, data, data_len);
+}
+
+static void emit_opcode(struct buffer_ext *be, ubyte opcode)
+{
+	buffer_ext_add(be, &opcode, 1);
+}
+
+static void emit_opcode_signed(struct buffer_ext  *be,
+			       ubyte opcode, long data)
+{
+	buffer_ext_add(be, &opcode, 1);
+	emit_signed_LEB128(be, data);
+}
+
+static void emit_opcode_unsigned(struct buffer_ext *be, ubyte opcode,
+				 unsigned long data)
+{
+	buffer_ext_add(be, &opcode, 1);
+	emit_unsigned_LEB128(be, data);
+}
+
+static void emit_advance_pc(struct buffer_ext *be, unsigned long delta_pc)
+{
+	emit_opcode_unsigned(be, DW_LNS_advance_pc, delta_pc);
+}
+
+static void emit_advance_lineno(struct buffer_ext  *be, long delta_lineno)
+{
+	emit_opcode_signed(be, DW_LNS_advance_line, delta_lineno);
+}
+
+static void emit_lne_end_of_sequence(struct buffer_ext *be)
+{
+	emit_extended_opcode(be, DW_LNE_end_sequence, NULL, 0);
+}
+
+static void emit_set_file(struct buffer_ext *be, unsigned long idx)
+{
+	emit_opcode_unsigned(be, DW_LNS_set_file, idx);
+}
+
+static void emit_lne_define_filename(struct buffer_ext *be,
+				     const char *filename)
+{
+	buffer_ext_add(be, (void *)"", 1);
+
+	/* LNE field, strlen(filename) + zero termination, 3 bytes for: the dir entry, timestamp, filesize */
+	emit_unsigned_LEB128(be, strlen(filename) + 5);
+	emit_opcode(be, DW_LNE_define_file);
+	emit_string(be, filename);
+	/* directory index 0=do not know */
+        emit_unsigned_LEB128(be, 0);
+	/* last modification date on file 0=do not know */
+        emit_unsigned_LEB128(be, 0);
+	/* filesize 0=do not know */
+        emit_unsigned_LEB128(be, 0);
+}
+
+static void emit_lne_set_address(struct buffer_ext *be,
+				 void *address)
+{
+	emit_extended_opcode(be, DW_LNE_set_address, &address, sizeof(unsigned long));
+}
+
+static ubyte get_special_opcode(struct debug_entry *ent,
+				unsigned int last_line,
+				unsigned long last_vma)
+{
+	unsigned int temp;
+	unsigned long delta_addr;
+
+	/*
+	 * delta from line_base
+	 */
+	temp = (ent->lineno - last_line) - default_debug_line_header.line_base;
+
+	if (temp >= default_debug_line_header.line_range)
+		return 0;
+
+	/*
+	 * delta of addresses
+	 */
+	delta_addr = (ent->addr - last_vma) / default_debug_line_header.minimum_instruction_length;
+
+	/* This is not sufficient to ensure opcode will be in [0-256] but
+	 * sufficient to ensure when summing with the delta lineno we will
+	 * not overflow the unsigned long opcode */
+
+	if (delta_addr <= 256 / default_debug_line_header.line_range) {
+		unsigned long opcode = temp +
+			(delta_addr * default_debug_line_header.line_range) +
+			default_debug_line_header.opcode_base;
+
+		return opcode <= 255 ? opcode : 0;
+	}
+	return 0;
+}
+
+static void emit_lineno_info(struct buffer_ext *be,
+			     struct debug_entry *ent, size_t nr_entry,
+			     unsigned long code_addr)
+{
+	size_t i;
+
+	/*
+	 * Machine state at start of a statement program
+	 * address = 0
+	 * file    = 1
+	 * line    = 1
+	 * column  = 0
+	 * is_stmt = default_is_stmt as given in the debug_line_header
+	 * basic block = 0
+	 * end sequence = 0
+	 */
+
+	/* start state of the state machine we take care of */
+	unsigned long last_vma = code_addr;
+	char const  *cur_filename = NULL;
+	unsigned long cur_file_idx = 0;
+	int last_line = 1;
+
+	emit_lne_set_address(be, (void *)code_addr);
+
+	for (i = 0; i < nr_entry; i++, ent = debug_entry_next(ent)) {
+		int need_copy = 0;
+		ubyte special_opcode;
+
+		/*
+		 * check if filename changed, if so add it
+		 */
+		if (!cur_filename || strcmp(cur_filename, ent->name)) {
+			emit_lne_define_filename(be, ent->name);
+			cur_filename = ent->name;
+			emit_set_file(be, ++cur_file_idx);
+			need_copy = 1;
+		}
+
+		special_opcode = get_special_opcode(ent, last_line, last_vma);
+		if (special_opcode != 0) {
+			last_line = ent->lineno;
+			last_vma  = ent->addr;
+			emit_opcode(be, special_opcode);
+		} else {
+			/*
+			 * lines differ, emit line delta
+			 */
+			if (last_line != ent->lineno) {
+				emit_advance_lineno(be, ent->lineno - last_line);
+				last_line = ent->lineno;
+				need_copy = 1;
+			}
+			/*
+			 * addresses differ, emit address delta
+			 */
+			if (last_vma != ent->addr) {
+				emit_advance_pc(be, ent->addr - last_vma);
+				last_vma = ent->addr;
+				need_copy = 1;
+			}
+			/*
+			 * add new row to matrix
+			 */
+			if (need_copy)
+				emit_opcode(be, DW_LNS_copy);
+		}
+	}
+}
+
+static void add_debug_line(struct buffer_ext *be,
+	struct debug_entry *ent, size_t nr_entry,
+	unsigned long code_addr)
+{
+	struct debug_line_header * dbg_header;
+	size_t old_size;
+
+	old_size = buffer_ext_size(be);
+
+	buffer_ext_add(be, (void *)&default_debug_line_header,
+		 sizeof(default_debug_line_header));
+
+	buffer_ext_add(be, &standard_opcode_length,  sizeof(standard_opcode_length));
+
+	// empty directory entry
+	buffer_ext_add(be, (void *)"", 1);
+
+	// empty filename directory
+	buffer_ext_add(be, (void *)"", 1);
+
+	dbg_header = buffer_ext_addr(be) + old_size;
+	dbg_header->prolog_length = (buffer_ext_size(be) - old_size) -
+		offsetof(struct debug_line_header, minimum_instruction_length);
+
+	emit_lineno_info(be, ent, nr_entry, code_addr);
+
+	emit_lne_end_of_sequence(be);
+
+	dbg_header = buffer_ext_addr(be) + old_size;
+	dbg_header->total_length = (buffer_ext_size(be) - old_size) -
+		offsetof(struct debug_line_header, version);
+}
+
+static void
+add_debug_abbrev(struct buffer_ext *be)
+{
+        emit_unsigned_LEB128(be, 1);
+        emit_unsigned_LEB128(be, DW_TAG_compile_unit);
+        emit_unsigned_LEB128(be, DW_CHILDREN_yes);
+        emit_unsigned_LEB128(be, DW_AT_stmt_list);
+        emit_unsigned_LEB128(be, DW_FORM_data4);
+        emit_unsigned_LEB128(be, 0);
+        emit_unsigned_LEB128(be, 0);
+        emit_unsigned_LEB128(be, 0);
+}
+
+static void
+add_compilation_unit(struct buffer_ext *be,
+		     size_t offset_debug_line)
+{
+	struct compilation_unit_header *comp_unit_header;
+	size_t old_size = buffer_ext_size(be);
+
+	buffer_ext_add(be, &default_comp_unit_header,
+		       sizeof(default_comp_unit_header));
+
+	emit_unsigned_LEB128(be, 1);
+	emit_uword(be, offset_debug_line);
+
+	comp_unit_header = buffer_ext_addr(be) + old_size;
+	comp_unit_header->total_length = (buffer_ext_size(be) - old_size) -
+		offsetof(struct compilation_unit_header, version);
+}
+
+static int
+jit_process_debug_info(uint64_t code_addr,
+		       void *debug, int nr_debug_entries,
+		       struct buffer_ext *dl,
+		       struct buffer_ext *da,
+		       struct buffer_ext *di)
+{
+	struct debug_entry *ent = debug;
+	int i;
+
+	for (i = 0; i < nr_debug_entries; i++) {
+		ent->addr = ent->addr - code_addr;
+		ent = debug_entry_next(ent);
+	}
+	add_compilation_unit(di, buffer_ext_size(dl));
+	add_debug_line(dl, debug, nr_debug_entries, 0);
+	add_debug_abbrev(da);
+	if (0) buffer_ext_dump(da, "abbrev");
+
+	return 0;
+}
+
+int
+jit_add_debug_info(Elf *e, uint64_t code_addr, void *debug, int nr_debug_entries)
+{
+	Elf_Data *d;
+	Elf_Scn *scn;
+	Elf_Shdr *shdr;
+	struct buffer_ext dl, di, da;
+	int ret;
+
+	buffer_ext_init(&dl);
+	buffer_ext_init(&di);
+	buffer_ext_init(&da);
+
+	ret = jit_process_debug_info(code_addr, debug, nr_debug_entries, &dl, &da, &di);
+	if (ret)
+		return -1;
+	/*
+	 * setup .debug_line section
+	 */
+	scn = elf_newscn(e);
+	if (!scn) {
+		warnx("cannot create section");
+		return -1;
+	}
+
+	d = elf_newdata(scn);
+	if (!d) {
+		warnx("cannot get new data");
+		return -1;
+	}
+
+	d->d_align = 1;
+	d->d_off = 0LL;
+	d->d_buf = buffer_ext_addr(&dl);
+	d->d_type = ELF_T_BYTE;
+	d->d_size = buffer_ext_size(&dl);
+	d->d_version = EV_CURRENT;
+
+	shdr = elf_getshdr(scn);
+	if (!shdr) {
+		warnx("cannot get section header");
+		return -1;
+	}
+
+	shdr->sh_name = 52; /* .debug_line */
+	shdr->sh_type = SHT_PROGBITS;
+	shdr->sh_addr = 0; /* must be zero or == sh_offset -> dynamic object */
+	shdr->sh_flags = 0;
+	shdr->sh_entsize = 0;
+
+	/*
+	 * setup .debug_info section
+	 */
+	scn = elf_newscn(e);
+	if (!scn) {
+		warnx("cannot create section");
+		return -1;
+	}
+
+	d = elf_newdata(scn);
+	if (!d) {
+		warnx("cannot get new data");
+		return -1;
+	}
+
+	d->d_align = 1;
+	d->d_off = 0LL;
+	d->d_buf = buffer_ext_addr(&di);
+	d->d_type = ELF_T_BYTE;
+	d->d_size = buffer_ext_size(&di);
+	d->d_version = EV_CURRENT;
+
+	shdr = elf_getshdr(scn);
+	if (!shdr) {
+		warnx("cannot get section header");
+		return -1;
+	}
+
+	shdr->sh_name = 64; /* .debug_info */
+	shdr->sh_type = SHT_PROGBITS;
+	shdr->sh_addr = 0; /* must be zero or == sh_offset -> dynamic object */
+	shdr->sh_flags = 0;
+	shdr->sh_entsize = 0;
+
+	/*
+	 * setup .debug_abbrev section
+	 */
+	scn = elf_newscn(e);
+	if (!scn) {
+		warnx("cannot create section");
+		return -1;
+	}
+
+	d = elf_newdata(scn);
+	if (!d) {
+		warnx("cannot get new data");
+		return -1;
+	}
+
+	d->d_align = 1;
+	d->d_off = 0LL;
+	d->d_buf = buffer_ext_addr(&da);
+	d->d_type = ELF_T_BYTE;
+	d->d_size = buffer_ext_size(&da);
+	d->d_version = EV_CURRENT;
+
+	shdr = elf_getshdr(scn);
+	if (!shdr) {
+		warnx("cannot get section header");
+		return -1;
+	}
+
+	shdr->sh_name = 76; /* .debug_info */
+	shdr->sh_type = SHT_PROGBITS;
+	shdr->sh_addr = 0; /* must be zero or == sh_offset -> dynamic object */
+	shdr->sh_flags = 0;
+	shdr->sh_entsize = 0;
+
+	/*
+	 * now we update the ELF image with all the sections
+	 */
+	if (elf_update(e, ELF_C_WRITE) < 0) {
+		warnx("elf_update debug failed");
+		return -1;
+	}
+	return 0;
+}
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index f50b723..73e38e4 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -23,6 +23,8 @@
 #include "strbuf.h"
 #include "build-id.h"
 #include "data.h"
+#include <api/fs/fs.h>
+#include "asm/bug.h"
 
 /*
  * magic2 = "PERFILE2"
@@ -868,6 +870,199 @@
 	return err;
 }
 
+static int cpu_cache_level__sort(const void *a, const void *b)
+{
+	struct cpu_cache_level *cache_a = (struct cpu_cache_level *)a;
+	struct cpu_cache_level *cache_b = (struct cpu_cache_level *)b;
+
+	return cache_a->level - cache_b->level;
+}
+
+static bool cpu_cache_level__cmp(struct cpu_cache_level *a, struct cpu_cache_level *b)
+{
+	if (a->level != b->level)
+		return false;
+
+	if (a->line_size != b->line_size)
+		return false;
+
+	if (a->sets != b->sets)
+		return false;
+
+	if (a->ways != b->ways)
+		return false;
+
+	if (strcmp(a->type, b->type))
+		return false;
+
+	if (strcmp(a->size, b->size))
+		return false;
+
+	if (strcmp(a->map, b->map))
+		return false;
+
+	return true;
+}
+
+static int cpu_cache_level__read(struct cpu_cache_level *cache, u32 cpu, u16 level)
+{
+	char path[PATH_MAX], file[PATH_MAX];
+	struct stat st;
+	size_t len;
+
+	scnprintf(path, PATH_MAX, "devices/system/cpu/cpu%d/cache/index%d/", cpu, level);
+	scnprintf(file, PATH_MAX, "%s/%s", sysfs__mountpoint(), path);
+
+	if (stat(file, &st))
+		return 1;
+
+	scnprintf(file, PATH_MAX, "%s/level", path);
+	if (sysfs__read_int(file, (int *) &cache->level))
+		return -1;
+
+	scnprintf(file, PATH_MAX, "%s/coherency_line_size", path);
+	if (sysfs__read_int(file, (int *) &cache->line_size))
+		return -1;
+
+	scnprintf(file, PATH_MAX, "%s/number_of_sets", path);
+	if (sysfs__read_int(file, (int *) &cache->sets))
+		return -1;
+
+	scnprintf(file, PATH_MAX, "%s/ways_of_associativity", path);
+	if (sysfs__read_int(file, (int *) &cache->ways))
+		return -1;
+
+	scnprintf(file, PATH_MAX, "%s/type", path);
+	if (sysfs__read_str(file, &cache->type, &len))
+		return -1;
+
+	cache->type[len] = 0;
+	cache->type = rtrim(cache->type);
+
+	scnprintf(file, PATH_MAX, "%s/size", path);
+	if (sysfs__read_str(file, &cache->size, &len)) {
+		free(cache->type);
+		return -1;
+	}
+
+	cache->size[len] = 0;
+	cache->size = rtrim(cache->size);
+
+	scnprintf(file, PATH_MAX, "%s/shared_cpu_list", path);
+	if (sysfs__read_str(file, &cache->map, &len)) {
+		free(cache->map);
+		free(cache->type);
+		return -1;
+	}
+
+	cache->map[len] = 0;
+	cache->map = rtrim(cache->map);
+	return 0;
+}
+
+static void cpu_cache_level__fprintf(FILE *out, struct cpu_cache_level *c)
+{
+	fprintf(out, "L%d %-15s %8s [%s]\n", c->level, c->type, c->size, c->map);
+}
+
+static int build_caches(struct cpu_cache_level caches[], u32 size, u32 *cntp)
+{
+	u32 i, cnt = 0;
+	long ncpus;
+	u32 nr, cpu;
+	u16 level;
+
+	ncpus = sysconf(_SC_NPROCESSORS_CONF);
+	if (ncpus < 0)
+		return -1;
+
+	nr = (u32)(ncpus & UINT_MAX);
+
+	for (cpu = 0; cpu < nr; cpu++) {
+		for (level = 0; level < 10; level++) {
+			struct cpu_cache_level c;
+			int err;
+
+			err = cpu_cache_level__read(&c, cpu, level);
+			if (err < 0)
+				return err;
+
+			if (err == 1)
+				break;
+
+			for (i = 0; i < cnt; i++) {
+				if (cpu_cache_level__cmp(&c, &caches[i]))
+					break;
+			}
+
+			if (i == cnt)
+				caches[cnt++] = c;
+			else
+				cpu_cache_level__free(&c);
+
+			if (WARN_ONCE(cnt == size, "way too many cpu caches.."))
+				goto out;
+		}
+	}
+ out:
+	*cntp = cnt;
+	return 0;
+}
+
+#define MAX_CACHES 2000
+
+static int write_cache(int fd, struct perf_header *h __maybe_unused,
+			  struct perf_evlist *evlist __maybe_unused)
+{
+	struct cpu_cache_level caches[MAX_CACHES];
+	u32 cnt = 0, i, version = 1;
+	int ret;
+
+	ret = build_caches(caches, MAX_CACHES, &cnt);
+	if (ret)
+		goto out;
+
+	qsort(&caches, cnt, sizeof(struct cpu_cache_level), cpu_cache_level__sort);
+
+	ret = do_write(fd, &version, sizeof(u32));
+	if (ret < 0)
+		goto out;
+
+	ret = do_write(fd, &cnt, sizeof(u32));
+	if (ret < 0)
+		goto out;
+
+	for (i = 0; i < cnt; i++) {
+		struct cpu_cache_level *c = &caches[i];
+
+		#define _W(v)					\
+			ret = do_write(fd, &c->v, sizeof(u32));	\
+			if (ret < 0)				\
+				goto out;
+
+		_W(level)
+		_W(line_size)
+		_W(sets)
+		_W(ways)
+		#undef _W
+
+		#define _W(v)						\
+			ret = do_write_string(fd, (const char *) c->v);	\
+			if (ret < 0)					\
+				goto out;
+
+		_W(type)
+		_W(size)
+		_W(map)
+		#undef _W
+	}
+
+out:
+	for (i = 0; i < cnt; i++)
+		cpu_cache_level__free(&caches[i]);
+	return ret;
+}
+
 static int write_stat(int fd __maybe_unused,
 		      struct perf_header *h __maybe_unused,
 		      struct perf_evlist *evlist __maybe_unused)
@@ -1172,6 +1367,18 @@
 	fprintf(fp, "# contains stat data\n");
 }
 
+static void print_cache(struct perf_header *ph __maybe_unused,
+			int fd __maybe_unused, FILE *fp __maybe_unused)
+{
+	int i;
+
+	fprintf(fp, "# CPU cache info:\n");
+	for (i = 0; i < ph->env.caches_cnt; i++) {
+		fprintf(fp, "#  ");
+		cpu_cache_level__fprintf(fp, &ph->env.caches[i]);
+	}
+}
+
 static void print_pmu_mappings(struct perf_header *ph, int fd __maybe_unused,
 			       FILE *fp)
 {
@@ -1920,6 +2127,68 @@
 	return err;
 }
 
+static int process_cache(struct perf_file_section *section __maybe_unused,
+			 struct perf_header *ph __maybe_unused, int fd __maybe_unused,
+			 void *data __maybe_unused)
+{
+	struct cpu_cache_level *caches;
+	u32 cnt, i, version;
+
+	if (readn(fd, &version, sizeof(version)) != sizeof(version))
+		return -1;
+
+	if (ph->needs_swap)
+		version = bswap_32(version);
+
+	if (version != 1)
+		return -1;
+
+	if (readn(fd, &cnt, sizeof(cnt)) != sizeof(cnt))
+		return -1;
+
+	if (ph->needs_swap)
+		cnt = bswap_32(cnt);
+
+	caches = zalloc(sizeof(*caches) * cnt);
+	if (!caches)
+		return -1;
+
+	for (i = 0; i < cnt; i++) {
+		struct cpu_cache_level c;
+
+		#define _R(v)						\
+			if (readn(fd, &c.v, sizeof(u32)) != sizeof(u32))\
+				goto out_free_caches;			\
+			if (ph->needs_swap)				\
+				c.v = bswap_32(c.v);			\
+
+		_R(level)
+		_R(line_size)
+		_R(sets)
+		_R(ways)
+		#undef _R
+
+		#define _R(v)				\
+			c.v = do_read_string(fd, ph);	\
+			if (!c.v)			\
+				goto out_free_caches;
+
+		_R(type)
+		_R(size)
+		_R(map)
+		#undef _R
+
+		caches[i] = c;
+	}
+
+	ph->env.caches = caches;
+	ph->env.caches_cnt = cnt;
+	return 0;
+out_free_caches:
+	free(caches);
+	return -1;
+}
+
 struct feature_ops {
 	int (*write)(int fd, struct perf_header *h, struct perf_evlist *evlist);
 	void (*print)(struct perf_header *h, int fd, FILE *fp);
@@ -1962,6 +2231,7 @@
 	FEAT_OPP(HEADER_GROUP_DESC,	group_desc),
 	FEAT_OPP(HEADER_AUXTRACE,	auxtrace),
 	FEAT_OPA(HEADER_STAT,		stat),
+	FEAT_OPF(HEADER_CACHE,		cache),
 };
 
 struct header_print_data {
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index cff9892..3d87ca8 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -32,6 +32,7 @@
 	HEADER_GROUP_DESC,
 	HEADER_AUXTRACE,
 	HEADER_STAT,
+	HEADER_CACHE,
 	HEADER_LAST_FEATURE,
 	HEADER_FEAT_BITS	= 256,
 };
diff --git a/tools/perf/util/help-unknown-cmd.c b/tools/perf/util/help-unknown-cmd.c
index dc1e41c..43a98a4 100644
--- a/tools/perf/util/help-unknown-cmd.c
+++ b/tools/perf/util/help-unknown-cmd.c
@@ -6,7 +6,8 @@
 static int autocorrect;
 static struct cmdnames aliases;
 
-static int perf_unknown_cmd_config(const char *var, const char *value, void *cb)
+static int perf_unknown_cmd_config(const char *var, const char *value,
+				   void *cb __maybe_unused)
 {
 	if (!strcmp(var, "help.autocorrect"))
 		autocorrect = perf_config_int(var,value);
@@ -14,7 +15,7 @@
 	if (!prefixcmp(var, "alias."))
 		add_cmdname(&aliases, var + 6, strlen(var + 6));
 
-	return perf_default_config(var, value, cb);
+	return 0;
 }
 
 static int levenshtein_compare(const void *p1, const void *p2)
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index c226303..290b3cb 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -131,6 +131,8 @@
 			symlen = unresolved_col_width + 4 + 2;
 			hists__new_col_len(hists, HISTC_MEM_DADDR_SYMBOL,
 					   symlen);
+			hists__new_col_len(hists, HISTC_MEM_DCACHELINE,
+					   symlen);
 		}
 
 		if (h->mem_info->iaddr.sym) {
@@ -177,6 +179,9 @@
 	if (h->transaction)
 		hists__new_col_len(hists, HISTC_TRANSACTION,
 				   hist_entry__transaction_len());
+
+	if (h->trace_output)
+		hists__new_col_len(hists, HISTC_TRACE, strlen(h->trace_output));
 }
 
 void hists__output_recalc_col_len(struct hists *hists, int max_rows)
@@ -243,6 +248,8 @@
 	/* XXX need decay for weight too? */
 }
 
+static void hists__delete_entry(struct hists *hists, struct hist_entry *he);
+
 static bool hists__decay_entry(struct hists *hists, struct hist_entry *he)
 {
 	u64 prev_period = he->stat.period;
@@ -258,21 +265,45 @@
 
 	diff = prev_period - he->stat.period;
 
-	hists->stats.total_period -= diff;
-	if (!he->filtered)
-		hists->stats.total_non_filtered_period -= diff;
+	if (!he->depth) {
+		hists->stats.total_period -= diff;
+		if (!he->filtered)
+			hists->stats.total_non_filtered_period -= diff;
+	}
+
+	if (!he->leaf) {
+		struct hist_entry *child;
+		struct rb_node *node = rb_first(&he->hroot_out);
+		while (node) {
+			child = rb_entry(node, struct hist_entry, rb_node);
+			node = rb_next(node);
+
+			if (hists__decay_entry(hists, child))
+				hists__delete_entry(hists, child);
+		}
+	}
 
 	return he->stat.period == 0;
 }
 
 static void hists__delete_entry(struct hists *hists, struct hist_entry *he)
 {
-	rb_erase(&he->rb_node, &hists->entries);
+	struct rb_root *root_in;
+	struct rb_root *root_out;
 
-	if (sort__need_collapse)
-		rb_erase(&he->rb_node_in, &hists->entries_collapsed);
-	else
-		rb_erase(&he->rb_node_in, hists->entries_in);
+	if (he->parent_he) {
+		root_in  = &he->parent_he->hroot_in;
+		root_out = &he->parent_he->hroot_out;
+	} else {
+		if (sort__need_collapse)
+			root_in = &hists->entries_collapsed;
+		else
+			root_in = hists->entries_in;
+		root_out = &hists->entries;
+	}
+
+	rb_erase(&he->rb_node_in, root_in);
+	rb_erase(&he->rb_node, root_out);
 
 	--hists->nr_entries;
 	if (!he->filtered)
@@ -391,6 +422,9 @@
 		}
 		INIT_LIST_HEAD(&he->pairs.node);
 		thread__get(he->thread);
+
+		if (!symbol_conf.report_hierarchy)
+			he->leaf = true;
 	}
 
 	return he;
@@ -403,6 +437,16 @@
 	return 0;
 }
 
+static void hist_entry__add_callchain_period(struct hist_entry *he, u64 period)
+{
+	if (!symbol_conf.use_callchain)
+		return;
+
+	he->hists->callchain_period += period;
+	if (!he->filtered)
+		he->hists->callchain_non_filtered_period += period;
+}
+
 static struct hist_entry *hists__findnew_entry(struct hists *hists,
 					       struct hist_entry *entry,
 					       struct addr_location *al,
@@ -430,8 +474,10 @@
 		cmp = hist_entry__cmp(he, entry);
 
 		if (!cmp) {
-			if (sample_self)
+			if (sample_self) {
 				he_stat__add_period(&he->stat, period, weight);
+				hist_entry__add_callchain_period(he, period);
+			}
 			if (symbol_conf.cumulate_callchain)
 				he_stat__add_period(he->stat_acc, period, weight);
 
@@ -464,6 +510,8 @@
 	if (!he)
 		return NULL;
 
+	if (sample_self)
+		hist_entry__add_callchain_period(he, period);
 	hists->nr_entries++;
 
 	rb_link_node(&he->rb_node_in, parent, p);
@@ -949,10 +997,15 @@
 int64_t
 hist_entry__cmp(struct hist_entry *left, struct hist_entry *right)
 {
+	struct hists *hists = left->hists;
 	struct perf_hpp_fmt *fmt;
 	int64_t cmp = 0;
 
-	perf_hpp__for_each_sort_list(fmt) {
+	hists__for_each_sort_list(hists, fmt) {
+		if (perf_hpp__is_dynamic_entry(fmt) &&
+		    !perf_hpp__defined_dynamic_entry(fmt, hists))
+			continue;
+
 		cmp = fmt->cmp(fmt, left, right);
 		if (cmp)
 			break;
@@ -964,10 +1017,15 @@
 int64_t
 hist_entry__collapse(struct hist_entry *left, struct hist_entry *right)
 {
+	struct hists *hists = left->hists;
 	struct perf_hpp_fmt *fmt;
 	int64_t cmp = 0;
 
-	perf_hpp__for_each_sort_list(fmt) {
+	hists__for_each_sort_list(hists, fmt) {
+		if (perf_hpp__is_dynamic_entry(fmt) &&
+		    !perf_hpp__defined_dynamic_entry(fmt, hists))
+			continue;
+
 		cmp = fmt->collapse(fmt, left, right);
 		if (cmp)
 			break;
@@ -1004,17 +1062,250 @@
 }
 
 /*
+ * If this is not the last column, then we need to pad it according to the
+ * pre-calculated max lenght for this column, otherwise don't bother adding
+ * spaces because that would break viewing this with, for instance, 'less',
+ * that would show tons of trailing spaces when a long C++ demangled method
+ * names is sampled.
+*/
+int hist_entry__snprintf_alignment(struct hist_entry *he, struct perf_hpp *hpp,
+				   struct perf_hpp_fmt *fmt, int printed)
+{
+	if (!list_is_last(&fmt->list, &he->hists->hpp_list->fields)) {
+		const int width = fmt->width(fmt, hpp, hists_to_evsel(he->hists));
+		if (printed < width) {
+			advance_hpp(hpp, printed);
+			printed = scnprintf(hpp->buf, hpp->size, "%-*s", width - printed, " ");
+		}
+	}
+
+	return printed;
+}
+
+/*
  * collapse the histogram
  */
 
-bool hists__collapse_insert_entry(struct hists *hists __maybe_unused,
-				  struct rb_root *root, struct hist_entry *he)
+static void hists__apply_filters(struct hists *hists, struct hist_entry *he);
+static void hists__remove_entry_filter(struct hists *hists, struct hist_entry *he,
+				       enum hist_filter type);
+
+typedef bool (*fmt_chk_fn)(struct perf_hpp_fmt *fmt);
+
+static bool check_thread_entry(struct perf_hpp_fmt *fmt)
+{
+	return perf_hpp__is_thread_entry(fmt) || perf_hpp__is_comm_entry(fmt);
+}
+
+static void hist_entry__check_and_remove_filter(struct hist_entry *he,
+						enum hist_filter type,
+						fmt_chk_fn check)
+{
+	struct perf_hpp_fmt *fmt;
+	bool type_match = false;
+	struct hist_entry *parent = he->parent_he;
+
+	switch (type) {
+	case HIST_FILTER__THREAD:
+		if (symbol_conf.comm_list == NULL &&
+		    symbol_conf.pid_list == NULL &&
+		    symbol_conf.tid_list == NULL)
+			return;
+		break;
+	case HIST_FILTER__DSO:
+		if (symbol_conf.dso_list == NULL)
+			return;
+		break;
+	case HIST_FILTER__SYMBOL:
+		if (symbol_conf.sym_list == NULL)
+			return;
+		break;
+	case HIST_FILTER__PARENT:
+	case HIST_FILTER__GUEST:
+	case HIST_FILTER__HOST:
+	case HIST_FILTER__SOCKET:
+	default:
+		return;
+	}
+
+	/* if it's filtered by own fmt, it has to have filter bits */
+	perf_hpp_list__for_each_format(he->hpp_list, fmt) {
+		if (check(fmt)) {
+			type_match = true;
+			break;
+		}
+	}
+
+	if (type_match) {
+		/*
+		 * If the filter is for current level entry, propagate
+		 * filter marker to parents.  The marker bit was
+		 * already set by default so it only needs to clear
+		 * non-filtered entries.
+		 */
+		if (!(he->filtered & (1 << type))) {
+			while (parent) {
+				parent->filtered &= ~(1 << type);
+				parent = parent->parent_he;
+			}
+		}
+	} else {
+		/*
+		 * If current entry doesn't have matching formats, set
+		 * filter marker for upper level entries.  it will be
+		 * cleared if its lower level entries is not filtered.
+		 *
+		 * For lower-level entries, it inherits parent's
+		 * filter bit so that lower level entries of a
+		 * non-filtered entry won't set the filter marker.
+		 */
+		if (parent == NULL)
+			he->filtered |= (1 << type);
+		else
+			he->filtered |= (parent->filtered & (1 << type));
+	}
+}
+
+static void hist_entry__apply_hierarchy_filters(struct hist_entry *he)
+{
+	hist_entry__check_and_remove_filter(he, HIST_FILTER__THREAD,
+					    check_thread_entry);
+
+	hist_entry__check_and_remove_filter(he, HIST_FILTER__DSO,
+					    perf_hpp__is_dso_entry);
+
+	hist_entry__check_and_remove_filter(he, HIST_FILTER__SYMBOL,
+					    perf_hpp__is_sym_entry);
+
+	hists__apply_filters(he->hists, he);
+}
+
+static struct hist_entry *hierarchy_insert_entry(struct hists *hists,
+						 struct rb_root *root,
+						 struct hist_entry *he,
+						 struct hist_entry *parent_he,
+						 struct perf_hpp_list *hpp_list)
+{
+	struct rb_node **p = &root->rb_node;
+	struct rb_node *parent = NULL;
+	struct hist_entry *iter, *new;
+	struct perf_hpp_fmt *fmt;
+	int64_t cmp;
+
+	while (*p != NULL) {
+		parent = *p;
+		iter = rb_entry(parent, struct hist_entry, rb_node_in);
+
+		cmp = 0;
+		perf_hpp_list__for_each_sort_list(hpp_list, fmt) {
+			cmp = fmt->collapse(fmt, iter, he);
+			if (cmp)
+				break;
+		}
+
+		if (!cmp) {
+			he_stat__add_stat(&iter->stat, &he->stat);
+			return iter;
+		}
+
+		if (cmp < 0)
+			p = &parent->rb_left;
+		else
+			p = &parent->rb_right;
+	}
+
+	new = hist_entry__new(he, true);
+	if (new == NULL)
+		return NULL;
+
+	hists->nr_entries++;
+
+	/* save related format list for output */
+	new->hpp_list = hpp_list;
+	new->parent_he = parent_he;
+
+	hist_entry__apply_hierarchy_filters(new);
+
+	/* some fields are now passed to 'new' */
+	perf_hpp_list__for_each_sort_list(hpp_list, fmt) {
+		if (perf_hpp__is_trace_entry(fmt) || perf_hpp__is_dynamic_entry(fmt))
+			he->trace_output = NULL;
+		else
+			new->trace_output = NULL;
+
+		if (perf_hpp__is_srcline_entry(fmt))
+			he->srcline = NULL;
+		else
+			new->srcline = NULL;
+
+		if (perf_hpp__is_srcfile_entry(fmt))
+			he->srcfile = NULL;
+		else
+			new->srcfile = NULL;
+	}
+
+	rb_link_node(&new->rb_node_in, parent, p);
+	rb_insert_color(&new->rb_node_in, root);
+	return new;
+}
+
+static int hists__hierarchy_insert_entry(struct hists *hists,
+					 struct rb_root *root,
+					 struct hist_entry *he)
+{
+	struct perf_hpp_list_node *node;
+	struct hist_entry *new_he = NULL;
+	struct hist_entry *parent = NULL;
+	int depth = 0;
+	int ret = 0;
+
+	list_for_each_entry(node, &hists->hpp_formats, list) {
+		/* skip period (overhead) and elided columns */
+		if (node->level == 0 || node->skip)
+			continue;
+
+		/* insert copy of 'he' for each fmt into the hierarchy */
+		new_he = hierarchy_insert_entry(hists, root, he, parent, &node->hpp);
+		if (new_he == NULL) {
+			ret = -1;
+			break;
+		}
+
+		root = &new_he->hroot_in;
+		new_he->depth = depth++;
+		parent = new_he;
+	}
+
+	if (new_he) {
+		new_he->leaf = true;
+
+		if (symbol_conf.use_callchain) {
+			callchain_cursor_reset(&callchain_cursor);
+			if (callchain_merge(&callchain_cursor,
+					    new_he->callchain,
+					    he->callchain) < 0)
+				ret = -1;
+		}
+	}
+
+	/* 'he' is no longer used */
+	hist_entry__delete(he);
+
+	/* return 0 (or -1) since it already applied filters */
+	return ret;
+}
+
+int hists__collapse_insert_entry(struct hists *hists, struct rb_root *root,
+				 struct hist_entry *he)
 {
 	struct rb_node **p = &root->rb_node;
 	struct rb_node *parent = NULL;
 	struct hist_entry *iter;
 	int64_t cmp;
 
+	if (symbol_conf.report_hierarchy)
+		return hists__hierarchy_insert_entry(hists, root, he);
+
 	while (*p != NULL) {
 		parent = *p;
 		iter = rb_entry(parent, struct hist_entry, rb_node_in);
@@ -1022,18 +1313,21 @@
 		cmp = hist_entry__collapse(iter, he);
 
 		if (!cmp) {
+			int ret = 0;
+
 			he_stat__add_stat(&iter->stat, &he->stat);
 			if (symbol_conf.cumulate_callchain)
 				he_stat__add_stat(iter->stat_acc, he->stat_acc);
 
 			if (symbol_conf.use_callchain) {
 				callchain_cursor_reset(&callchain_cursor);
-				callchain_merge(&callchain_cursor,
-						iter->callchain,
-						he->callchain);
+				if (callchain_merge(&callchain_cursor,
+						    iter->callchain,
+						    he->callchain) < 0)
+					ret = -1;
 			}
 			hist_entry__delete(he);
-			return false;
+			return ret;
 		}
 
 		if (cmp < 0)
@@ -1045,7 +1339,7 @@
 
 	rb_link_node(&he->rb_node_in, parent, p);
 	rb_insert_color(&he->rb_node_in, root);
-	return true;
+	return 1;
 }
 
 struct rb_root *hists__get_rotate_entries_in(struct hists *hists)
@@ -1071,14 +1365,15 @@
 	hists__filter_entry_by_socket(hists, he);
 }
 
-void hists__collapse_resort(struct hists *hists, struct ui_progress *prog)
+int hists__collapse_resort(struct hists *hists, struct ui_progress *prog)
 {
 	struct rb_root *root;
 	struct rb_node *next;
 	struct hist_entry *n;
+	int ret;
 
 	if (!sort__need_collapse)
-		return;
+		return 0;
 
 	hists->nr_entries = 0;
 
@@ -1093,7 +1388,11 @@
 		next = rb_next(&n->rb_node_in);
 
 		rb_erase(&n->rb_node_in, root);
-		if (hists__collapse_insert_entry(hists, &hists->entries_collapsed, n)) {
+		ret = hists__collapse_insert_entry(hists, &hists->entries_collapsed, n);
+		if (ret < 0)
+			return -1;
+
+		if (ret) {
 			/*
 			 * If it wasn't combined with one of the entries already
 			 * collapsed, we need to apply the filters that may have
@@ -1104,14 +1403,16 @@
 		if (prog)
 			ui_progress__update(prog, 1);
 	}
+	return 0;
 }
 
 static int hist_entry__sort(struct hist_entry *a, struct hist_entry *b)
 {
+	struct hists *hists = a->hists;
 	struct perf_hpp_fmt *fmt;
 	int64_t cmp = 0;
 
-	perf_hpp__for_each_sort_list(fmt) {
+	hists__for_each_sort_list(hists, fmt) {
 		if (perf_hpp__should_skip(fmt, a->hists))
 			continue;
 
@@ -1152,6 +1453,113 @@
 	hists->stats.total_period += h->stat.period;
 }
 
+static void hierarchy_recalc_total_periods(struct hists *hists)
+{
+	struct rb_node *node;
+	struct hist_entry *he;
+
+	node = rb_first(&hists->entries);
+
+	hists->stats.total_period = 0;
+	hists->stats.total_non_filtered_period = 0;
+
+	/*
+	 * recalculate total period using top-level entries only
+	 * since lower level entries only see non-filtered entries
+	 * but upper level entries have sum of both entries.
+	 */
+	while (node) {
+		he = rb_entry(node, struct hist_entry, rb_node);
+		node = rb_next(node);
+
+		hists->stats.total_period += he->stat.period;
+		if (!he->filtered)
+			hists->stats.total_non_filtered_period += he->stat.period;
+	}
+}
+
+static void hierarchy_insert_output_entry(struct rb_root *root,
+					  struct hist_entry *he)
+{
+	struct rb_node **p = &root->rb_node;
+	struct rb_node *parent = NULL;
+	struct hist_entry *iter;
+	struct perf_hpp_fmt *fmt;
+
+	while (*p != NULL) {
+		parent = *p;
+		iter = rb_entry(parent, struct hist_entry, rb_node);
+
+		if (hist_entry__sort(he, iter) > 0)
+			p = &parent->rb_left;
+		else
+			p = &parent->rb_right;
+	}
+
+	rb_link_node(&he->rb_node, parent, p);
+	rb_insert_color(&he->rb_node, root);
+
+	/* update column width of dynamic entry */
+	perf_hpp_list__for_each_sort_list(he->hpp_list, fmt) {
+		if (perf_hpp__is_dynamic_entry(fmt))
+			fmt->sort(fmt, he, NULL);
+	}
+}
+
+static void hists__hierarchy_output_resort(struct hists *hists,
+					   struct ui_progress *prog,
+					   struct rb_root *root_in,
+					   struct rb_root *root_out,
+					   u64 min_callchain_hits,
+					   bool use_callchain)
+{
+	struct rb_node *node;
+	struct hist_entry *he;
+
+	*root_out = RB_ROOT;
+	node = rb_first(root_in);
+
+	while (node) {
+		he = rb_entry(node, struct hist_entry, rb_node_in);
+		node = rb_next(node);
+
+		hierarchy_insert_output_entry(root_out, he);
+
+		if (prog)
+			ui_progress__update(prog, 1);
+
+		if (!he->leaf) {
+			hists__hierarchy_output_resort(hists, prog,
+						       &he->hroot_in,
+						       &he->hroot_out,
+						       min_callchain_hits,
+						       use_callchain);
+			hists->nr_entries++;
+			if (!he->filtered) {
+				hists->nr_non_filtered_entries++;
+				hists__calc_col_len(hists, he);
+			}
+
+			continue;
+		}
+
+		if (!use_callchain)
+			continue;
+
+		if (callchain_param.mode == CHAIN_GRAPH_REL) {
+			u64 total = he->stat.period;
+
+			if (symbol_conf.cumulate_callchain)
+				total = he->stat_acc->period;
+
+			min_callchain_hits = total * (callchain_param.min_percent / 100);
+		}
+
+		callchain_param.sort(&he->sorted_chain, he->callchain,
+				     min_callchain_hits, &callchain_param);
+	}
+}
+
 static void __hists__insert_output_entry(struct rb_root *entries,
 					 struct hist_entry *he,
 					 u64 min_callchain_hits,
@@ -1160,10 +1568,20 @@
 	struct rb_node **p = &entries->rb_node;
 	struct rb_node *parent = NULL;
 	struct hist_entry *iter;
+	struct perf_hpp_fmt *fmt;
 
-	if (use_callchain)
+	if (use_callchain) {
+		if (callchain_param.mode == CHAIN_GRAPH_REL) {
+			u64 total = he->stat.period;
+
+			if (symbol_conf.cumulate_callchain)
+				total = he->stat_acc->period;
+
+			min_callchain_hits = total * (callchain_param.min_percent / 100);
+		}
 		callchain_param.sort(&he->sorted_chain, he->callchain,
 				      min_callchain_hits, &callchain_param);
+	}
 
 	while (*p != NULL) {
 		parent = *p;
@@ -1177,23 +1595,41 @@
 
 	rb_link_node(&he->rb_node, parent, p);
 	rb_insert_color(&he->rb_node, entries);
+
+	perf_hpp_list__for_each_sort_list(&perf_hpp_list, fmt) {
+		if (perf_hpp__is_dynamic_entry(fmt) &&
+		    perf_hpp__defined_dynamic_entry(fmt, he->hists))
+			fmt->sort(fmt, he, NULL);  /* update column width */
+	}
 }
 
-void hists__output_resort(struct hists *hists, struct ui_progress *prog)
+static void output_resort(struct hists *hists, struct ui_progress *prog,
+			  bool use_callchain)
 {
 	struct rb_root *root;
 	struct rb_node *next;
 	struct hist_entry *n;
+	u64 callchain_total;
 	u64 min_callchain_hits;
-	struct perf_evsel *evsel = hists_to_evsel(hists);
-	bool use_callchain;
 
-	if (evsel && symbol_conf.use_callchain && !symbol_conf.show_ref_callgraph)
-		use_callchain = evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN;
-	else
-		use_callchain = symbol_conf.use_callchain;
+	callchain_total = hists->callchain_period;
+	if (symbol_conf.filter_relative)
+		callchain_total = hists->callchain_non_filtered_period;
 
-	min_callchain_hits = hists->stats.total_period * (callchain_param.min_percent / 100);
+	min_callchain_hits = callchain_total * (callchain_param.min_percent / 100);
+
+	hists__reset_stats(hists);
+	hists__reset_col_len(hists);
+
+	if (symbol_conf.report_hierarchy) {
+		hists__hierarchy_output_resort(hists, prog,
+					       &hists->entries_collapsed,
+					       &hists->entries,
+					       min_callchain_hits,
+					       use_callchain);
+		hierarchy_recalc_total_periods(hists);
+		return;
+	}
 
 	if (sort__need_collapse)
 		root = &hists->entries_collapsed;
@@ -1203,9 +1639,6 @@
 	next = rb_first(root);
 	hists->entries = RB_ROOT;
 
-	hists__reset_stats(hists);
-	hists__reset_col_len(hists);
-
 	while (next) {
 		n = rb_entry(next, struct hist_entry, rb_node_in);
 		next = rb_next(&n->rb_node_in);
@@ -1221,15 +1654,136 @@
 	}
 }
 
+void perf_evsel__output_resort(struct perf_evsel *evsel, struct ui_progress *prog)
+{
+	bool use_callchain;
+
+	if (evsel && symbol_conf.use_callchain && !symbol_conf.show_ref_callgraph)
+		use_callchain = evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN;
+	else
+		use_callchain = symbol_conf.use_callchain;
+
+	output_resort(evsel__hists(evsel), prog, use_callchain);
+}
+
+void hists__output_resort(struct hists *hists, struct ui_progress *prog)
+{
+	output_resort(hists, prog, symbol_conf.use_callchain);
+}
+
+static bool can_goto_child(struct hist_entry *he, enum hierarchy_move_dir hmd)
+{
+	if (he->leaf || hmd == HMD_FORCE_SIBLING)
+		return false;
+
+	if (he->unfolded || hmd == HMD_FORCE_CHILD)
+		return true;
+
+	return false;
+}
+
+struct rb_node *rb_hierarchy_last(struct rb_node *node)
+{
+	struct hist_entry *he = rb_entry(node, struct hist_entry, rb_node);
+
+	while (can_goto_child(he, HMD_NORMAL)) {
+		node = rb_last(&he->hroot_out);
+		he = rb_entry(node, struct hist_entry, rb_node);
+	}
+	return node;
+}
+
+struct rb_node *__rb_hierarchy_next(struct rb_node *node, enum hierarchy_move_dir hmd)
+{
+	struct hist_entry *he = rb_entry(node, struct hist_entry, rb_node);
+
+	if (can_goto_child(he, hmd))
+		node = rb_first(&he->hroot_out);
+	else
+		node = rb_next(node);
+
+	while (node == NULL) {
+		he = he->parent_he;
+		if (he == NULL)
+			break;
+
+		node = rb_next(&he->rb_node);
+	}
+	return node;
+}
+
+struct rb_node *rb_hierarchy_prev(struct rb_node *node)
+{
+	struct hist_entry *he = rb_entry(node, struct hist_entry, rb_node);
+
+	node = rb_prev(node);
+	if (node)
+		return rb_hierarchy_last(node);
+
+	he = he->parent_he;
+	if (he == NULL)
+		return NULL;
+
+	return &he->rb_node;
+}
+
+bool hist_entry__has_hierarchy_children(struct hist_entry *he, float limit)
+{
+	struct rb_node *node;
+	struct hist_entry *child;
+	float percent;
+
+	if (he->leaf)
+		return false;
+
+	node = rb_first(&he->hroot_out);
+	child = rb_entry(node, struct hist_entry, rb_node);
+
+	while (node && child->filtered) {
+		node = rb_next(node);
+		child = rb_entry(node, struct hist_entry, rb_node);
+	}
+
+	if (node)
+		percent = hist_entry__get_percent_limit(child);
+	else
+		percent = 0;
+
+	return node && percent >= limit;
+}
+
 static void hists__remove_entry_filter(struct hists *hists, struct hist_entry *h,
 				       enum hist_filter filter)
 {
 	h->filtered &= ~(1 << filter);
+
+	if (symbol_conf.report_hierarchy) {
+		struct hist_entry *parent = h->parent_he;
+
+		while (parent) {
+			he_stat__add_stat(&parent->stat, &h->stat);
+
+			parent->filtered &= ~(1 << filter);
+
+			if (parent->filtered)
+				goto next;
+
+			/* force fold unfiltered entry for simplicity */
+			parent->unfolded = false;
+			parent->has_no_entry = false;
+			parent->row_offset = 0;
+			parent->nr_rows = 0;
+next:
+			parent = parent->parent_he;
+		}
+	}
+
 	if (h->filtered)
 		return;
 
 	/* force fold unfiltered entry for simplicity */
 	h->unfolded = false;
+	h->has_no_entry = false;
 	h->row_offset = 0;
 	h->nr_rows = 0;
 
@@ -1252,28 +1806,6 @@
 	return false;
 }
 
-void hists__filter_by_dso(struct hists *hists)
-{
-	struct rb_node *nd;
-
-	hists->stats.nr_non_filtered_samples = 0;
-
-	hists__reset_filter_stats(hists);
-	hists__reset_col_len(hists);
-
-	for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) {
-		struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
-
-		if (symbol_conf.exclude_other && !h->parent)
-			continue;
-
-		if (hists__filter_entry_by_dso(hists, h))
-			continue;
-
-		hists__remove_entry_filter(hists, h, HIST_FILTER__DSO);
-	}
-}
-
 static bool hists__filter_entry_by_thread(struct hists *hists,
 					  struct hist_entry *he)
 {
@@ -1286,25 +1818,6 @@
 	return false;
 }
 
-void hists__filter_by_thread(struct hists *hists)
-{
-	struct rb_node *nd;
-
-	hists->stats.nr_non_filtered_samples = 0;
-
-	hists__reset_filter_stats(hists);
-	hists__reset_col_len(hists);
-
-	for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) {
-		struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
-
-		if (hists__filter_entry_by_thread(hists, h))
-			continue;
-
-		hists__remove_entry_filter(hists, h, HIST_FILTER__THREAD);
-	}
-}
-
 static bool hists__filter_entry_by_symbol(struct hists *hists,
 					  struct hist_entry *he)
 {
@@ -1318,25 +1831,6 @@
 	return false;
 }
 
-void hists__filter_by_symbol(struct hists *hists)
-{
-	struct rb_node *nd;
-
-	hists->stats.nr_non_filtered_samples = 0;
-
-	hists__reset_filter_stats(hists);
-	hists__reset_col_len(hists);
-
-	for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) {
-		struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
-
-		if (hists__filter_entry_by_symbol(hists, h))
-			continue;
-
-		hists__remove_entry_filter(hists, h, HIST_FILTER__SYMBOL);
-	}
-}
-
 static bool hists__filter_entry_by_socket(struct hists *hists,
 					  struct hist_entry *he)
 {
@@ -1349,7 +1843,9 @@
 	return false;
 }
 
-void hists__filter_by_socket(struct hists *hists)
+typedef bool (*filter_fn_t)(struct hists *hists, struct hist_entry *he);
+
+static void hists__filter_by_type(struct hists *hists, int type, filter_fn_t filter)
 {
 	struct rb_node *nd;
 
@@ -1361,13 +1857,157 @@
 	for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) {
 		struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
 
-		if (hists__filter_entry_by_socket(hists, h))
+		if (filter(hists, h))
 			continue;
 
-		hists__remove_entry_filter(hists, h, HIST_FILTER__SOCKET);
+		hists__remove_entry_filter(hists, h, type);
 	}
 }
 
+static void resort_filtered_entry(struct rb_root *root, struct hist_entry *he)
+{
+	struct rb_node **p = &root->rb_node;
+	struct rb_node *parent = NULL;
+	struct hist_entry *iter;
+	struct rb_root new_root = RB_ROOT;
+	struct rb_node *nd;
+
+	while (*p != NULL) {
+		parent = *p;
+		iter = rb_entry(parent, struct hist_entry, rb_node);
+
+		if (hist_entry__sort(he, iter) > 0)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+
+	rb_link_node(&he->rb_node, parent, p);
+	rb_insert_color(&he->rb_node, root);
+
+	if (he->leaf || he->filtered)
+		return;
+
+	nd = rb_first(&he->hroot_out);
+	while (nd) {
+		struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
+
+		nd = rb_next(nd);
+		rb_erase(&h->rb_node, &he->hroot_out);
+
+		resort_filtered_entry(&new_root, h);
+	}
+
+	he->hroot_out = new_root;
+}
+
+static void hists__filter_hierarchy(struct hists *hists, int type, const void *arg)
+{
+	struct rb_node *nd;
+	struct rb_root new_root = RB_ROOT;
+
+	hists->stats.nr_non_filtered_samples = 0;
+
+	hists__reset_filter_stats(hists);
+	hists__reset_col_len(hists);
+
+	nd = rb_first(&hists->entries);
+	while (nd) {
+		struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
+		int ret;
+
+		ret = hist_entry__filter(h, type, arg);
+
+		/*
+		 * case 1. non-matching type
+		 * zero out the period, set filter marker and move to child
+		 */
+		if (ret < 0) {
+			memset(&h->stat, 0, sizeof(h->stat));
+			h->filtered |= (1 << type);
+
+			nd = __rb_hierarchy_next(&h->rb_node, HMD_FORCE_CHILD);
+		}
+		/*
+		 * case 2. matched type (filter out)
+		 * set filter marker and move to next
+		 */
+		else if (ret == 1) {
+			h->filtered |= (1 << type);
+
+			nd = __rb_hierarchy_next(&h->rb_node, HMD_FORCE_SIBLING);
+		}
+		/*
+		 * case 3. ok (not filtered)
+		 * add period to hists and parents, erase the filter marker
+		 * and move to next sibling
+		 */
+		else {
+			hists__remove_entry_filter(hists, h, type);
+
+			nd = __rb_hierarchy_next(&h->rb_node, HMD_FORCE_SIBLING);
+		}
+	}
+
+	hierarchy_recalc_total_periods(hists);
+
+	/*
+	 * resort output after applying a new filter since filter in a lower
+	 * hierarchy can change periods in a upper hierarchy.
+	 */
+	nd = rb_first(&hists->entries);
+	while (nd) {
+		struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
+
+		nd = rb_next(nd);
+		rb_erase(&h->rb_node, &hists->entries);
+
+		resort_filtered_entry(&new_root, h);
+	}
+
+	hists->entries = new_root;
+}
+
+void hists__filter_by_thread(struct hists *hists)
+{
+	if (symbol_conf.report_hierarchy)
+		hists__filter_hierarchy(hists, HIST_FILTER__THREAD,
+					hists->thread_filter);
+	else
+		hists__filter_by_type(hists, HIST_FILTER__THREAD,
+				      hists__filter_entry_by_thread);
+}
+
+void hists__filter_by_dso(struct hists *hists)
+{
+	if (symbol_conf.report_hierarchy)
+		hists__filter_hierarchy(hists, HIST_FILTER__DSO,
+					hists->dso_filter);
+	else
+		hists__filter_by_type(hists, HIST_FILTER__DSO,
+				      hists__filter_entry_by_dso);
+}
+
+void hists__filter_by_symbol(struct hists *hists)
+{
+	if (symbol_conf.report_hierarchy)
+		hists__filter_hierarchy(hists, HIST_FILTER__SYMBOL,
+					hists->symbol_filter_str);
+	else
+		hists__filter_by_type(hists, HIST_FILTER__SYMBOL,
+				      hists__filter_entry_by_symbol);
+}
+
+void hists__filter_by_socket(struct hists *hists)
+{
+	if (symbol_conf.report_hierarchy)
+		hists__filter_hierarchy(hists, HIST_FILTER__SOCKET,
+					&hists->socket_filter);
+	else
+		hists__filter_by_type(hists, HIST_FILTER__SOCKET,
+				      hists__filter_entry_by_socket);
+}
+
 void events_stats__inc(struct events_stats *stats, u32 type)
 {
 	++stats->nr_events[0];
@@ -1583,7 +2223,7 @@
 	return 0;
 }
 
-int __hists__init(struct hists *hists)
+int __hists__init(struct hists *hists, struct perf_hpp_list *hpp_list)
 {
 	memset(hists, 0, sizeof(*hists));
 	hists->entries_in_array[0] = hists->entries_in_array[1] = RB_ROOT;
@@ -1592,6 +2232,8 @@
 	hists->entries = RB_ROOT;
 	pthread_mutex_init(&hists->lock, NULL);
 	hists->socket_filter = -1;
+	hists->hpp_list = hpp_list;
+	INIT_LIST_HEAD(&hists->hpp_formats);
 	return 0;
 }
 
@@ -1620,15 +2262,26 @@
 static void hists_evsel__exit(struct perf_evsel *evsel)
 {
 	struct hists *hists = evsel__hists(evsel);
+	struct perf_hpp_fmt *fmt, *pos;
+	struct perf_hpp_list_node *node, *tmp;
 
 	hists__delete_all_entries(hists);
+
+	list_for_each_entry_safe(node, tmp, &hists->hpp_formats, list) {
+		perf_hpp_list__for_each_format_safe(&node->hpp, fmt, pos) {
+			list_del(&fmt->list);
+			free(fmt);
+		}
+		list_del(&node->list);
+		free(node);
+	}
 }
 
 static int hists_evsel__init(struct perf_evsel *evsel)
 {
 	struct hists *hists = evsel__hists(evsel);
 
-	__hists__init(hists);
+	__hists__init(hists, &perf_hpp_list);
 	return 0;
 }
 
@@ -1647,3 +2300,9 @@
 
 	return err;
 }
+
+void perf_hpp_list__init(struct perf_hpp_list *list)
+{
+	INIT_LIST_HEAD(&list->fields);
+	INIT_LIST_HEAD(&list->sorts);
+}
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index d4ec482..ead18c8 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -66,6 +66,8 @@
 	struct rb_root		entries_collapsed;
 	u64			nr_entries;
 	u64			nr_non_filtered_entries;
+	u64			callchain_period;
+	u64			callchain_non_filtered_period;
 	struct thread		*thread_filter;
 	const struct dso	*dso_filter;
 	const char		*uid_filter_str;
@@ -75,6 +77,9 @@
 	u64			event_stream;
 	u16			col_len[HISTC_NR_COLS];
 	int			socket_filter;
+	struct perf_hpp_list	*hpp_list;
+	struct list_head	hpp_formats;
+	int			nr_hpp_node;
 };
 
 struct hist_entry_iter;
@@ -121,15 +126,21 @@
 int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al,
 			 int max_stack_depth, void *arg);
 
+struct perf_hpp;
+struct perf_hpp_fmt;
+
 int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right);
 int64_t hist_entry__collapse(struct hist_entry *left, struct hist_entry *right);
 int hist_entry__transaction_len(void);
 int hist_entry__sort_snprintf(struct hist_entry *he, char *bf, size_t size,
 			      struct hists *hists);
+int hist_entry__snprintf_alignment(struct hist_entry *he, struct perf_hpp *hpp,
+				   struct perf_hpp_fmt *fmt, int printed);
 void hist_entry__delete(struct hist_entry *he);
 
+void perf_evsel__output_resort(struct perf_evsel *evsel, struct ui_progress *prog);
 void hists__output_resort(struct hists *hists, struct ui_progress *prog);
-void hists__collapse_resort(struct hists *hists, struct ui_progress *prog);
+int hists__collapse_resort(struct hists *hists, struct ui_progress *prog);
 
 void hists__decay_entries(struct hists *hists, bool zap_user, bool zap_kernel);
 void hists__delete_entries(struct hists *hists);
@@ -185,10 +196,10 @@
 }
 
 int hists__init(void);
-int __hists__init(struct hists *hists);
+int __hists__init(struct hists *hists, struct perf_hpp_list *hpp_list);
 
 struct rb_root *hists__get_rotate_entries_in(struct hists *hists);
-bool hists__collapse_insert_entry(struct hists *hists __maybe_unused,
+int hists__collapse_insert_entry(struct hists *hists,
 				  struct rb_root *root, struct hist_entry *he);
 
 struct perf_hpp {
@@ -214,28 +225,64 @@
 			    struct hist_entry *a, struct hist_entry *b);
 	int64_t (*sort)(struct perf_hpp_fmt *fmt,
 			struct hist_entry *a, struct hist_entry *b);
+	bool (*equal)(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b);
+	void (*free)(struct perf_hpp_fmt *fmt);
 
 	struct list_head list;
 	struct list_head sort_list;
 	bool elide;
 	int len;
 	int user_len;
+	int idx;
+	int level;
 };
 
-extern struct list_head perf_hpp__list;
-extern struct list_head perf_hpp__sort_list;
+struct perf_hpp_list {
+	struct list_head fields;
+	struct list_head sorts;
+};
 
-#define perf_hpp__for_each_format(format) \
-	list_for_each_entry(format, &perf_hpp__list, list)
+extern struct perf_hpp_list perf_hpp_list;
 
-#define perf_hpp__for_each_format_safe(format, tmp)	\
-	list_for_each_entry_safe(format, tmp, &perf_hpp__list, list)
+struct perf_hpp_list_node {
+	struct list_head	list;
+	struct perf_hpp_list	hpp;
+	int			level;
+	bool			skip;
+};
 
-#define perf_hpp__for_each_sort_list(format) \
-	list_for_each_entry(format, &perf_hpp__sort_list, sort_list)
+void perf_hpp_list__column_register(struct perf_hpp_list *list,
+				    struct perf_hpp_fmt *format);
+void perf_hpp_list__register_sort_field(struct perf_hpp_list *list,
+					struct perf_hpp_fmt *format);
 
-#define perf_hpp__for_each_sort_list_safe(format, tmp)	\
-	list_for_each_entry_safe(format, tmp, &perf_hpp__sort_list, sort_list)
+static inline void perf_hpp__column_register(struct perf_hpp_fmt *format)
+{
+	perf_hpp_list__column_register(&perf_hpp_list, format);
+}
+
+static inline void perf_hpp__register_sort_field(struct perf_hpp_fmt *format)
+{
+	perf_hpp_list__register_sort_field(&perf_hpp_list, format);
+}
+
+#define perf_hpp_list__for_each_format(_list, format) \
+	list_for_each_entry(format, &(_list)->fields, list)
+
+#define perf_hpp_list__for_each_format_safe(_list, format, tmp)	\
+	list_for_each_entry_safe(format, tmp, &(_list)->fields, list)
+
+#define perf_hpp_list__for_each_sort_list(_list, format) \
+	list_for_each_entry(format, &(_list)->sorts, sort_list)
+
+#define perf_hpp_list__for_each_sort_list_safe(_list, format, tmp)	\
+	list_for_each_entry_safe(format, tmp, &(_list)->sorts, sort_list)
+
+#define hists__for_each_format(hists, format) \
+	perf_hpp_list__for_each_format((hists)->hpp_list, fmt)
+
+#define hists__for_each_sort_list(hists, format) \
+	perf_hpp_list__for_each_sort_list((hists)->hpp_list, fmt)
 
 extern struct perf_hpp_fmt perf_hpp__format[];
 
@@ -254,21 +301,29 @@
 };
 
 void perf_hpp__init(void);
-void perf_hpp__column_register(struct perf_hpp_fmt *format);
 void perf_hpp__column_unregister(struct perf_hpp_fmt *format);
-void perf_hpp__column_enable(unsigned col);
-void perf_hpp__column_disable(unsigned col);
 void perf_hpp__cancel_cumulate(void);
+void perf_hpp__setup_output_field(struct perf_hpp_list *list);
+void perf_hpp__reset_output_field(struct perf_hpp_list *list);
+void perf_hpp__append_sort_keys(struct perf_hpp_list *list);
+int perf_hpp__setup_hists_formats(struct perf_hpp_list *list,
+				  struct perf_evlist *evlist);
 
-void perf_hpp__register_sort_field(struct perf_hpp_fmt *format);
-void perf_hpp__setup_output_field(void);
-void perf_hpp__reset_output_field(void);
-void perf_hpp__append_sort_keys(void);
 
 bool perf_hpp__is_sort_entry(struct perf_hpp_fmt *format);
-bool perf_hpp__same_sort_entry(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b);
 bool perf_hpp__is_dynamic_entry(struct perf_hpp_fmt *format);
 bool perf_hpp__defined_dynamic_entry(struct perf_hpp_fmt *fmt, struct hists *hists);
+bool perf_hpp__is_trace_entry(struct perf_hpp_fmt *fmt);
+bool perf_hpp__is_srcline_entry(struct perf_hpp_fmt *fmt);
+bool perf_hpp__is_srcfile_entry(struct perf_hpp_fmt *fmt);
+bool perf_hpp__is_thread_entry(struct perf_hpp_fmt *fmt);
+bool perf_hpp__is_comm_entry(struct perf_hpp_fmt *fmt);
+bool perf_hpp__is_dso_entry(struct perf_hpp_fmt *fmt);
+bool perf_hpp__is_sym_entry(struct perf_hpp_fmt *fmt);
+
+struct perf_hpp_fmt *perf_hpp_fmt__dup(struct perf_hpp_fmt *fmt);
+
+int hist_entry__filter(struct hist_entry *he, int type, const void *arg);
 
 static inline bool perf_hpp__should_skip(struct perf_hpp_fmt *format,
 					 struct hists *hists)
@@ -372,6 +427,7 @@
 #endif
 
 unsigned int hists__sort_list_width(struct hists *hists);
+unsigned int hists__overhead_width(struct hists *hists);
 
 void hist__account_cycles(struct branch_stack *bs, struct addr_location *al,
 			  struct perf_sample *sample, bool nonany_branch_mode);
@@ -381,4 +437,26 @@
 			    const char *arg, int unset __maybe_unused);
 int perf_hist_config(const char *var, const char *value);
 
+void perf_hpp_list__init(struct perf_hpp_list *list);
+
+enum hierarchy_move_dir {
+	HMD_NORMAL,
+	HMD_FORCE_SIBLING,
+	HMD_FORCE_CHILD,
+};
+
+struct rb_node *rb_hierarchy_last(struct rb_node *node);
+struct rb_node *__rb_hierarchy_next(struct rb_node *node,
+				    enum hierarchy_move_dir hmd);
+struct rb_node *rb_hierarchy_prev(struct rb_node *node);
+
+static inline struct rb_node *rb_hierarchy_next(struct rb_node *node)
+{
+	return __rb_hierarchy_next(node, HMD_NORMAL);
+}
+
+#define HIERARCHY_INDENT  3
+
+bool hist_entry__has_hierarchy_children(struct hist_entry *he, float limit);
+
 #endif	/* __PERF_HIST_H */
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index 81a2eb7..05d8158 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -2068,6 +2068,15 @@
 		err = -ENOMEM;
 		goto err_free_queues;
 	}
+
+	/*
+	 * Since this thread will not be kept in any rbtree not in a
+	 * list, initialize its list node so that at thread__put() the
+	 * current thread lifetime assuption is kept and we don't segfault
+	 * at list_del_init().
+	 */
+	INIT_LIST_HEAD(&pt->unknown_thread->node);
+
 	err = thread__set_comm(pt->unknown_thread, "unknown", 0);
 	if (err)
 		goto err_delete_thread;
diff --git a/tools/perf/util/jit.h b/tools/perf/util/jit.h
new file mode 100644
index 0000000..a1e99da
--- /dev/null
+++ b/tools/perf/util/jit.h
@@ -0,0 +1,15 @@
+#ifndef __JIT_H__
+#define __JIT_H__
+
+#include <data.h>
+
+extern int jit_process(struct perf_session *session,
+		       struct perf_data_file *output,
+		       struct machine *machine,
+		       char *filename,
+		       pid_t pid,
+		       u64 *nbytes);
+
+extern int jit_inject_record(const char *filename);
+
+#endif /* __JIT_H__ */
diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c
new file mode 100644
index 0000000..cd272cc
--- /dev/null
+++ b/tools/perf/util/jitdump.c
@@ -0,0 +1,697 @@
+#include <sys/types.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <byteswap.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+
+#include "util.h"
+#include "event.h"
+#include "debug.h"
+#include "evlist.h"
+#include "symbol.h"
+#include "strlist.h"
+#include <elf.h>
+
+#include "session.h"
+#include "jit.h"
+#include "jitdump.h"
+#include "genelf.h"
+#include "../builtin.h"
+
+struct jit_buf_desc {
+	struct perf_data_file *output;
+	struct perf_session *session;
+	struct machine *machine;
+	union jr_entry   *entry;
+	void             *buf;
+	uint64_t	 sample_type;
+	size_t           bufsize;
+	FILE             *in;
+	bool		 needs_bswap; /* handles cross-endianess */
+	void		 *debug_data;
+	size_t		 nr_debug_entries;
+	uint32_t         code_load_count;
+	u64		 bytes_written;
+	struct rb_root   code_root;
+	char		 dir[PATH_MAX];
+};
+
+struct debug_line_info {
+	unsigned long vma;
+	unsigned int lineno;
+	/* The filename format is unspecified, absolute path, relative etc. */
+	char const filename[0];
+};
+
+struct jit_tool {
+	struct perf_tool tool;
+	struct perf_data_file	output;
+	struct perf_data_file	input;
+	u64 bytes_written;
+};
+
+#define hmax(a, b) ((a) > (b) ? (a) : (b))
+#define get_jit_tool(t) (container_of(tool, struct jit_tool, tool))
+
+static int
+jit_emit_elf(char *filename,
+	     const char *sym,
+	     uint64_t code_addr,
+	     const void *code,
+	     int csize,
+	     void *debug,
+	     int nr_debug_entries)
+{
+	int ret, fd;
+
+	if (verbose > 0)
+		fprintf(stderr, "write ELF image %s\n", filename);
+
+	fd = open(filename, O_CREAT|O_TRUNC|O_WRONLY, 0644);
+	if (fd == -1) {
+		pr_warning("cannot create jit ELF %s: %s\n", filename, strerror(errno));
+		return -1;
+	}
+
+        ret = jit_write_elf(fd, code_addr, sym, (const void *)code, csize, debug, nr_debug_entries);
+
+        close(fd);
+
+        if (ret)
+                unlink(filename);
+
+	return ret;
+}
+
+static void
+jit_close(struct jit_buf_desc *jd)
+{
+	if (!(jd && jd->in))
+		return;
+	funlockfile(jd->in);
+	fclose(jd->in);
+	jd->in = NULL;
+}
+
+static int
+jit_validate_events(struct perf_session *session)
+{
+	struct perf_evsel *evsel;
+
+	/*
+	 * check that all events use CLOCK_MONOTONIC
+	 */
+	evlist__for_each(session->evlist, evsel) {
+		if (evsel->attr.use_clockid == 0 || evsel->attr.clockid != CLOCK_MONOTONIC)
+			return -1;
+	}
+	return 0;
+}
+
+static int
+jit_open(struct jit_buf_desc *jd, const char *name)
+{
+	struct jitheader header;
+	struct jr_prefix *prefix;
+	ssize_t bs, bsz = 0;
+	void *n, *buf = NULL;
+	int ret, retval = -1;
+
+	jd->in = fopen(name, "r");
+	if (!jd->in)
+		return -1;
+
+	bsz = hmax(sizeof(header), sizeof(*prefix));
+
+	buf = malloc(bsz);
+	if (!buf)
+		goto error;
+
+	/*
+	 * protect from writer modifying the file while we are reading it
+	 */
+	flockfile(jd->in);
+
+	ret = fread(buf, sizeof(header), 1, jd->in);
+	if (ret != 1)
+		goto error;
+
+	memcpy(&header, buf, sizeof(header));
+
+	if (header.magic != JITHEADER_MAGIC) {
+		if (header.magic != JITHEADER_MAGIC_SW)
+			goto error;
+		jd->needs_bswap = true;
+	}
+
+	if (jd->needs_bswap) {
+		header.version    = bswap_32(header.version);
+		header.total_size = bswap_32(header.total_size);
+		header.pid	  = bswap_32(header.pid);
+		header.elf_mach   = bswap_32(header.elf_mach);
+		header.timestamp  = bswap_64(header.timestamp);
+		header.flags      = bswap_64(header.flags);
+	}
+
+	if (verbose > 2)
+		pr_debug("version=%u\nhdr.size=%u\nts=0x%llx\npid=%d\nelf_mach=%d\n",
+			header.version,
+			header.total_size,
+			(unsigned long long)header.timestamp,
+			header.pid,
+			header.elf_mach);
+
+	if (header.flags & JITDUMP_FLAGS_RESERVED) {
+		pr_err("jitdump file contains invalid or unsupported flags 0x%llx\n",
+		       (unsigned long long)header.flags & JITDUMP_FLAGS_RESERVED);
+		goto error;
+	}
+
+	/*
+	 * validate event is using the correct clockid
+	 */
+	if (jit_validate_events(jd->session)) {
+		pr_err("error, jitted code must be sampled with perf record -k 1\n");
+		goto error;
+	}
+
+	bs = header.total_size - sizeof(header);
+
+	if (bs > bsz) {
+		n = realloc(buf, bs);
+		if (!n)
+			goto error;
+		bsz = bs;
+		buf = n;
+		/* read extra we do not know about */
+		ret = fread(buf, bs - bsz, 1, jd->in);
+		if (ret != 1)
+			goto error;
+	}
+	/*
+	 * keep dirname for generating files and mmap records
+	 */
+	strcpy(jd->dir, name);
+	dirname(jd->dir);
+
+	return 0;
+error:
+	funlockfile(jd->in);
+	fclose(jd->in);
+	return retval;
+}
+
+static union jr_entry *
+jit_get_next_entry(struct jit_buf_desc *jd)
+{
+	struct jr_prefix *prefix;
+	union jr_entry *jr;
+	void *addr;
+	size_t bs, size;
+	int id, ret;
+
+	if (!(jd && jd->in))
+		return NULL;
+
+	if (jd->buf == NULL) {
+		size_t sz = getpagesize();
+		if (sz < sizeof(*prefix))
+			sz = sizeof(*prefix);
+
+		jd->buf = malloc(sz);
+		if (jd->buf == NULL)
+			return NULL;
+
+		jd->bufsize = sz;
+	}
+
+	prefix = jd->buf;
+
+	/*
+	 * file is still locked at this point
+	 */
+	ret = fread(prefix, sizeof(*prefix), 1, jd->in);
+	if (ret  != 1)
+		return NULL;
+
+	if (jd->needs_bswap) {
+		prefix->id   	   = bswap_32(prefix->id);
+		prefix->total_size = bswap_32(prefix->total_size);
+		prefix->timestamp  = bswap_64(prefix->timestamp);
+	}
+	id   = prefix->id;
+	size = prefix->total_size;
+
+	bs = (size_t)size;
+	if (bs < sizeof(*prefix))
+		return NULL;
+
+	if (id >= JIT_CODE_MAX) {
+		pr_warning("next_entry: unknown prefix %d, skipping\n", id);
+		return NULL;
+	}
+	if (bs > jd->bufsize) {
+		void *n;
+		n = realloc(jd->buf, bs);
+		if (!n)
+			return NULL;
+		jd->buf = n;
+		jd->bufsize = bs;
+	}
+
+	addr = ((void *)jd->buf) + sizeof(*prefix);
+
+	ret = fread(addr, bs - sizeof(*prefix), 1, jd->in);
+	if (ret != 1)
+		return NULL;
+
+	jr = (union jr_entry *)jd->buf;
+
+	switch(id) {
+	case JIT_CODE_DEBUG_INFO:
+		if (jd->needs_bswap) {
+			uint64_t n;
+			jr->info.code_addr = bswap_64(jr->info.code_addr);
+			jr->info.nr_entry  = bswap_64(jr->info.nr_entry);
+			for (n = 0 ; n < jr->info.nr_entry; n++) {
+				jr->info.entries[n].addr    = bswap_64(jr->info.entries[n].addr);
+				jr->info.entries[n].lineno  = bswap_32(jr->info.entries[n].lineno);
+				jr->info.entries[n].discrim = bswap_32(jr->info.entries[n].discrim);
+			}
+		}
+		break;
+	case JIT_CODE_CLOSE:
+		break;
+	case JIT_CODE_LOAD:
+		if (jd->needs_bswap) {
+			jr->load.pid       = bswap_32(jr->load.pid);
+			jr->load.tid       = bswap_32(jr->load.tid);
+			jr->load.vma       = bswap_64(jr->load.vma);
+			jr->load.code_addr = bswap_64(jr->load.code_addr);
+			jr->load.code_size = bswap_64(jr->load.code_size);
+			jr->load.code_index= bswap_64(jr->load.code_index);
+		}
+		jd->code_load_count++;
+		break;
+	case JIT_CODE_MOVE:
+		if (jd->needs_bswap) {
+			jr->move.pid           = bswap_32(jr->move.pid);
+			jr->move.tid           = bswap_32(jr->move.tid);
+			jr->move.vma           = bswap_64(jr->move.vma);
+			jr->move.old_code_addr = bswap_64(jr->move.old_code_addr);
+			jr->move.new_code_addr = bswap_64(jr->move.new_code_addr);
+			jr->move.code_size     = bswap_64(jr->move.code_size);
+			jr->move.code_index    = bswap_64(jr->move.code_index);
+		}
+		break;
+	case JIT_CODE_MAX:
+	default:
+		return NULL;
+	}
+	return jr;
+}
+
+static int
+jit_inject_event(struct jit_buf_desc *jd, union perf_event *event)
+{
+	ssize_t size;
+
+	size = perf_data_file__write(jd->output, event, event->header.size);
+	if (size < 0)
+		return -1;
+
+	jd->bytes_written += size;
+	return 0;
+}
+
+static int jit_repipe_code_load(struct jit_buf_desc *jd, union jr_entry *jr)
+{
+	struct perf_sample sample;
+	union perf_event *event;
+	struct perf_tool *tool = jd->session->tool;
+	uint64_t code, addr;
+	uintptr_t uaddr;
+	char *filename;
+	struct stat st;
+	size_t size;
+	u16 idr_size;
+	const char *sym;
+	uint32_t count;
+	int ret, csize;
+	pid_t pid, tid;
+	struct {
+		u32 pid, tid;
+		u64 time;
+	} *id;
+
+	pid   = jr->load.pid;
+	tid   = jr->load.tid;
+	csize = jr->load.code_size;
+	addr  = jr->load.code_addr;
+	sym   = (void *)((unsigned long)jr + sizeof(jr->load));
+	code  = (unsigned long)jr + jr->load.p.total_size - csize;
+	count = jr->load.code_index;
+	idr_size = jd->machine->id_hdr_size;
+
+	event = calloc(1, sizeof(*event) + idr_size);
+	if (!event)
+		return -1;
+
+	filename = event->mmap2.filename;
+	size = snprintf(filename, PATH_MAX, "%s/jitted-%d-%u.so",
+			jd->dir,
+			pid,
+			count);
+
+	size++; /* for \0 */
+
+	size = PERF_ALIGN(size, sizeof(u64));
+	uaddr = (uintptr_t)code;
+	ret = jit_emit_elf(filename, sym, addr, (const void *)uaddr, csize, jd->debug_data, jd->nr_debug_entries);
+
+	if (jd->debug_data && jd->nr_debug_entries) {
+		free(jd->debug_data);
+		jd->debug_data = NULL;
+		jd->nr_debug_entries = 0;
+	}
+
+	if (ret) {
+		free(event);
+		return -1;
+	}
+	if (stat(filename, &st))
+		memset(&st, 0, sizeof(stat));
+
+	event->mmap2.header.type = PERF_RECORD_MMAP2;
+	event->mmap2.header.misc = PERF_RECORD_MISC_USER;
+	event->mmap2.header.size = (sizeof(event->mmap2) -
+			(sizeof(event->mmap2.filename) - size) + idr_size);
+
+	event->mmap2.pgoff = GEN_ELF_TEXT_OFFSET;
+	event->mmap2.start = addr;
+	event->mmap2.len   = csize;
+	event->mmap2.pid   = pid;
+	event->mmap2.tid   = tid;
+	event->mmap2.ino   = st.st_ino;
+	event->mmap2.maj   = major(st.st_dev);
+	event->mmap2.min   = minor(st.st_dev);
+	event->mmap2.prot  = st.st_mode;
+	event->mmap2.flags = MAP_SHARED;
+	event->mmap2.ino_generation = 1;
+
+	id = (void *)((unsigned long)event + event->mmap.header.size - idr_size);
+	if (jd->sample_type & PERF_SAMPLE_TID) {
+		id->pid  = pid;
+		id->tid  = tid;
+	}
+	if (jd->sample_type & PERF_SAMPLE_TIME)
+		id->time = jr->load.p.timestamp;
+
+	/*
+	 * create pseudo sample to induce dso hit increment
+	 * use first address as sample address
+	 */
+	memset(&sample, 0, sizeof(sample));
+	sample.pid  = pid;
+	sample.tid  = tid;
+	sample.time = id->time;
+	sample.ip   = addr;
+
+	ret = perf_event__process_mmap2(tool, event, &sample, jd->machine);
+	if (ret)
+		return ret;
+
+	ret = jit_inject_event(jd, event);
+	/*
+	 * mark dso as use to generate buildid in the header
+	 */
+	if (!ret)
+		build_id__mark_dso_hit(tool, event, &sample, NULL, jd->machine);
+
+	return ret;
+}
+
+static int jit_repipe_code_move(struct jit_buf_desc *jd, union jr_entry *jr)
+{
+	struct perf_sample sample;
+	union perf_event *event;
+	struct perf_tool *tool = jd->session->tool;
+	char *filename;
+	size_t size;
+	struct stat st;
+	u16 idr_size;
+	int ret;
+	pid_t pid, tid;
+	struct {
+		u32 pid, tid;
+		u64 time;
+	} *id;
+
+	pid = jr->move.pid;
+	tid =  jr->move.tid;
+	idr_size = jd->machine->id_hdr_size;
+
+	/*
+	 * +16 to account for sample_id_all (hack)
+	 */
+	event = calloc(1, sizeof(*event) + 16);
+	if (!event)
+		return -1;
+
+	filename = event->mmap2.filename;
+	size = snprintf(filename, PATH_MAX, "%s/jitted-%d-%"PRIu64,
+	         jd->dir,
+	         pid,
+		 jr->move.code_index);
+
+	size++; /* for \0 */
+
+	if (stat(filename, &st))
+		memset(&st, 0, sizeof(stat));
+
+	size = PERF_ALIGN(size, sizeof(u64));
+
+	event->mmap2.header.type = PERF_RECORD_MMAP2;
+	event->mmap2.header.misc = PERF_RECORD_MISC_USER;
+	event->mmap2.header.size = (sizeof(event->mmap2) -
+			(sizeof(event->mmap2.filename) - size) + idr_size);
+	event->mmap2.pgoff = GEN_ELF_TEXT_OFFSET;
+	event->mmap2.start = jr->move.new_code_addr;
+	event->mmap2.len   = jr->move.code_size;
+	event->mmap2.pid   = pid;
+	event->mmap2.tid   = tid;
+	event->mmap2.ino   = st.st_ino;
+	event->mmap2.maj   = major(st.st_dev);
+	event->mmap2.min   = minor(st.st_dev);
+	event->mmap2.prot  = st.st_mode;
+	event->mmap2.flags = MAP_SHARED;
+	event->mmap2.ino_generation = 1;
+
+	id = (void *)((unsigned long)event + event->mmap.header.size - idr_size);
+	if (jd->sample_type & PERF_SAMPLE_TID) {
+		id->pid  = pid;
+		id->tid  = tid;
+	}
+	if (jd->sample_type & PERF_SAMPLE_TIME)
+		id->time = jr->load.p.timestamp;
+
+	/*
+	 * create pseudo sample to induce dso hit increment
+	 * use first address as sample address
+	 */
+	memset(&sample, 0, sizeof(sample));
+	sample.pid  = pid;
+	sample.tid  = tid;
+	sample.time = id->time;
+	sample.ip   = jr->move.new_code_addr;
+
+	ret = perf_event__process_mmap2(tool, event, &sample, jd->machine);
+	if (ret)
+		return ret;
+
+	ret = jit_inject_event(jd, event);
+	if (!ret)
+		build_id__mark_dso_hit(tool, event, &sample, NULL, jd->machine);
+
+	return ret;
+}
+
+static int jit_repipe_debug_info(struct jit_buf_desc *jd, union jr_entry *jr)
+{
+	void *data;
+	size_t sz;
+
+	if (!(jd && jr))
+		return -1;
+
+	sz  = jr->prefix.total_size - sizeof(jr->info);
+	data = malloc(sz);
+	if (!data)
+		return -1;
+
+	memcpy(data, &jr->info.entries, sz);
+
+	jd->debug_data       = data;
+
+	/*
+	 * we must use nr_entry instead of size here because
+	 * we cannot distinguish actual entry from padding otherwise
+	 */
+	jd->nr_debug_entries = jr->info.nr_entry;
+
+	return 0;
+}
+
+static int
+jit_process_dump(struct jit_buf_desc *jd)
+{
+	union jr_entry *jr;
+	int ret;
+
+	while ((jr = jit_get_next_entry(jd))) {
+		switch(jr->prefix.id) {
+		case JIT_CODE_LOAD:
+			ret = jit_repipe_code_load(jd, jr);
+			break;
+		case JIT_CODE_MOVE:
+			ret = jit_repipe_code_move(jd, jr);
+			break;
+		case JIT_CODE_DEBUG_INFO:
+			ret = jit_repipe_debug_info(jd, jr);
+			break;
+		default:
+			ret = 0;
+			continue;
+		}
+	}
+	return ret;
+}
+
+static int
+jit_inject(struct jit_buf_desc *jd, char *path)
+{
+	int ret;
+
+	if (verbose > 0)
+		fprintf(stderr, "injecting: %s\n", path);
+
+	ret = jit_open(jd, path);
+	if (ret)
+		return -1;
+
+	ret = jit_process_dump(jd);
+
+	jit_close(jd);
+
+	if (verbose > 0)
+		fprintf(stderr, "injected: %s (%d)\n", path, ret);
+
+	return 0;
+}
+
+/*
+ * File must be with pattern .../jit-XXXX.dump
+ * where XXXX is the PID of the process which did the mmap()
+ * as captured in the RECORD_MMAP record
+ */
+static int
+jit_detect(char *mmap_name, pid_t pid)
+ {
+	char *p;
+	char *end = NULL;
+	pid_t pid2;
+
+	if (verbose > 2)
+		fprintf(stderr, "jit marker trying : %s\n", mmap_name);
+	/*
+	 * get file name
+	 */
+	p = strrchr(mmap_name, '/');
+	if (!p)
+		return -1;
+
+	/*
+	 * match prefix
+	 */
+	if (strncmp(p, "/jit-", 5))
+		return -1;
+
+	/*
+	 * skip prefix
+	 */
+	p += 5;
+
+	/*
+	 * must be followed by a pid
+	 */
+	if (!isdigit(*p))
+		return -1;
+
+	pid2 = (int)strtol(p, &end, 10);
+	if (!end)
+		return -1;
+
+	/*
+	 * pid does not match mmap pid
+	 * pid==0 in system-wide mode (synthesized)
+	 */
+	if (pid && pid2 != pid)
+		return -1;
+	/*
+	 * validate suffix
+	 */
+	if (strcmp(end, ".dump"))
+		return -1;
+
+	if (verbose > 0)
+		fprintf(stderr, "jit marker found: %s\n", mmap_name);
+
+	return 0;
+}
+
+int
+jit_process(struct perf_session *session,
+	    struct perf_data_file *output,
+	    struct machine *machine,
+	    char *filename,
+	    pid_t pid,
+	    u64 *nbytes)
+{
+	struct perf_evsel *first;
+	struct jit_buf_desc jd;
+	int ret;
+
+	/*
+	 * first, detect marker mmap (i.e., the jitdump mmap)
+	 */
+	if (jit_detect(filename, pid))
+		return 0;
+
+	memset(&jd, 0, sizeof(jd));
+
+	jd.session = session;
+	jd.output  = output;
+	jd.machine = machine;
+
+	/*
+	 * track sample_type to compute id_all layout
+	 * perf sets the same sample type to all events as of now
+	 */
+	first = perf_evlist__first(session->evlist);
+	jd.sample_type = first->attr.sample_type;
+
+	*nbytes = 0;
+
+	ret = jit_inject(&jd, filename);
+	if (!ret) {
+		*nbytes = jd.bytes_written;
+		ret = 1;
+	}
+
+	return ret;
+}
diff --git a/tools/perf/util/jitdump.h b/tools/perf/util/jitdump.h
new file mode 100644
index 0000000..b66c1f5
--- /dev/null
+++ b/tools/perf/util/jitdump.h
@@ -0,0 +1,124 @@
+/*
+ * jitdump.h: jitted code info encapsulation file format
+ *
+ * Adapted from OProfile GPLv2 support jidump.h:
+ * Copyright 2007 OProfile authors
+ * Jens Wilke
+ * Daniel Hansel
+ * Copyright IBM Corporation 2007
+ */
+#ifndef JITDUMP_H
+#define JITDUMP_H
+
+#include <sys/time.h>
+#include <time.h>
+#include <stdint.h>
+
+/* JiTD */
+#define JITHEADER_MAGIC		0x4A695444
+#define JITHEADER_MAGIC_SW	0x4454694A
+
+#define PADDING_8ALIGNED(x) ((((x) + 7) & 7) ^ 7)
+
+#define JITHEADER_VERSION 1
+
+enum jitdump_flags_bits {
+	JITDUMP_FLAGS_MAX_BIT,
+};
+
+#define JITDUMP_FLAGS_RESERVED (JITDUMP_FLAGS_MAX_BIT < 64 ? \
+				(~((1ULL << JITDUMP_FLAGS_MAX_BIT) - 1)) : 0)
+
+struct jitheader {
+	uint32_t magic;		/* characters "jItD" */
+	uint32_t version;	/* header version */
+	uint32_t total_size;	/* total size of header */
+	uint32_t elf_mach;	/* elf mach target */
+	uint32_t pad1;		/* reserved */
+	uint32_t pid;		/* JIT process id */
+	uint64_t timestamp;	/* timestamp */
+	uint64_t flags;		/* flags */
+};
+
+enum jit_record_type {
+	JIT_CODE_LOAD		= 0,
+        JIT_CODE_MOVE           = 1,
+	JIT_CODE_DEBUG_INFO	= 2,
+	JIT_CODE_CLOSE		= 3,
+
+	JIT_CODE_MAX,
+};
+
+/* record prefix (mandatory in each record) */
+struct jr_prefix {
+	uint32_t id;
+	uint32_t total_size;
+	uint64_t timestamp;
+};
+
+struct jr_code_load {
+	struct jr_prefix p;
+
+	uint32_t pid;
+	uint32_t tid;
+	uint64_t vma;
+	uint64_t code_addr;
+	uint64_t code_size;
+	uint64_t code_index;
+};
+
+struct jr_code_close {
+	struct jr_prefix p;
+};
+
+struct jr_code_move {
+	struct jr_prefix p;
+
+	uint32_t pid;
+	uint32_t tid;
+	uint64_t vma;
+	uint64_t old_code_addr;
+	uint64_t new_code_addr;
+	uint64_t code_size;
+	uint64_t code_index;
+};
+
+struct debug_entry {
+	uint64_t addr;
+	int lineno;	    /* source line number starting at 1 */
+	int discrim;	    /* column discriminator, 0 is default */
+	const char name[0]; /* null terminated filename, \xff\0 if same as previous entry */
+};
+
+struct jr_code_debug_info {
+	struct jr_prefix p;
+
+	uint64_t code_addr;
+	uint64_t nr_entry;
+	struct debug_entry entries[0];
+};
+
+union jr_entry {
+        struct jr_code_debug_info info;
+        struct jr_code_close close;
+        struct jr_code_load load;
+        struct jr_code_move move;
+        struct jr_prefix prefix;
+};
+
+static inline struct debug_entry *
+debug_entry_next(struct debug_entry *ent)
+{
+	void *a = ent + 1;
+	size_t l = strlen(ent->name) + 1;
+	return a + l;
+}
+
+static inline char *
+debug_entry_file(struct debug_entry *ent)
+{
+	void *a = ent + 1;
+	return a;
+}
+
+#endif /* !JITDUMP_H */
diff --git a/tools/perf/util/kvm-stat.h b/tools/perf/util/kvm-stat.h
index ae825d4..d01e735 100644
--- a/tools/perf/util/kvm-stat.h
+++ b/tools/perf/util/kvm-stat.h
@@ -122,6 +122,7 @@
 
 bool kvm_exit_event(struct perf_evsel *evsel);
 bool kvm_entry_event(struct perf_evsel *evsel);
+int setup_kvm_events_tp(struct perf_kvm_stat *kvm);
 
 #define define_exit_reasons_table(name, symbols)	\
 	static struct exit_reasons_table name[] = {	\
@@ -133,8 +134,13 @@
  */
 int cpu_isa_init(struct perf_kvm_stat *kvm, const char *cpuid);
 
-extern const char * const kvm_events_tp[];
+extern const char *kvm_events_tp[];
 extern struct kvm_reg_events_ops kvm_reg_events_ops[];
 extern const char * const kvm_skip_events[];
+extern const char *vcpu_id_str;
+extern const int decode_str_len;
+extern const char *kvm_exit_reason;
+extern const char *kvm_entry_trace;
+extern const char *kvm_exit_trace;
 
 #endif /* __PERF_KVM_STAT_H */
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index 2c2b443..1a3e45b 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -180,6 +180,16 @@
 }
 
 static inline
+struct symbol *machine__find_kernel_symbol_by_name(struct machine *machine,
+						   enum map_type type, const char *name,
+						   struct map **mapp,
+						   symbol_filter_t filter)
+{
+	return map_groups__find_symbol_by_name(&machine->kmaps, type, name,
+					       mapp, filter);
+}
+
+static inline
 struct symbol *machine__find_kernel_function(struct machine *machine, u64 addr,
 					     struct map **mapp,
 					     symbol_filter_t filter)
diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c
new file mode 100644
index 0000000..75465f8
--- /dev/null
+++ b/tools/perf/util/mem-events.c
@@ -0,0 +1,255 @@
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <api/fs/fs.h>
+#include "mem-events.h"
+#include "debug.h"
+#include "symbol.h"
+
+#define E(t, n, s) { .tag = t, .name = n, .sysfs_name = s }
+
+struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = {
+	E("ldlat-loads",	"cpu/mem-loads,ldlat=30/P",	"mem-loads"),
+	E("ldlat-stores",	"cpu/mem-stores/P",		"mem-stores"),
+};
+#undef E
+
+#undef E
+
+char *perf_mem_events__name(int i)
+{
+	return (char *)perf_mem_events[i].name;
+}
+
+int perf_mem_events__parse(const char *str)
+{
+	char *tok, *saveptr = NULL;
+	bool found = false;
+	char *buf;
+	int j;
+
+	/* We need buffer that we know we can write to. */
+	buf = malloc(strlen(str) + 1);
+	if (!buf)
+		return -ENOMEM;
+
+	strcpy(buf, str);
+
+	tok = strtok_r((char *)buf, ",", &saveptr);
+
+	while (tok) {
+		for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
+			struct perf_mem_event *e = &perf_mem_events[j];
+
+			if (strstr(e->tag, tok))
+				e->record = found = true;
+		}
+
+		tok = strtok_r(NULL, ",", &saveptr);
+	}
+
+	free(buf);
+
+	if (found)
+		return 0;
+
+	pr_err("failed: event '%s' not found, use '-e list' to get list of available events\n", str);
+	return -1;
+}
+
+int perf_mem_events__init(void)
+{
+	const char *mnt = sysfs__mount();
+	bool found = false;
+	int j;
+
+	if (!mnt)
+		return -ENOENT;
+
+	for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
+		char path[PATH_MAX];
+		struct perf_mem_event *e = &perf_mem_events[j];
+		struct stat st;
+
+		scnprintf(path, PATH_MAX, "%s/devices/cpu/events/%s",
+			  mnt, e->sysfs_name);
+
+		if (!stat(path, &st))
+			e->supported = found = true;
+	}
+
+	return found ? 0 : -ENOENT;
+}
+
+static const char * const tlb_access[] = {
+	"N/A",
+	"HIT",
+	"MISS",
+	"L1",
+	"L2",
+	"Walker",
+	"Fault",
+};
+
+int perf_mem__tlb_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
+{
+	size_t l = 0, i;
+	u64 m = PERF_MEM_TLB_NA;
+	u64 hit, miss;
+
+	sz -= 1; /* -1 for null termination */
+	out[0] = '\0';
+
+	if (mem_info)
+		m = mem_info->data_src.mem_dtlb;
+
+	hit = m & PERF_MEM_TLB_HIT;
+	miss = m & PERF_MEM_TLB_MISS;
+
+	/* already taken care of */
+	m &= ~(PERF_MEM_TLB_HIT|PERF_MEM_TLB_MISS);
+
+	for (i = 0; m && i < ARRAY_SIZE(tlb_access); i++, m >>= 1) {
+		if (!(m & 0x1))
+			continue;
+		if (l) {
+			strcat(out, " or ");
+			l += 4;
+		}
+		l += scnprintf(out + l, sz - l, tlb_access[i]);
+	}
+	if (*out == '\0')
+		l += scnprintf(out, sz - l, "N/A");
+	if (hit)
+		l += scnprintf(out + l, sz - l, " hit");
+	if (miss)
+		l += scnprintf(out + l, sz - l, " miss");
+
+	return l;
+}
+
+static const char * const mem_lvl[] = {
+	"N/A",
+	"HIT",
+	"MISS",
+	"L1",
+	"LFB",
+	"L2",
+	"L3",
+	"Local RAM",
+	"Remote RAM (1 hop)",
+	"Remote RAM (2 hops)",
+	"Remote Cache (1 hop)",
+	"Remote Cache (2 hops)",
+	"I/O",
+	"Uncached",
+};
+
+int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
+{
+	size_t i, l = 0;
+	u64 m =  PERF_MEM_LVL_NA;
+	u64 hit, miss;
+
+	if (mem_info)
+		m  = mem_info->data_src.mem_lvl;
+
+	sz -= 1; /* -1 for null termination */
+	out[0] = '\0';
+
+	hit = m & PERF_MEM_LVL_HIT;
+	miss = m & PERF_MEM_LVL_MISS;
+
+	/* already taken care of */
+	m &= ~(PERF_MEM_LVL_HIT|PERF_MEM_LVL_MISS);
+
+	for (i = 0; m && i < ARRAY_SIZE(mem_lvl); i++, m >>= 1) {
+		if (!(m & 0x1))
+			continue;
+		if (l) {
+			strcat(out, " or ");
+			l += 4;
+		}
+		l += scnprintf(out + l, sz - l, mem_lvl[i]);
+	}
+	if (*out == '\0')
+		l += scnprintf(out, sz - l, "N/A");
+	if (hit)
+		l += scnprintf(out + l, sz - l, " hit");
+	if (miss)
+		l += scnprintf(out + l, sz - l, " miss");
+
+	return l;
+}
+
+static const char * const snoop_access[] = {
+	"N/A",
+	"None",
+	"Miss",
+	"Hit",
+	"HitM",
+};
+
+int perf_mem__snp_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
+{
+	size_t i, l = 0;
+	u64 m = PERF_MEM_SNOOP_NA;
+
+	sz -= 1; /* -1 for null termination */
+	out[0] = '\0';
+
+	if (mem_info)
+		m = mem_info->data_src.mem_snoop;
+
+	for (i = 0; m && i < ARRAY_SIZE(snoop_access); i++, m >>= 1) {
+		if (!(m & 0x1))
+			continue;
+		if (l) {
+			strcat(out, " or ");
+			l += 4;
+		}
+		l += scnprintf(out + l, sz - l, snoop_access[i]);
+	}
+
+	if (*out == '\0')
+		l += scnprintf(out, sz - l, "N/A");
+
+	return l;
+}
+
+int perf_mem__lck_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
+{
+	u64 mask = PERF_MEM_LOCK_NA;
+	int l;
+
+	if (mem_info)
+		mask = mem_info->data_src.mem_lock;
+
+	if (mask & PERF_MEM_LOCK_NA)
+		l = scnprintf(out, sz, "N/A");
+	else if (mask & PERF_MEM_LOCK_LOCKED)
+		l = scnprintf(out, sz, "Yes");
+	else
+		l = scnprintf(out, sz, "No");
+
+	return l;
+}
+
+int perf_script__meminfo_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
+{
+	int i = 0;
+
+	i += perf_mem__lvl_scnprintf(out, sz, mem_info);
+	i += scnprintf(out + i, sz - i, "|SNP ");
+	i += perf_mem__snp_scnprintf(out + i, sz - i, mem_info);
+	i += scnprintf(out + i, sz - i, "|TLB ");
+	i += perf_mem__tlb_scnprintf(out + i, sz - i, mem_info);
+	i += scnprintf(out + i, sz - i, "|LCK ");
+	i += perf_mem__lck_scnprintf(out + i, sz - i, mem_info);
+
+	return i;
+}
diff --git a/tools/perf/util/mem-events.h b/tools/perf/util/mem-events.h
new file mode 100644
index 0000000..5d6d930
--- /dev/null
+++ b/tools/perf/util/mem-events.h
@@ -0,0 +1,35 @@
+#ifndef __PERF_MEM_EVENTS_H
+#define __PERF_MEM_EVENTS_H
+
+#include <stdbool.h>
+
+struct perf_mem_event {
+	bool		record;
+	bool		supported;
+	const char	*tag;
+	const char	*name;
+	const char	*sysfs_name;
+};
+
+enum {
+	PERF_MEM_EVENTS__LOAD,
+	PERF_MEM_EVENTS__STORE,
+	PERF_MEM_EVENTS__MAX,
+};
+
+extern struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX];
+
+int perf_mem_events__parse(const char *str);
+int perf_mem_events__init(void);
+
+char *perf_mem_events__name(int i);
+
+struct mem_info;
+int perf_mem__tlb_scnprintf(char *out, size_t sz, struct mem_info *mem_info);
+int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info);
+int perf_mem__snp_scnprintf(char *out, size_t sz, struct mem_info *mem_info);
+int perf_mem__lck_scnprintf(char *out, size_t sz, struct mem_info *mem_info);
+
+int perf_script__meminfo_scnprintf(char *bf, size_t size, struct mem_info *mem_info);
+
+#endif /* __PERF_MEM_EVENTS_H */
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 4f7b0ef..4c19d5e 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -279,7 +279,24 @@
 	return "unknown";
 }
 
+static int parse_events__is_name_term(struct parse_events_term *term)
+{
+	return term->type_term == PARSE_EVENTS__TERM_TYPE_NAME;
+}
 
+static char *get_config_name(struct list_head *head_terms)
+{
+	struct parse_events_term *term;
+
+	if (!head_terms)
+		return NULL;
+
+	list_for_each_entry(term, head_terms, list)
+		if (parse_events__is_name_term(term))
+			return term->val.str;
+
+	return NULL;
+}
 
 static struct perf_evsel *
 __add_event(struct list_head *list, int *idx,
@@ -333,11 +350,25 @@
 	return -1;
 }
 
+typedef int config_term_func_t(struct perf_event_attr *attr,
+			       struct parse_events_term *term,
+			       struct parse_events_error *err);
+static int config_term_common(struct perf_event_attr *attr,
+			      struct parse_events_term *term,
+			      struct parse_events_error *err);
+static int config_attr(struct perf_event_attr *attr,
+		       struct list_head *head,
+		       struct parse_events_error *err,
+		       config_term_func_t config_term);
+
 int parse_events_add_cache(struct list_head *list, int *idx,
-			   char *type, char *op_result1, char *op_result2)
+			   char *type, char *op_result1, char *op_result2,
+			   struct parse_events_error *err,
+			   struct list_head *head_config)
 {
 	struct perf_event_attr attr;
-	char name[MAX_NAME_LEN];
+	LIST_HEAD(config_terms);
+	char name[MAX_NAME_LEN], *config_name;
 	int cache_type = -1, cache_op = -1, cache_result = -1;
 	char *op_result[2] = { op_result1, op_result2 };
 	int i, n;
@@ -351,6 +382,7 @@
 	if (cache_type == -1)
 		return -EINVAL;
 
+	config_name = get_config_name(head_config);
 	n = snprintf(name, MAX_NAME_LEN, "%s", type);
 
 	for (i = 0; (i < 2) && (op_result[i]); i++) {
@@ -391,7 +423,16 @@
 	memset(&attr, 0, sizeof(attr));
 	attr.config = cache_type | (cache_op << 8) | (cache_result << 16);
 	attr.type = PERF_TYPE_HW_CACHE;
-	return add_event(list, idx, &attr, name, NULL);
+
+	if (head_config) {
+		if (config_attr(&attr, head_config, err,
+				config_term_common))
+			return -EINVAL;
+
+		if (get_config_terms(head_config, &config_terms))
+			return -ENOMEM;
+	}
+	return add_event(list, idx, &attr, config_name ? : name, &config_terms);
 }
 
 static void tracepoint_error(struct parse_events_error *e, int err,
@@ -399,6 +440,9 @@
 {
 	char help[BUFSIZ];
 
+	if (!e)
+		return;
+
 	/*
 	 * We get error directly from syscall errno ( > 0),
 	 * or from encoded pointer's error ( < 0).
@@ -537,6 +581,7 @@
 struct __add_bpf_event_param {
 	struct parse_events_evlist *data;
 	struct list_head *list;
+	struct list_head *head_config;
 };
 
 static int add_bpf_event(struct probe_trace_event *tev, int fd,
@@ -553,7 +598,8 @@
 		 tev->group, tev->event, fd);
 
 	err = parse_events_add_tracepoint(&new_evsels, &evlist->idx, tev->group,
-					  tev->event, evlist->error, NULL);
+					  tev->event, evlist->error,
+					  param->head_config);
 	if (err) {
 		struct perf_evsel *evsel, *tmp;
 
@@ -578,11 +624,12 @@
 
 int parse_events_load_bpf_obj(struct parse_events_evlist *data,
 			      struct list_head *list,
-			      struct bpf_object *obj)
+			      struct bpf_object *obj,
+			      struct list_head *head_config)
 {
 	int err;
 	char errbuf[BUFSIZ];
-	struct __add_bpf_event_param param = {data, list};
+	struct __add_bpf_event_param param = {data, list, head_config};
 	static bool registered_unprobe_atexit = false;
 
 	if (IS_ERR(obj) || !obj) {
@@ -628,17 +675,99 @@
 	return err;
 }
 
+static int
+parse_events_config_bpf(struct parse_events_evlist *data,
+			struct bpf_object *obj,
+			struct list_head *head_config)
+{
+	struct parse_events_term *term;
+	int error_pos;
+
+	if (!head_config || list_empty(head_config))
+		return 0;
+
+	list_for_each_entry(term, head_config, list) {
+		char errbuf[BUFSIZ];
+		int err;
+
+		if (term->type_term != PARSE_EVENTS__TERM_TYPE_USER) {
+			snprintf(errbuf, sizeof(errbuf),
+				 "Invalid config term for BPF object");
+			errbuf[BUFSIZ - 1] = '\0';
+
+			data->error->idx = term->err_term;
+			data->error->str = strdup(errbuf);
+			return -EINVAL;
+		}
+
+		err = bpf__config_obj(obj, term, data->evlist, &error_pos);
+		if (err) {
+			bpf__strerror_config_obj(obj, term, data->evlist,
+						 &error_pos, err, errbuf,
+						 sizeof(errbuf));
+			data->error->help = strdup(
+"Hint:\tValid config terms:\n"
+"     \tmap:[<arraymap>].value<indices>=[value]\n"
+"     \tmap:[<eventmap>].event<indices>=[event]\n"
+"\n"
+"     \twhere <indices> is something like [0,3...5] or [all]\n"
+"     \t(add -v to see detail)");
+			data->error->str = strdup(errbuf);
+			if (err == -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUE)
+				data->error->idx = term->err_val;
+			else
+				data->error->idx = term->err_term + error_pos;
+			return err;
+		}
+	}
+	return 0;
+}
+
+/*
+ * Split config terms:
+ * perf record -e bpf.c/call-graph=fp,map:array.value[0]=1/ ...
+ *  'call-graph=fp' is 'evt config', should be applied to each
+ *  events in bpf.c.
+ * 'map:array.value[0]=1' is 'obj config', should be processed
+ * with parse_events_config_bpf.
+ *
+ * Move object config terms from the first list to obj_head_config.
+ */
+static void
+split_bpf_config_terms(struct list_head *evt_head_config,
+		       struct list_head *obj_head_config)
+{
+	struct parse_events_term *term, *temp;
+
+	/*
+	 * Currectly, all possible user config term
+	 * belong to bpf object. parse_events__is_hardcoded_term()
+	 * happends to be a good flag.
+	 *
+	 * See parse_events_config_bpf() and
+	 * config_term_tracepoint().
+	 */
+	list_for_each_entry_safe(term, temp, evt_head_config, list)
+		if (!parse_events__is_hardcoded_term(term))
+			list_move_tail(&term->list, obj_head_config);
+}
+
 int parse_events_load_bpf(struct parse_events_evlist *data,
 			  struct list_head *list,
 			  char *bpf_file_name,
-			  bool source)
+			  bool source,
+			  struct list_head *head_config)
 {
+	int err;
 	struct bpf_object *obj;
+	LIST_HEAD(obj_head_config);
+
+	if (head_config)
+		split_bpf_config_terms(head_config, &obj_head_config);
 
 	obj = bpf__prepare_load(bpf_file_name, source);
 	if (IS_ERR(obj)) {
 		char errbuf[BUFSIZ];
-		int err;
 
 		err = PTR_ERR(obj);
 
@@ -656,7 +785,18 @@
 		return err;
 	}
 
-	return parse_events_load_bpf_obj(data, list, obj);
+	err = parse_events_load_bpf_obj(data, list, obj, head_config);
+	if (err)
+		return err;
+	err = parse_events_config_bpf(data, obj, &obj_head_config);
+
+	/*
+	 * Caller doesn't know anything about obj_head_config,
+	 * so combine them together again before returnning.
+	 */
+	if (head_config)
+		list_splice_tail(&obj_head_config, head_config);
+	return err;
 }
 
 static int
@@ -743,9 +883,59 @@
 	return -EINVAL;
 }
 
-typedef int config_term_func_t(struct perf_event_attr *attr,
-			       struct parse_events_term *term,
-			       struct parse_events_error *err);
+/*
+ * Update according to parse-events.l
+ */
+static const char *config_term_names[__PARSE_EVENTS__TERM_TYPE_NR] = {
+	[PARSE_EVENTS__TERM_TYPE_USER]			= "<sysfs term>",
+	[PARSE_EVENTS__TERM_TYPE_CONFIG]		= "config",
+	[PARSE_EVENTS__TERM_TYPE_CONFIG1]		= "config1",
+	[PARSE_EVENTS__TERM_TYPE_CONFIG2]		= "config2",
+	[PARSE_EVENTS__TERM_TYPE_NAME]			= "name",
+	[PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD]		= "period",
+	[PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ]		= "freq",
+	[PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE]	= "branch_type",
+	[PARSE_EVENTS__TERM_TYPE_TIME]			= "time",
+	[PARSE_EVENTS__TERM_TYPE_CALLGRAPH]		= "call-graph",
+	[PARSE_EVENTS__TERM_TYPE_STACKSIZE]		= "stack-size",
+	[PARSE_EVENTS__TERM_TYPE_NOINHERIT]		= "no-inherit",
+	[PARSE_EVENTS__TERM_TYPE_INHERIT]		= "inherit",
+};
+
+static bool config_term_shrinked;
+
+static bool
+config_term_avail(int term_type, struct parse_events_error *err)
+{
+	if (term_type < 0 || term_type >= __PARSE_EVENTS__TERM_TYPE_NR) {
+		err->str = strdup("Invalid term_type");
+		return false;
+	}
+	if (!config_term_shrinked)
+		return true;
+
+	switch (term_type) {
+	case PARSE_EVENTS__TERM_TYPE_CONFIG:
+	case PARSE_EVENTS__TERM_TYPE_CONFIG1:
+	case PARSE_EVENTS__TERM_TYPE_CONFIG2:
+	case PARSE_EVENTS__TERM_TYPE_NAME:
+		return true;
+	default:
+		if (!err)
+			return false;
+
+		/* term_type is validated so indexing is safe */
+		if (asprintf(&err->str, "'%s' is not usable in 'perf stat'",
+			     config_term_names[term_type]) < 0)
+			err->str = NULL;
+		return false;
+	}
+}
+
+void parse_events__shrink_config_terms(void)
+{
+	config_term_shrinked = true;
+}
 
 static int config_term_common(struct perf_event_attr *attr,
 			      struct parse_events_term *term,
@@ -812,6 +1002,17 @@
 		return -EINVAL;
 	}
 
+	/*
+	 * Check term availbility after basic checking so
+	 * PARSE_EVENTS__TERM_TYPE_USER can be found and filtered.
+	 *
+	 * If check availbility at the entry of this function,
+	 * user will see "'<sysfs term>' is not usable in 'perf stat'"
+	 * if an invalid config term is provided for legacy events
+	 * (for example, instructions/badterm/...), which is confusing.
+	 */
+	if (!config_term_avail(term->type_term, err))
+		return -EINVAL;
 	return 0;
 #undef CHECK_TYPE_VAL
 }
@@ -958,23 +1159,8 @@
 			return -ENOMEM;
 	}
 
-	return add_event(list, &data->idx, &attr, NULL, &config_terms);
-}
-
-static int parse_events__is_name_term(struct parse_events_term *term)
-{
-	return term->type_term == PARSE_EVENTS__TERM_TYPE_NAME;
-}
-
-static char *pmu_event_name(struct list_head *head_terms)
-{
-	struct parse_events_term *term;
-
-	list_for_each_entry(term, head_terms, list)
-		if (parse_events__is_name_term(term))
-			return term->val.str;
-
-	return NULL;
+	return add_event(list, &data->idx, &attr,
+			 get_config_name(head_config), &config_terms);
 }
 
 int parse_events_add_pmu(struct parse_events_evlist *data,
@@ -1021,7 +1207,7 @@
 		return -EINVAL;
 
 	evsel = __add_event(list, &data->idx, &attr,
-			    pmu_event_name(head_config), pmu->cpus,
+			    get_config_name(head_config), pmu->cpus,
 			    &config_terms);
 	if (evsel) {
 		evsel->unit = info.unit;
@@ -1383,8 +1569,7 @@
 		return 0;
 	}
 
-	if (data.terms)
-		parse_events__free_terms(data.terms);
+	parse_events_terms__delete(data.terms);
 	return ret;
 }
 
@@ -1392,9 +1577,10 @@
 		 struct parse_events_error *err)
 {
 	struct parse_events_evlist data = {
-		.list  = LIST_HEAD_INIT(data.list),
-		.idx   = evlist->nr_entries,
-		.error = err,
+		.list   = LIST_HEAD_INIT(data.list),
+		.idx    = evlist->nr_entries,
+		.error  = err,
+		.evlist = evlist,
 	};
 	int ret;
 
@@ -2065,12 +2251,29 @@
 			term->err_term, term->err_val);
 }
 
-void parse_events__free_terms(struct list_head *terms)
+void parse_events_terms__purge(struct list_head *terms)
 {
 	struct parse_events_term *term, *h;
 
-	list_for_each_entry_safe(term, h, terms, list)
+	list_for_each_entry_safe(term, h, terms, list) {
+		if (term->array.nr_ranges)
+			free(term->array.ranges);
+		list_del_init(&term->list);
 		free(term);
+	}
+}
+
+void parse_events_terms__delete(struct list_head *terms)
+{
+	if (!terms)
+		return;
+	parse_events_terms__purge(terms);
+	free(terms);
+}
+
+void parse_events__clear_array(struct parse_events_array *a)
+{
+	free(a->ranges);
 }
 
 void parse_events_evlist_error(struct parse_events_evlist *data,
@@ -2085,6 +2288,33 @@
 	WARN_ONCE(!err->str, "WARNING: failed to allocate error string");
 }
 
+static void config_terms_list(char *buf, size_t buf_sz)
+{
+	int i;
+	bool first = true;
+
+	buf[0] = '\0';
+	for (i = 0; i < __PARSE_EVENTS__TERM_TYPE_NR; i++) {
+		const char *name = config_term_names[i];
+
+		if (!config_term_avail(i, NULL))
+			continue;
+		if (!name)
+			continue;
+		if (name[0] == '<')
+			continue;
+
+		if (strlen(buf) + strlen(name) + 2 >= buf_sz)
+			return;
+
+		if (!first)
+			strcat(buf, ",");
+		else
+			first = false;
+		strcat(buf, name);
+	}
+}
+
 /*
  * Return string contains valid config terms of an event.
  * @additional_terms: For terms such as PMU sysfs terms.
@@ -2092,17 +2322,18 @@
 char *parse_events_formats_error_string(char *additional_terms)
 {
 	char *str;
-	static const char *static_terms = "config,config1,config2,name,"
-					  "period,freq,branch_type,time,"
-					  "call-graph,stack-size\n";
+	/* "branch_type" is the longest name */
+	char static_terms[__PARSE_EVENTS__TERM_TYPE_NR *
+			  (sizeof("branch_type") - 1)];
 
+	config_terms_list(static_terms, sizeof(static_terms));
 	/* valid terms */
 	if (additional_terms) {
-		if (!asprintf(&str, "valid terms: %s,%s",
-			      additional_terms, static_terms))
+		if (asprintf(&str, "valid terms: %s,%s",
+			     additional_terms, static_terms) < 0)
 			goto fail;
 	} else {
-		if (!asprintf(&str, "valid terms: %s", static_terms))
+		if (asprintf(&str, "valid terms: %s", static_terms) < 0)
 			goto fail;
 	}
 	return str;
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index f1a6db1..67e4930 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -68,11 +68,21 @@
 	PARSE_EVENTS__TERM_TYPE_CALLGRAPH,
 	PARSE_EVENTS__TERM_TYPE_STACKSIZE,
 	PARSE_EVENTS__TERM_TYPE_NOINHERIT,
-	PARSE_EVENTS__TERM_TYPE_INHERIT
+	PARSE_EVENTS__TERM_TYPE_INHERIT,
+	__PARSE_EVENTS__TERM_TYPE_NR,
+};
+
+struct parse_events_array {
+	size_t nr_ranges;
+	struct {
+		unsigned int start;
+		size_t length;
+	} *ranges;
 };
 
 struct parse_events_term {
 	char *config;
+	struct parse_events_array array;
 	union {
 		char *str;
 		u64  num;
@@ -98,12 +108,14 @@
 	int			   idx;
 	int			   nr_groups;
 	struct parse_events_error *error;
+	struct perf_evlist	  *evlist;
 };
 
 struct parse_events_terms {
 	struct list_head *terms;
 };
 
+void parse_events__shrink_config_terms(void);
 int parse_events__is_hardcoded_term(struct parse_events_term *term);
 int parse_events_term__num(struct parse_events_term **term,
 			   int type_term, char *config, u64 num,
@@ -115,7 +127,9 @@
 			      char *config, unsigned idx);
 int parse_events_term__clone(struct parse_events_term **new,
 			     struct parse_events_term *term);
-void parse_events__free_terms(struct list_head *terms);
+void parse_events_terms__delete(struct list_head *terms);
+void parse_events_terms__purge(struct list_head *terms);
+void parse_events__clear_array(struct parse_events_array *a);
 int parse_events__modifier_event(struct list_head *list, char *str, bool add);
 int parse_events__modifier_group(struct list_head *list, char *event_mod);
 int parse_events_name(struct list_head *list, char *name);
@@ -126,18 +140,22 @@
 int parse_events_load_bpf(struct parse_events_evlist *data,
 			  struct list_head *list,
 			  char *bpf_file_name,
-			  bool source);
+			  bool source,
+			  struct list_head *head_config);
 /* Provide this function for perf test */
 struct bpf_object;
 int parse_events_load_bpf_obj(struct parse_events_evlist *data,
 			      struct list_head *list,
-			      struct bpf_object *obj);
+			      struct bpf_object *obj,
+			      struct list_head *head_config);
 int parse_events_add_numeric(struct parse_events_evlist *data,
 			     struct list_head *list,
 			     u32 type, u64 config,
 			     struct list_head *head_config);
 int parse_events_add_cache(struct list_head *list, int *idx,
-			   char *type, char *op_result1, char *op_result2);
+			   char *type, char *op_result1, char *op_result2,
+			   struct parse_events_error *error,
+			   struct list_head *head_config);
 int parse_events_add_breakpoint(struct list_head *list, int *idx,
 				void *ptr, char *type, u64 len);
 int parse_events_add_pmu(struct parse_events_evlist *data,
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index 58c5831..1477fbc 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -9,8 +9,8 @@
 %{
 #include <errno.h>
 #include "../perf.h"
-#include "parse-events-bison.h"
 #include "parse-events.h"
+#include "parse-events-bison.h"
 
 char *parse_events_get_text(yyscan_t yyscanner);
 YYSTYPE *parse_events_get_lval(yyscan_t yyscanner);
@@ -111,6 +111,7 @@
 %x mem
 %s config
 %x event
+%x array
 
 group		[^,{}/]*[{][^}]*[}][^,{}/]*
 event_pmu	[^,{}/]+[/][^/]*[/][^,{}/]*
@@ -122,7 +123,7 @@
 num_hex		0x[a-fA-F0-9]+
 num_raw_hex	[a-fA-F0-9]+
 name		[a-zA-Z_*?][a-zA-Z0-9_*?.]*
-name_minus	[a-zA-Z_*?][a-zA-Z0-9\-_*?.]*
+name_minus	[a-zA-Z_*?][a-zA-Z0-9\-_*?.:]*
 /* If you add a modifier you need to update check_modifier() */
 modifier_event	[ukhpPGHSDI]+
 modifier_bp	[rwx]{1,3}
@@ -176,10 +177,17 @@
 
 }
 
+<array>{
+"]"			{ BEGIN(config); return ']'; }
+{num_dec}		{ return value(yyscanner, 10); }
+{num_hex}		{ return value(yyscanner, 16); }
+,			{ return ','; }
+"\.\.\."		{ return PE_ARRAY_RANGE; }
+}
+
 <config>{
 	/*
-	 * Please update parse_events_formats_error_string any time
-	 * new static term is added.
+	 * Please update config_term_names when new static term is added.
 	 */
 config			{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG); }
 config1			{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG1); }
@@ -196,6 +204,8 @@
 ,			{ return ','; }
 "/"			{ BEGIN(INITIAL); return '/'; }
 {name_minus}		{ return str(yyscanner, PE_NAME); }
+\[all\]			{ return PE_ARRAY_ALL; }
+"["			{ BEGIN(array); return '['; }
 }
 
 <mem>{
@@ -238,6 +248,7 @@
 alignment-faults				{ return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_ALIGNMENT_FAULTS); }
 emulation-faults				{ return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_EMULATION_FAULTS); }
 dummy						{ return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_DUMMY); }
+bpf-output					{ return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_BPF_OUTPUT); }
 
 	/*
 	 * We have to handle the kernel PMU event cycles-ct/cycles-t/mem-loads/mem-stores separately.
diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y
index ad37996..5be4a5f 100644
--- a/tools/perf/util/parse-events.y
+++ b/tools/perf/util/parse-events.y
@@ -28,7 +28,7 @@
 	INIT_LIST_HEAD(list);         \
 } while (0)
 
-static inc_group_count(struct list_head *list,
+static void inc_group_count(struct list_head *list,
 		       struct parse_events_evlist *data)
 {
 	/* Count groups only have more than 1 members */
@@ -48,6 +48,7 @@
 %token PE_PREFIX_MEM PE_PREFIX_RAW PE_PREFIX_GROUP
 %token PE_ERROR
 %token PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_KERNEL_PMU_EVENT
+%token PE_ARRAY_ALL PE_ARRAY_RANGE
 %type <num> PE_VALUE
 %type <num> PE_VALUE_SYM_HW
 %type <num> PE_VALUE_SYM_SW
@@ -64,6 +65,7 @@
 %type <str> PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_KERNEL_PMU_EVENT
 %type <num> value_sym
 %type <head> event_config
+%type <head> opt_event_config
 %type <term> event_term
 %type <head> event_pmu
 %type <head> event_legacy_symbol
@@ -82,6 +84,9 @@
 %type <head> group_def
 %type <head> group
 %type <head> groups
+%type <array> array
+%type <array> array_term
+%type <array> array_terms
 
 %union
 {
@@ -93,6 +98,7 @@
 		char *sys;
 		char *event;
 	} tracepoint_name;
+	struct parse_events_array array;
 }
 %%
 
@@ -211,24 +217,14 @@
 	   event_bpf_file
 
 event_pmu:
-PE_NAME '/' event_config '/'
+PE_NAME opt_event_config
 {
 	struct parse_events_evlist *data = _data;
 	struct list_head *list;
 
 	ALLOC_LIST(list);
-	ABORT_ON(parse_events_add_pmu(data, list, $1, $3));
-	parse_events__free_terms($3);
-	$$ = list;
-}
-|
-PE_NAME '/' '/'
-{
-	struct parse_events_evlist *data = _data;
-	struct list_head *list;
-
-	ALLOC_LIST(list);
-	ABORT_ON(parse_events_add_pmu(data, list, $1, NULL));
+	ABORT_ON(parse_events_add_pmu(data, list, $1, $2));
+	parse_events_terms__delete($2);
 	$$ = list;
 }
 |
@@ -246,7 +242,7 @@
 
 	ALLOC_LIST(list);
 	ABORT_ON(parse_events_add_pmu(data, list, "cpu", head));
-	parse_events__free_terms(head);
+	parse_events_terms__delete(head);
 	$$ = list;
 }
 |
@@ -266,7 +262,7 @@
 
 	ALLOC_LIST(list);
 	ABORT_ON(parse_events_add_pmu(data, list, "cpu", head));
-	parse_events__free_terms(head);
+	parse_events_terms__delete(head);
 	$$ = list;
 }
 
@@ -285,7 +281,7 @@
 
 	ALLOC_LIST(list);
 	ABORT_ON(parse_events_add_numeric(data, list, type, config, $3));
-	parse_events__free_terms($3);
+	parse_events_terms__delete($3);
 	$$ = list;
 }
 |
@@ -302,33 +298,39 @@
 }
 
 event_legacy_cache:
-PE_NAME_CACHE_TYPE '-' PE_NAME_CACHE_OP_RESULT '-' PE_NAME_CACHE_OP_RESULT
+PE_NAME_CACHE_TYPE '-' PE_NAME_CACHE_OP_RESULT '-' PE_NAME_CACHE_OP_RESULT opt_event_config
 {
 	struct parse_events_evlist *data = _data;
+	struct parse_events_error *error = data->error;
 	struct list_head *list;
 
 	ALLOC_LIST(list);
-	ABORT_ON(parse_events_add_cache(list, &data->idx, $1, $3, $5));
+	ABORT_ON(parse_events_add_cache(list, &data->idx, $1, $3, $5, error, $6));
+	parse_events_terms__delete($6);
 	$$ = list;
 }
 |
-PE_NAME_CACHE_TYPE '-' PE_NAME_CACHE_OP_RESULT
+PE_NAME_CACHE_TYPE '-' PE_NAME_CACHE_OP_RESULT opt_event_config
 {
 	struct parse_events_evlist *data = _data;
+	struct parse_events_error *error = data->error;
 	struct list_head *list;
 
 	ALLOC_LIST(list);
-	ABORT_ON(parse_events_add_cache(list, &data->idx, $1, $3, NULL));
+	ABORT_ON(parse_events_add_cache(list, &data->idx, $1, $3, NULL, error, $4));
+	parse_events_terms__delete($4);
 	$$ = list;
 }
 |
-PE_NAME_CACHE_TYPE
+PE_NAME_CACHE_TYPE opt_event_config
 {
 	struct parse_events_evlist *data = _data;
+	struct parse_events_error *error = data->error;
 	struct list_head *list;
 
 	ALLOC_LIST(list);
-	ABORT_ON(parse_events_add_cache(list, &data->idx, $1, NULL, NULL));
+	ABORT_ON(parse_events_add_cache(list, &data->idx, $1, NULL, NULL, error, $2));
+	parse_events_terms__delete($2);
 	$$ = list;
 }
 
@@ -378,7 +380,7 @@
 }
 
 event_legacy_tracepoint:
-tracepoint_name
+tracepoint_name opt_event_config
 {
 	struct parse_events_evlist *data = _data;
 	struct parse_events_error *error = data->error;
@@ -389,24 +391,7 @@
 		error->idx = @1.first_column;
 
 	if (parse_events_add_tracepoint(list, &data->idx, $1.sys, $1.event,
-					error, NULL))
-		return -1;
-
-	$$ = list;
-}
-|
-tracepoint_name '/' event_config '/'
-{
-	struct parse_events_evlist *data = _data;
-	struct parse_events_error *error = data->error;
-	struct list_head *list;
-
-	ALLOC_LIST(list);
-	if (error)
-		error->idx = @1.first_column;
-
-	if (parse_events_add_tracepoint(list, &data->idx, $1.sys, $1.event,
-					error, $3))
+					error, $2))
 		return -1;
 
 	$$ = list;
@@ -433,49 +418,68 @@
 }
 
 event_legacy_numeric:
-PE_VALUE ':' PE_VALUE
+PE_VALUE ':' PE_VALUE opt_event_config
 {
 	struct parse_events_evlist *data = _data;
 	struct list_head *list;
 
 	ALLOC_LIST(list);
-	ABORT_ON(parse_events_add_numeric(data, list, (u32)$1, $3, NULL));
+	ABORT_ON(parse_events_add_numeric(data, list, (u32)$1, $3, $4));
+	parse_events_terms__delete($4);
 	$$ = list;
 }
 
 event_legacy_raw:
-PE_RAW
+PE_RAW opt_event_config
 {
 	struct parse_events_evlist *data = _data;
 	struct list_head *list;
 
 	ALLOC_LIST(list);
-	ABORT_ON(parse_events_add_numeric(data, list, PERF_TYPE_RAW, $1, NULL));
+	ABORT_ON(parse_events_add_numeric(data, list, PERF_TYPE_RAW, $1, $2));
+	parse_events_terms__delete($2);
 	$$ = list;
 }
 
 event_bpf_file:
-PE_BPF_OBJECT
+PE_BPF_OBJECT opt_event_config
 {
 	struct parse_events_evlist *data = _data;
 	struct parse_events_error *error = data->error;
 	struct list_head *list;
 
 	ALLOC_LIST(list);
-	ABORT_ON(parse_events_load_bpf(data, list, $1, false));
+	ABORT_ON(parse_events_load_bpf(data, list, $1, false, $2));
+	parse_events_terms__delete($2);
 	$$ = list;
 }
 |
-PE_BPF_SOURCE
+PE_BPF_SOURCE opt_event_config
 {
 	struct parse_events_evlist *data = _data;
 	struct list_head *list;
 
 	ALLOC_LIST(list);
-	ABORT_ON(parse_events_load_bpf(data, list, $1, true));
+	ABORT_ON(parse_events_load_bpf(data, list, $1, true, $2));
+	parse_events_terms__delete($2);
 	$$ = list;
 }
 
+opt_event_config:
+'/' event_config '/'
+{
+	$$ = $2;
+}
+|
+'/' '/'
+{
+	$$ = NULL;
+}
+|
+{
+	$$ = NULL;
+}
+
 start_terms: event_config
 {
 	struct parse_events_terms *data = _data;
@@ -573,6 +577,86 @@
 	ABORT_ON(parse_events_term__num(&term, (int)$1, NULL, 1, &@1, NULL));
 	$$ = term;
 }
+|
+PE_NAME array '=' PE_NAME
+{
+	struct parse_events_term *term;
+	int i;
+
+	ABORT_ON(parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_USER,
+					$1, $4, &@1, &@4));
+
+	term->array = $2;
+	$$ = term;
+}
+|
+PE_NAME array '=' PE_VALUE
+{
+	struct parse_events_term *term;
+
+	ABORT_ON(parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER,
+					$1, $4, &@1, &@4));
+	term->array = $2;
+	$$ = term;
+}
+
+array:
+'[' array_terms ']'
+{
+	$$ = $2;
+}
+|
+PE_ARRAY_ALL
+{
+	$$.nr_ranges = 0;
+	$$.ranges = NULL;
+}
+
+array_terms:
+array_terms ',' array_term
+{
+	struct parse_events_array new_array;
+
+	new_array.nr_ranges = $1.nr_ranges + $3.nr_ranges;
+	new_array.ranges = malloc(sizeof(new_array.ranges[0]) *
+				  new_array.nr_ranges);
+	ABORT_ON(!new_array.ranges);
+	memcpy(&new_array.ranges[0], $1.ranges,
+	       $1.nr_ranges * sizeof(new_array.ranges[0]));
+	memcpy(&new_array.ranges[$1.nr_ranges], $3.ranges,
+	       $3.nr_ranges * sizeof(new_array.ranges[0]));
+	free($1.ranges);
+	free($3.ranges);
+	$$ = new_array;
+}
+|
+array_term
+
+array_term:
+PE_VALUE
+{
+	struct parse_events_array array;
+
+	array.nr_ranges = 1;
+	array.ranges = malloc(sizeof(array.ranges[0]));
+	ABORT_ON(!array.ranges);
+	array.ranges[0].start = $1;
+	array.ranges[0].length = 1;
+	$$ = array;
+}
+|
+PE_VALUE PE_ARRAY_RANGE PE_VALUE
+{
+	struct parse_events_array array;
+
+	ABORT_ON($3 < $1);
+	array.nr_ranges = 1;
+	array.ranges = malloc(sizeof(array.ranges[0]));
+	ABORT_ON(!array.ranges);
+	array.ranges[0].start = $1;
+	array.ranges[0].length = $3 - $1 + 1;
+	$$ = array;
+}
 
 sep_dc: ':' |
 
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index b597bcc..adef23b 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -98,7 +98,7 @@
 	char scale[128];
 	int fd, ret = -1;
 	char path[PATH_MAX];
-	const char *lc;
+	char *lc;
 
 	snprintf(path, PATH_MAX, "%s/%s.scale", dir, name);
 
@@ -124,6 +124,17 @@
 	lc = setlocale(LC_NUMERIC, NULL);
 
 	/*
+	 * The lc string may be allocated in static storage,
+	 * so get a dynamic copy to make it survive setlocale
+	 * call below.
+	 */
+	lc = strdup(lc);
+	if (!lc) {
+		ret = -ENOMEM;
+		goto error;
+	}
+
+	/*
 	 * force to C locale to ensure kernel
 	 * scale string is converted correctly.
 	 * kernel uses default C locale.
@@ -135,6 +146,8 @@
 	/* restore locale */
 	setlocale(LC_NUMERIC, lc);
 
+	free(lc);
+
 	ret = 0;
 error:
 	close(fd);
@@ -153,7 +166,7 @@
 	if (fd == -1)
 		return -1;
 
-		sret = read(fd, alias->unit, UNIT_MAX_LEN);
+	sret = read(fd, alias->unit, UNIT_MAX_LEN);
 	if (sret < 0)
 		goto error;
 
@@ -284,13 +297,12 @@
 {
 	struct dirent *evt_ent;
 	DIR *event_dir;
-	int ret = 0;
 
 	event_dir = opendir(dir);
 	if (!event_dir)
 		return -EINVAL;
 
-	while (!ret && (evt_ent = readdir(event_dir))) {
+	while ((evt_ent = readdir(event_dir))) {
 		char path[PATH_MAX];
 		char *name = evt_ent->d_name;
 		FILE *file;
@@ -306,17 +318,19 @@
 
 		snprintf(path, PATH_MAX, "%s/%s", dir, name);
 
-		ret = -EINVAL;
 		file = fopen(path, "r");
-		if (!file)
-			break;
+		if (!file) {
+			pr_debug("Cannot open %s\n", path);
+			continue;
+		}
 
-		ret = perf_pmu__new_alias(head, dir, name, file);
+		if (perf_pmu__new_alias(head, dir, name, file) < 0)
+			pr_debug("Cannot set up %s\n", name);
 		fclose(file);
 	}
 
 	closedir(event_dir);
-	return ret;
+	return 0;
 }
 
 /*
@@ -354,7 +368,7 @@
 	list_for_each_entry(term, &alias->terms, list) {
 		ret = parse_events_term__clone(&cloned, term);
 		if (ret) {
-			parse_events__free_terms(&list);
+			parse_events_terms__purge(&list);
 			return ret;
 		}
 		list_add_tail(&cloned->list, &list);
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index 2be10fb..4ce5c5e 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -686,8 +686,9 @@
 		pf->fb_ops = NULL;
 #if _ELFUTILS_PREREQ(0, 142)
 	} else if (nops == 1 && pf->fb_ops[0].atom == DW_OP_call_frame_cfa &&
-		   pf->cfi != NULL) {
-		if (dwarf_cfi_addrframe(pf->cfi, pf->addr, &frame) != 0 ||
+		   (pf->cfi_eh != NULL || pf->cfi_dbg != NULL)) {
+		if ((dwarf_cfi_addrframe(pf->cfi_eh, pf->addr, &frame) != 0 &&
+		     (dwarf_cfi_addrframe(pf->cfi_dbg, pf->addr, &frame) != 0)) ||
 		    dwarf_frame_cfa(frame, &pf->fb_ops, &nops) != 0) {
 			pr_warning("Failed to get call frame on 0x%jx\n",
 				   (uintmax_t)pf->addr);
@@ -1015,8 +1016,7 @@
 	return DWARF_CB_OK;
 }
 
-/* Find probe points from debuginfo */
-static int debuginfo__find_probes(struct debuginfo *dbg,
+static int debuginfo__find_probe_location(struct debuginfo *dbg,
 				  struct probe_finder *pf)
 {
 	struct perf_probe_point *pp = &pf->pev->point;
@@ -1025,27 +1025,6 @@
 	Dwarf_Die *diep;
 	int ret = 0;
 
-#if _ELFUTILS_PREREQ(0, 142)
-	Elf *elf;
-	GElf_Ehdr ehdr;
-	GElf_Shdr shdr;
-
-	/* Get the call frame information from this dwarf */
-	elf = dwarf_getelf(dbg->dbg);
-	if (elf == NULL)
-		return -EINVAL;
-
-	if (gelf_getehdr(elf, &ehdr) == NULL)
-		return -EINVAL;
-
-	if (elf_section_by_name(elf, &ehdr, &shdr, ".eh_frame", NULL) &&
-	    shdr.sh_type == SHT_PROGBITS) {
-		pf->cfi = dwarf_getcfi_elf(elf);
-	} else {
-		pf->cfi = dwarf_getcfi(dbg->dbg);
-	}
-#endif
-
 	off = 0;
 	pf->lcache = intlist__new(NULL);
 	if (!pf->lcache)
@@ -1108,6 +1087,39 @@
 	return ret;
 }
 
+/* Find probe points from debuginfo */
+static int debuginfo__find_probes(struct debuginfo *dbg,
+				  struct probe_finder *pf)
+{
+	int ret = 0;
+
+#if _ELFUTILS_PREREQ(0, 142)
+	Elf *elf;
+	GElf_Ehdr ehdr;
+	GElf_Shdr shdr;
+
+	if (pf->cfi_eh || pf->cfi_dbg)
+		return debuginfo__find_probe_location(dbg, pf);
+
+	/* Get the call frame information from this dwarf */
+	elf = dwarf_getelf(dbg->dbg);
+	if (elf == NULL)
+		return -EINVAL;
+
+	if (gelf_getehdr(elf, &ehdr) == NULL)
+		return -EINVAL;
+
+	if (elf_section_by_name(elf, &ehdr, &shdr, ".eh_frame", NULL) &&
+	    shdr.sh_type == SHT_PROGBITS)
+		pf->cfi_eh = dwarf_getcfi_elf(elf);
+
+	pf->cfi_dbg = dwarf_getcfi(dbg->dbg);
+#endif
+
+	ret = debuginfo__find_probe_location(dbg, pf);
+	return ret;
+}
+
 struct local_vars_finder {
 	struct probe_finder *pf;
 	struct perf_probe_arg *args;
diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h
index bed8271..0aec770 100644
--- a/tools/perf/util/probe-finder.h
+++ b/tools/perf/util/probe-finder.h
@@ -76,7 +76,10 @@
 
 	/* For variable searching */
 #if _ELFUTILS_PREREQ(0, 142)
-	Dwarf_CFI		*cfi;		/* Call Frame Information */
+	/* Call Frame Information from .eh_frame */
+	Dwarf_CFI		*cfi_eh;
+	/* Call Frame Information from .debug_frame */
+	Dwarf_CFI		*cfi_dbg;
 #endif
 	Dwarf_Op		*fb_ops;	/* Frame base attribute */
 	struct perf_probe_arg	*pvar;		/* Current target variable */
diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c
index 544509c..b3aabc0 100644
--- a/tools/perf/util/scripting-engines/trace-event-perl.c
+++ b/tools/perf/util/scripting-engines/trace-event-perl.c
@@ -187,6 +187,9 @@
 				 const char *ev_name,
 				 struct print_arg *args)
 {
+	if (args == NULL)
+		return;
+
 	switch (args->type) {
 	case PRINT_NULL:
 		break;
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index d72fafc..fbd0524 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -205,6 +205,9 @@
 				 const char *ev_name,
 				 struct print_arg *args)
 {
+	if (args == NULL)
+		return;
+
 	switch (args->type) {
 	case PRINT_NULL:
 		break;
@@ -1091,8 +1094,6 @@
 		goto error;
 	}
 
-	free(command_line);
-
 	set_table_handlers(tables);
 
 	if (tables->db_export_mode) {
@@ -1101,6 +1102,8 @@
 			goto error;
 	}
 
+	free(command_line);
+
 	return err;
 error:
 	Py_Finalize();
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index d5636ba..60b3593 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -240,14 +240,6 @@
 	return 0;
 }
 
-static int process_build_id_stub(struct perf_tool *tool __maybe_unused,
-				 union perf_event *event __maybe_unused,
-				 struct perf_session *session __maybe_unused)
-{
-	dump_printf(": unhandled!\n");
-	return 0;
-}
-
 static int process_finished_round_stub(struct perf_tool *tool __maybe_unused,
 				       union perf_event *event __maybe_unused,
 				       struct ordered_events *oe __maybe_unused)
@@ -260,23 +252,6 @@
 				  union perf_event *event,
 				  struct ordered_events *oe);
 
-static int process_id_index_stub(struct perf_tool *tool __maybe_unused,
-				 union perf_event *event __maybe_unused,
-				 struct perf_session *perf_session
-				 __maybe_unused)
-{
-	dump_printf(": unhandled!\n");
-	return 0;
-}
-
-static int process_event_auxtrace_info_stub(struct perf_tool *tool __maybe_unused,
-				union perf_event *event __maybe_unused,
-				struct perf_session *session __maybe_unused)
-{
-	dump_printf(": unhandled!\n");
-	return 0;
-}
-
 static int skipn(int fd, off_t n)
 {
 	char buf[4096];
@@ -303,10 +278,9 @@
 	return event->auxtrace.size;
 }
 
-static
-int process_event_auxtrace_error_stub(struct perf_tool *tool __maybe_unused,
-				      union perf_event *event __maybe_unused,
-				      struct perf_session *session __maybe_unused)
+static int process_event_op2_stub(struct perf_tool *tool __maybe_unused,
+				  union perf_event *event __maybe_unused,
+				  struct perf_session *session __maybe_unused)
 {
 	dump_printf(": unhandled!\n");
 	return 0;
@@ -410,7 +384,7 @@
 	if (tool->tracing_data == NULL)
 		tool->tracing_data = process_event_synth_tracing_data_stub;
 	if (tool->build_id == NULL)
-		tool->build_id = process_build_id_stub;
+		tool->build_id = process_event_op2_stub;
 	if (tool->finished_round == NULL) {
 		if (tool->ordered_events)
 			tool->finished_round = process_finished_round;
@@ -418,13 +392,13 @@
 			tool->finished_round = process_finished_round_stub;
 	}
 	if (tool->id_index == NULL)
-		tool->id_index = process_id_index_stub;
+		tool->id_index = process_event_op2_stub;
 	if (tool->auxtrace_info == NULL)
-		tool->auxtrace_info = process_event_auxtrace_info_stub;
+		tool->auxtrace_info = process_event_op2_stub;
 	if (tool->auxtrace == NULL)
 		tool->auxtrace = process_event_auxtrace_stub;
 	if (tool->auxtrace_error == NULL)
-		tool->auxtrace_error = process_event_auxtrace_error_stub;
+		tool->auxtrace_error = process_event_op2_stub;
 	if (tool->thread_map == NULL)
 		tool->thread_map = process_event_thread_map_stub;
 	if (tool->cpu_map == NULL)
@@ -1149,7 +1123,7 @@
 
 		machine = machines__find(machines, pid);
 		if (!machine)
-			machine = machines__find(machines, DEFAULT_GUEST_KERNEL_ID);
+			machine = machines__findnew(machines, DEFAULT_GUEST_KERNEL_ID);
 		return machine;
 	}
 
diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py
index 1833103..c8680984 100644
--- a/tools/perf/util/setup.py
+++ b/tools/perf/util/setup.py
@@ -22,6 +22,7 @@
 # switch off several checks (need to be at the end of cflags list)
 cflags += ['-fno-strict-aliasing', '-Wno-write-strings', '-Wno-unused-parameter' ]
 
+src_perf  = getenv('srctree') + '/tools/perf'
 build_lib = getenv('PYTHON_EXTBUILD_LIB')
 build_tmp = getenv('PYTHON_EXTBUILD_TMP')
 libtraceevent = getenv('LIBTRACEEVENT')
@@ -30,6 +31,9 @@
 ext_sources = [f.strip() for f in file('util/python-ext-sources')
 				if len(f.strip()) > 0 and f[0] != '#']
 
+# use full paths with source files
+ext_sources = map(lambda x: '%s/%s' % (src_perf, x) , ext_sources)
+
 perf = Extension('perf',
 		  sources = ext_sources,
 		  include_dirs = ['util/include'],
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index ec72234..93fa136 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -6,6 +6,7 @@
 #include "evsel.h"
 #include "evlist.h"
 #include <traceevent/event-parse.h>
+#include "mem-events.h"
 
 regex_t		parent_regex;
 const char	default_parent_pattern[] = "^sys_|^do_page_fault";
@@ -25,9 +26,19 @@
 int		sort__has_sym = 0;
 int		sort__has_dso = 0;
 int		sort__has_socket = 0;
+int		sort__has_thread = 0;
+int		sort__has_comm = 0;
 enum sort_mode	sort__mode = SORT_MODE__NORMAL;
 
-
+/*
+ * Replaces all occurrences of a char used with the:
+ *
+ * -t, --field-separator
+ *
+ * option, that uses a special separator character and don't pad with spaces,
+ * replacing all occurances of this separator in symbol names (and other
+ * output) with a '.' character, that thus it's the only non valid separator.
+*/
 static int repsep_snprintf(char *bf, size_t size, const char *fmt, ...)
 {
 	int n;
@@ -80,10 +91,21 @@
 			       width, width, comm ?: "");
 }
 
+static int hist_entry__thread_filter(struct hist_entry *he, int type, const void *arg)
+{
+	const struct thread *th = arg;
+
+	if (type != HIST_FILTER__THREAD)
+		return -1;
+
+	return th && he->thread != th;
+}
+
 struct sort_entry sort_thread = {
 	.se_header	= "  Pid:Command",
 	.se_cmp		= sort__thread_cmp,
 	.se_snprintf	= hist_entry__thread_snprintf,
+	.se_filter	= hist_entry__thread_filter,
 	.se_width_idx	= HISTC_THREAD,
 };
 
@@ -121,6 +143,7 @@
 	.se_collapse	= sort__comm_collapse,
 	.se_sort	= sort__comm_sort,
 	.se_snprintf	= hist_entry__comm_snprintf,
+	.se_filter	= hist_entry__thread_filter,
 	.se_width_idx	= HISTC_COMM,
 };
 
@@ -170,10 +193,21 @@
 	return _hist_entry__dso_snprintf(he->ms.map, bf, size, width);
 }
 
+static int hist_entry__dso_filter(struct hist_entry *he, int type, const void *arg)
+{
+	const struct dso *dso = arg;
+
+	if (type != HIST_FILTER__DSO)
+		return -1;
+
+	return dso && (!he->ms.map || he->ms.map->dso != dso);
+}
+
 struct sort_entry sort_dso = {
 	.se_header	= "Shared Object",
 	.se_cmp		= sort__dso_cmp,
 	.se_snprintf	= hist_entry__dso_snprintf,
+	.se_filter	= hist_entry__dso_filter,
 	.se_width_idx	= HISTC_DSO,
 };
 
@@ -246,10 +280,8 @@
 			ret += repsep_snprintf(bf + ret, size - ret, "%s", sym->name);
 			ret += repsep_snprintf(bf + ret, size - ret, "+0x%llx",
 					ip - map->unmap_ip(map, sym->start));
-			ret += repsep_snprintf(bf + ret, size - ret, "%-*s",
-				       width - ret, "");
 		} else {
-			ret += repsep_snprintf(bf + ret, size - ret, "%-*s",
+			ret += repsep_snprintf(bf + ret, size - ret, "%.*s",
 					       width - ret,
 					       sym->name);
 		}
@@ -257,14 +289,9 @@
 		size_t len = BITS_PER_LONG / 4;
 		ret += repsep_snprintf(bf + ret, size - ret, "%-#.*llx",
 				       len, ip);
-		ret += repsep_snprintf(bf + ret, size - ret, "%-*s",
-				       width - ret, "");
 	}
 
-	if (ret > width)
-		bf[width] = '\0';
-
-	return width;
+	return ret;
 }
 
 static int hist_entry__sym_snprintf(struct hist_entry *he, char *bf,
@@ -274,46 +301,56 @@
 					 he->level, bf, size, width);
 }
 
+static int hist_entry__sym_filter(struct hist_entry *he, int type, const void *arg)
+{
+	const char *sym = arg;
+
+	if (type != HIST_FILTER__SYMBOL)
+		return -1;
+
+	return sym && (!he->ms.sym || !strstr(he->ms.sym->name, sym));
+}
+
 struct sort_entry sort_sym = {
 	.se_header	= "Symbol",
 	.se_cmp		= sort__sym_cmp,
 	.se_sort	= sort__sym_sort,
 	.se_snprintf	= hist_entry__sym_snprintf,
+	.se_filter	= hist_entry__sym_filter,
 	.se_width_idx	= HISTC_SYMBOL,
 };
 
 /* --sort srcline */
 
+static char *hist_entry__get_srcline(struct hist_entry *he)
+{
+	struct map *map = he->ms.map;
+
+	if (!map)
+		return SRCLINE_UNKNOWN;
+
+	return get_srcline(map->dso, map__rip_2objdump(map, he->ip),
+			   he->ms.sym, true);
+}
+
 static int64_t
 sort__srcline_cmp(struct hist_entry *left, struct hist_entry *right)
 {
-	if (!left->srcline) {
-		if (!left->ms.map)
-			left->srcline = SRCLINE_UNKNOWN;
-		else {
-			struct map *map = left->ms.map;
-			left->srcline = get_srcline(map->dso,
-					   map__rip_2objdump(map, left->ip),
-						    left->ms.sym, true);
-		}
-	}
-	if (!right->srcline) {
-		if (!right->ms.map)
-			right->srcline = SRCLINE_UNKNOWN;
-		else {
-			struct map *map = right->ms.map;
-			right->srcline = get_srcline(map->dso,
-					     map__rip_2objdump(map, right->ip),
-						     right->ms.sym, true);
-		}
-	}
+	if (!left->srcline)
+		left->srcline = hist_entry__get_srcline(left);
+	if (!right->srcline)
+		right->srcline = hist_entry__get_srcline(right);
+
 	return strcmp(right->srcline, left->srcline);
 }
 
 static int hist_entry__srcline_snprintf(struct hist_entry *he, char *bf,
 					size_t size, unsigned int width)
 {
-	return repsep_snprintf(bf, size, "%-*.*s", width, width, he->srcline);
+	if (!he->srcline)
+		he->srcline = hist_entry__get_srcline(he);
+
+	return repsep_snprintf(bf, size, "%-.*s", width, he->srcline);
 }
 
 struct sort_entry sort_srcline = {
@@ -327,11 +364,14 @@
 
 static char no_srcfile[1];
 
-static char *get_srcfile(struct hist_entry *e)
+static char *hist_entry__get_srcfile(struct hist_entry *e)
 {
 	char *sf, *p;
 	struct map *map = e->ms.map;
 
+	if (!map)
+		return no_srcfile;
+
 	sf = __get_srcline(map->dso, map__rip_2objdump(map, e->ip),
 			 e->ms.sym, false, true);
 	if (!strcmp(sf, SRCLINE_UNKNOWN))
@@ -348,25 +388,21 @@
 static int64_t
 sort__srcfile_cmp(struct hist_entry *left, struct hist_entry *right)
 {
-	if (!left->srcfile) {
-		if (!left->ms.map)
-			left->srcfile = no_srcfile;
-		else
-			left->srcfile = get_srcfile(left);
-	}
-	if (!right->srcfile) {
-		if (!right->ms.map)
-			right->srcfile = no_srcfile;
-		else
-			right->srcfile = get_srcfile(right);
-	}
+	if (!left->srcfile)
+		left->srcfile = hist_entry__get_srcfile(left);
+	if (!right->srcfile)
+		right->srcfile = hist_entry__get_srcfile(right);
+
 	return strcmp(right->srcfile, left->srcfile);
 }
 
 static int hist_entry__srcfile_snprintf(struct hist_entry *he, char *bf,
 					size_t size, unsigned int width)
 {
-	return repsep_snprintf(bf, size, "%-*.*s", width, width, he->srcfile);
+	if (!he->srcfile)
+		he->srcfile = hist_entry__get_srcfile(he);
+
+	return repsep_snprintf(bf, size, "%-.*s", width, he->srcfile);
 }
 
 struct sort_entry sort_srcfile = {
@@ -439,10 +475,21 @@
 	return repsep_snprintf(bf, size, "%*.*d", width, width-3, he->socket);
 }
 
+static int hist_entry__socket_filter(struct hist_entry *he, int type, const void *arg)
+{
+	int sk = *(const int *)arg;
+
+	if (type != HIST_FILTER__SOCKET)
+		return -1;
+
+	return sk >= 0 && he->socket != sk;
+}
+
 struct sort_entry sort_socket = {
 	.se_header      = "Socket",
 	.se_cmp	        = sort__socket_cmp,
 	.se_snprintf    = hist_entry__socket_snprintf,
+	.se_filter      = hist_entry__socket_filter,
 	.se_width_idx	= HISTC_SOCKET,
 };
 
@@ -483,9 +530,6 @@
 	if (right->trace_output == NULL)
 		right->trace_output = get_trace_output(right);
 
-	hists__new_col_len(left->hists, HISTC_TRACE, strlen(left->trace_output));
-	hists__new_col_len(right->hists, HISTC_TRACE, strlen(right->trace_output));
-
 	return strcmp(right->trace_output, left->trace_output);
 }
 
@@ -496,11 +540,11 @@
 
 	evsel = hists_to_evsel(he->hists);
 	if (evsel->attr.type != PERF_TYPE_TRACEPOINT)
-		return scnprintf(bf, size, "%-*.*s", width, width, "N/A");
+		return scnprintf(bf, size, "%-.*s", width, "N/A");
 
 	if (he->trace_output == NULL)
 		he->trace_output = get_trace_output(he);
-	return repsep_snprintf(bf, size, "%-*.*s", width, width, he->trace_output);
+	return repsep_snprintf(bf, size, "%-.*s", width, he->trace_output);
 }
 
 struct sort_entry sort_trace = {
@@ -532,6 +576,18 @@
 		return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A");
 }
 
+static int hist_entry__dso_from_filter(struct hist_entry *he, int type,
+				       const void *arg)
+{
+	const struct dso *dso = arg;
+
+	if (type != HIST_FILTER__DSO)
+		return -1;
+
+	return dso && (!he->branch_info || !he->branch_info->from.map ||
+		       he->branch_info->from.map->dso != dso);
+}
+
 static int64_t
 sort__dso_to_cmp(struct hist_entry *left, struct hist_entry *right)
 {
@@ -552,6 +608,18 @@
 		return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A");
 }
 
+static int hist_entry__dso_to_filter(struct hist_entry *he, int type,
+				     const void *arg)
+{
+	const struct dso *dso = arg;
+
+	if (type != HIST_FILTER__DSO)
+		return -1;
+
+	return dso && (!he->branch_info || !he->branch_info->to.map ||
+		       he->branch_info->to.map->dso != dso);
+}
+
 static int64_t
 sort__sym_from_cmp(struct hist_entry *left, struct hist_entry *right)
 {
@@ -613,10 +681,35 @@
 	return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A");
 }
 
+static int hist_entry__sym_from_filter(struct hist_entry *he, int type,
+				       const void *arg)
+{
+	const char *sym = arg;
+
+	if (type != HIST_FILTER__SYMBOL)
+		return -1;
+
+	return sym && !(he->branch_info && he->branch_info->from.sym &&
+			strstr(he->branch_info->from.sym->name, sym));
+}
+
+static int hist_entry__sym_to_filter(struct hist_entry *he, int type,
+				       const void *arg)
+{
+	const char *sym = arg;
+
+	if (type != HIST_FILTER__SYMBOL)
+		return -1;
+
+	return sym && !(he->branch_info && he->branch_info->to.sym &&
+		        strstr(he->branch_info->to.sym->name, sym));
+}
+
 struct sort_entry sort_dso_from = {
 	.se_header	= "Source Shared Object",
 	.se_cmp		= sort__dso_from_cmp,
 	.se_snprintf	= hist_entry__dso_from_snprintf,
+	.se_filter	= hist_entry__dso_from_filter,
 	.se_width_idx	= HISTC_DSO_FROM,
 };
 
@@ -624,6 +717,7 @@
 	.se_header	= "Target Shared Object",
 	.se_cmp		= sort__dso_to_cmp,
 	.se_snprintf	= hist_entry__dso_to_snprintf,
+	.se_filter	= hist_entry__dso_to_filter,
 	.se_width_idx	= HISTC_DSO_TO,
 };
 
@@ -631,6 +725,7 @@
 	.se_header	= "Source Symbol",
 	.se_cmp		= sort__sym_from_cmp,
 	.se_snprintf	= hist_entry__sym_from_snprintf,
+	.se_filter	= hist_entry__sym_from_filter,
 	.se_width_idx	= HISTC_SYMBOL_FROM,
 };
 
@@ -638,6 +733,7 @@
 	.se_header	= "Target Symbol",
 	.se_cmp		= sort__sym_to_cmp,
 	.se_snprintf	= hist_entry__sym_to_snprintf,
+	.se_filter	= hist_entry__sym_to_filter,
 	.se_width_idx	= HISTC_SYMBOL_TO,
 };
 
@@ -797,20 +893,10 @@
 static int hist_entry__locked_snprintf(struct hist_entry *he, char *bf,
 				    size_t size, unsigned int width)
 {
-	const char *out;
-	u64 mask = PERF_MEM_LOCK_NA;
+	char out[10];
 
-	if (he->mem_info)
-		mask = he->mem_info->data_src.mem_lock;
-
-	if (mask & PERF_MEM_LOCK_NA)
-		out = "N/A";
-	else if (mask & PERF_MEM_LOCK_LOCKED)
-		out = "Yes";
-	else
-		out = "No";
-
-	return repsep_snprintf(bf, size, "%-*s", width, out);
+	perf_mem__lck_scnprintf(out, sizeof(out), he->mem_info);
+	return repsep_snprintf(bf, size, "%.*s", width, out);
 }
 
 static int64_t
@@ -832,54 +918,12 @@
 	return (int64_t)(data_src_r.mem_dtlb - data_src_l.mem_dtlb);
 }
 
-static const char * const tlb_access[] = {
-	"N/A",
-	"HIT",
-	"MISS",
-	"L1",
-	"L2",
-	"Walker",
-	"Fault",
-};
-#define NUM_TLB_ACCESS (sizeof(tlb_access)/sizeof(const char *))
-
 static int hist_entry__tlb_snprintf(struct hist_entry *he, char *bf,
 				    size_t size, unsigned int width)
 {
 	char out[64];
-	size_t sz = sizeof(out) - 1; /* -1 for null termination */
-	size_t l = 0, i;
-	u64 m = PERF_MEM_TLB_NA;
-	u64 hit, miss;
 
-	out[0] = '\0';
-
-	if (he->mem_info)
-		m = he->mem_info->data_src.mem_dtlb;
-
-	hit = m & PERF_MEM_TLB_HIT;
-	miss = m & PERF_MEM_TLB_MISS;
-
-	/* already taken care of */
-	m &= ~(PERF_MEM_TLB_HIT|PERF_MEM_TLB_MISS);
-
-	for (i = 0; m && i < NUM_TLB_ACCESS; i++, m >>= 1) {
-		if (!(m & 0x1))
-			continue;
-		if (l) {
-			strcat(out, " or ");
-			l += 4;
-		}
-		strncat(out, tlb_access[i], sz - l);
-		l += strlen(tlb_access[i]);
-	}
-	if (*out == '\0')
-		strcpy(out, "N/A");
-	if (hit)
-		strncat(out, " hit", sz - l);
-	if (miss)
-		strncat(out, " miss", sz - l);
-
+	perf_mem__tlb_scnprintf(out, sizeof(out), he->mem_info);
 	return repsep_snprintf(bf, size, "%-*s", width, out);
 }
 
@@ -902,61 +946,12 @@
 	return (int64_t)(data_src_r.mem_lvl - data_src_l.mem_lvl);
 }
 
-static const char * const mem_lvl[] = {
-	"N/A",
-	"HIT",
-	"MISS",
-	"L1",
-	"LFB",
-	"L2",
-	"L3",
-	"Local RAM",
-	"Remote RAM (1 hop)",
-	"Remote RAM (2 hops)",
-	"Remote Cache (1 hop)",
-	"Remote Cache (2 hops)",
-	"I/O",
-	"Uncached",
-};
-#define NUM_MEM_LVL (sizeof(mem_lvl)/sizeof(const char *))
-
 static int hist_entry__lvl_snprintf(struct hist_entry *he, char *bf,
 				    size_t size, unsigned int width)
 {
 	char out[64];
-	size_t sz = sizeof(out) - 1; /* -1 for null termination */
-	size_t i, l = 0;
-	u64 m =  PERF_MEM_LVL_NA;
-	u64 hit, miss;
 
-	if (he->mem_info)
-		m  = he->mem_info->data_src.mem_lvl;
-
-	out[0] = '\0';
-
-	hit = m & PERF_MEM_LVL_HIT;
-	miss = m & PERF_MEM_LVL_MISS;
-
-	/* already taken care of */
-	m &= ~(PERF_MEM_LVL_HIT|PERF_MEM_LVL_MISS);
-
-	for (i = 0; m && i < NUM_MEM_LVL; i++, m >>= 1) {
-		if (!(m & 0x1))
-			continue;
-		if (l) {
-			strcat(out, " or ");
-			l += 4;
-		}
-		strncat(out, mem_lvl[i], sz - l);
-		l += strlen(mem_lvl[i]);
-	}
-	if (*out == '\0')
-		strcpy(out, "N/A");
-	if (hit)
-		strncat(out, " hit", sz - l);
-	if (miss)
-		strncat(out, " miss", sz - l);
-
+	perf_mem__lvl_scnprintf(out, sizeof(out), he->mem_info);
 	return repsep_snprintf(bf, size, "%-*s", width, out);
 }
 
@@ -979,51 +974,15 @@
 	return (int64_t)(data_src_r.mem_snoop - data_src_l.mem_snoop);
 }
 
-static const char * const snoop_access[] = {
-	"N/A",
-	"None",
-	"Miss",
-	"Hit",
-	"HitM",
-};
-#define NUM_SNOOP_ACCESS (sizeof(snoop_access)/sizeof(const char *))
-
 static int hist_entry__snoop_snprintf(struct hist_entry *he, char *bf,
 				    size_t size, unsigned int width)
 {
 	char out[64];
-	size_t sz = sizeof(out) - 1; /* -1 for null termination */
-	size_t i, l = 0;
-	u64 m = PERF_MEM_SNOOP_NA;
 
-	out[0] = '\0';
-
-	if (he->mem_info)
-		m = he->mem_info->data_src.mem_snoop;
-
-	for (i = 0; m && i < NUM_SNOOP_ACCESS; i++, m >>= 1) {
-		if (!(m & 0x1))
-			continue;
-		if (l) {
-			strcat(out, " or ");
-			l += 4;
-		}
-		strncat(out, snoop_access[i], sz - l);
-		l += strlen(snoop_access[i]);
-	}
-
-	if (*out == '\0')
-		strcpy(out, "N/A");
-
+	perf_mem__snp_scnprintf(out, sizeof(out), he->mem_info);
 	return repsep_snprintf(bf, size, "%-*s", width, out);
 }
 
-static inline  u64 cl_address(u64 address)
-{
-	/* return the cacheline of the address */
-	return (address & ~(cacheline_size - 1));
-}
-
 static int64_t
 sort__dcacheline_cmp(struct hist_entry *left, struct hist_entry *right)
 {
@@ -1440,20 +1399,6 @@
 	struct sort_entry *se;
 };
 
-bool perf_hpp__same_sort_entry(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b)
-{
-	struct hpp_sort_entry *hse_a;
-	struct hpp_sort_entry *hse_b;
-
-	if (!perf_hpp__is_sort_entry(a) || !perf_hpp__is_sort_entry(b))
-		return false;
-
-	hse_a = container_of(a, struct hpp_sort_entry, hpp);
-	hse_b = container_of(b, struct hpp_sort_entry, hpp);
-
-	return hse_a->se == hse_b->se;
-}
-
 void perf_hpp__reset_sort_width(struct perf_hpp_fmt *fmt, struct hists *hists)
 {
 	struct hpp_sort_entry *hse;
@@ -1539,8 +1484,56 @@
 	return sort_fn(a, b);
 }
 
+bool perf_hpp__is_sort_entry(struct perf_hpp_fmt *format)
+{
+	return format->header == __sort__hpp_header;
+}
+
+#define MK_SORT_ENTRY_CHK(key)					\
+bool perf_hpp__is_ ## key ## _entry(struct perf_hpp_fmt *fmt)	\
+{								\
+	struct hpp_sort_entry *hse;				\
+								\
+	if (!perf_hpp__is_sort_entry(fmt))			\
+		return false;					\
+								\
+	hse = container_of(fmt, struct hpp_sort_entry, hpp);	\
+	return hse->se == &sort_ ## key ;			\
+}
+
+MK_SORT_ENTRY_CHK(trace)
+MK_SORT_ENTRY_CHK(srcline)
+MK_SORT_ENTRY_CHK(srcfile)
+MK_SORT_ENTRY_CHK(thread)
+MK_SORT_ENTRY_CHK(comm)
+MK_SORT_ENTRY_CHK(dso)
+MK_SORT_ENTRY_CHK(sym)
+
+
+static bool __sort__hpp_equal(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b)
+{
+	struct hpp_sort_entry *hse_a;
+	struct hpp_sort_entry *hse_b;
+
+	if (!perf_hpp__is_sort_entry(a) || !perf_hpp__is_sort_entry(b))
+		return false;
+
+	hse_a = container_of(a, struct hpp_sort_entry, hpp);
+	hse_b = container_of(b, struct hpp_sort_entry, hpp);
+
+	return hse_a->se == hse_b->se;
+}
+
+static void hse_free(struct perf_hpp_fmt *fmt)
+{
+	struct hpp_sort_entry *hse;
+
+	hse = container_of(fmt, struct hpp_sort_entry, hpp);
+	free(hse);
+}
+
 static struct hpp_sort_entry *
-__sort_dimension__alloc_hpp(struct sort_dimension *sd)
+__sort_dimension__alloc_hpp(struct sort_dimension *sd, int level)
 {
 	struct hpp_sort_entry *hse;
 
@@ -1560,40 +1553,92 @@
 	hse->hpp.cmp = __sort__hpp_cmp;
 	hse->hpp.collapse = __sort__hpp_collapse;
 	hse->hpp.sort = __sort__hpp_sort;
+	hse->hpp.equal = __sort__hpp_equal;
+	hse->hpp.free = hse_free;
 
 	INIT_LIST_HEAD(&hse->hpp.list);
 	INIT_LIST_HEAD(&hse->hpp.sort_list);
 	hse->hpp.elide = false;
 	hse->hpp.len = 0;
 	hse->hpp.user_len = 0;
+	hse->hpp.level = level;
 
 	return hse;
 }
 
-bool perf_hpp__is_sort_entry(struct perf_hpp_fmt *format)
+static void hpp_free(struct perf_hpp_fmt *fmt)
 {
-	return format->header == __sort__hpp_header;
+	free(fmt);
 }
 
-static int __sort_dimension__add_hpp_sort(struct sort_dimension *sd)
+static struct perf_hpp_fmt *__hpp_dimension__alloc_hpp(struct hpp_dimension *hd,
+						       int level)
 {
-	struct hpp_sort_entry *hse = __sort_dimension__alloc_hpp(sd);
+	struct perf_hpp_fmt *fmt;
+
+	fmt = memdup(hd->fmt, sizeof(*fmt));
+	if (fmt) {
+		INIT_LIST_HEAD(&fmt->list);
+		INIT_LIST_HEAD(&fmt->sort_list);
+		fmt->free = hpp_free;
+		fmt->level = level;
+	}
+
+	return fmt;
+}
+
+int hist_entry__filter(struct hist_entry *he, int type, const void *arg)
+{
+	struct perf_hpp_fmt *fmt;
+	struct hpp_sort_entry *hse;
+	int ret = -1;
+	int r;
+
+	perf_hpp_list__for_each_format(he->hpp_list, fmt) {
+		if (!perf_hpp__is_sort_entry(fmt))
+			continue;
+
+		hse = container_of(fmt, struct hpp_sort_entry, hpp);
+		if (hse->se->se_filter == NULL)
+			continue;
+
+		/*
+		 * hist entry is filtered if any of sort key in the hpp list
+		 * is applied.  But it should skip non-matched filter types.
+		 */
+		r = hse->se->se_filter(he, type, arg);
+		if (r >= 0) {
+			if (ret < 0)
+				ret = 0;
+			ret |= r;
+		}
+	}
+
+	return ret;
+}
+
+static int __sort_dimension__add_hpp_sort(struct sort_dimension *sd,
+					  struct perf_hpp_list *list,
+					  int level)
+{
+	struct hpp_sort_entry *hse = __sort_dimension__alloc_hpp(sd, level);
 
 	if (hse == NULL)
 		return -1;
 
-	perf_hpp__register_sort_field(&hse->hpp);
+	perf_hpp_list__register_sort_field(list, &hse->hpp);
 	return 0;
 }
 
-static int __sort_dimension__add_hpp_output(struct sort_dimension *sd)
+static int __sort_dimension__add_hpp_output(struct sort_dimension *sd,
+					    struct perf_hpp_list *list)
 {
-	struct hpp_sort_entry *hse = __sort_dimension__alloc_hpp(sd);
+	struct hpp_sort_entry *hse = __sort_dimension__alloc_hpp(sd, 0);
 
 	if (hse == NULL)
 		return -1;
 
-	perf_hpp__column_register(&hse->hpp);
+	perf_hpp_list__column_register(list, &hse->hpp);
 	return 0;
 }
 
@@ -1727,6 +1772,9 @@
 	if (hde->raw_trace)
 		goto raw_field;
 
+	if (!he->trace_output)
+		he->trace_output = get_trace_output(he);
+
 	field = hde->field;
 	namelen = strlen(field->name);
 	str = he->trace_output;
@@ -1776,6 +1824,11 @@
 
 	hde = container_of(fmt, struct hpp_dynamic_entry, hpp);
 
+	if (b == NULL) {
+		update_dynamic_len(hde, a);
+		return 0;
+	}
+
 	field = hde->field;
 	if (field->flags & FIELD_IS_DYNAMIC) {
 		unsigned long long dyn;
@@ -1790,9 +1843,6 @@
 	} else {
 		offset = field->offset;
 		size = field->size;
-
-		update_dynamic_len(hde, a);
-		update_dynamic_len(hde, b);
 	}
 
 	return memcmp(a->raw_data + offset, b->raw_data + offset, size);
@@ -1803,8 +1853,31 @@
 	return fmt->cmp == __sort__hde_cmp;
 }
 
+static bool __sort__hde_equal(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b)
+{
+	struct hpp_dynamic_entry *hde_a;
+	struct hpp_dynamic_entry *hde_b;
+
+	if (!perf_hpp__is_dynamic_entry(a) || !perf_hpp__is_dynamic_entry(b))
+		return false;
+
+	hde_a = container_of(a, struct hpp_dynamic_entry, hpp);
+	hde_b = container_of(b, struct hpp_dynamic_entry, hpp);
+
+	return hde_a->field == hde_b->field;
+}
+
+static void hde_free(struct perf_hpp_fmt *fmt)
+{
+	struct hpp_dynamic_entry *hde;
+
+	hde = container_of(fmt, struct hpp_dynamic_entry, hpp);
+	free(hde);
+}
+
 static struct hpp_dynamic_entry *
-__alloc_dynamic_entry(struct perf_evsel *evsel, struct format_field *field)
+__alloc_dynamic_entry(struct perf_evsel *evsel, struct format_field *field,
+		      int level)
 {
 	struct hpp_dynamic_entry *hde;
 
@@ -1827,16 +1900,47 @@
 	hde->hpp.cmp = __sort__hde_cmp;
 	hde->hpp.collapse = __sort__hde_cmp;
 	hde->hpp.sort = __sort__hde_cmp;
+	hde->hpp.equal = __sort__hde_equal;
+	hde->hpp.free = hde_free;
 
 	INIT_LIST_HEAD(&hde->hpp.list);
 	INIT_LIST_HEAD(&hde->hpp.sort_list);
 	hde->hpp.elide = false;
 	hde->hpp.len = 0;
 	hde->hpp.user_len = 0;
+	hde->hpp.level = level;
 
 	return hde;
 }
 
+struct perf_hpp_fmt *perf_hpp_fmt__dup(struct perf_hpp_fmt *fmt)
+{
+	struct perf_hpp_fmt *new_fmt = NULL;
+
+	if (perf_hpp__is_sort_entry(fmt)) {
+		struct hpp_sort_entry *hse, *new_hse;
+
+		hse = container_of(fmt, struct hpp_sort_entry, hpp);
+		new_hse = memdup(hse, sizeof(*hse));
+		if (new_hse)
+			new_fmt = &new_hse->hpp;
+	} else if (perf_hpp__is_dynamic_entry(fmt)) {
+		struct hpp_dynamic_entry *hde, *new_hde;
+
+		hde = container_of(fmt, struct hpp_dynamic_entry, hpp);
+		new_hde = memdup(hde, sizeof(*hde));
+		if (new_hde)
+			new_fmt = &new_hde->hpp;
+	} else {
+		new_fmt = memdup(fmt, sizeof(*fmt));
+	}
+
+	INIT_LIST_HEAD(&new_fmt->list);
+	INIT_LIST_HEAD(&new_fmt->sort_list);
+
+	return new_fmt;
+}
+
 static int parse_field_name(char *str, char **event, char **field, char **opt)
 {
 	char *event_name, *field_name, *opt_name;
@@ -1908,11 +2012,11 @@
 
 static int __dynamic_dimension__add(struct perf_evsel *evsel,
 				    struct format_field *field,
-				    bool raw_trace)
+				    bool raw_trace, int level)
 {
 	struct hpp_dynamic_entry *hde;
 
-	hde = __alloc_dynamic_entry(evsel, field);
+	hde = __alloc_dynamic_entry(evsel, field, level);
 	if (hde == NULL)
 		return -ENOMEM;
 
@@ -1922,14 +2026,14 @@
 	return 0;
 }
 
-static int add_evsel_fields(struct perf_evsel *evsel, bool raw_trace)
+static int add_evsel_fields(struct perf_evsel *evsel, bool raw_trace, int level)
 {
 	int ret;
 	struct format_field *field;
 
 	field = evsel->tp_format->format.fields;
 	while (field) {
-		ret = __dynamic_dimension__add(evsel, field, raw_trace);
+		ret = __dynamic_dimension__add(evsel, field, raw_trace, level);
 		if (ret < 0)
 			return ret;
 
@@ -1938,7 +2042,8 @@
 	return 0;
 }
 
-static int add_all_dynamic_fields(struct perf_evlist *evlist, bool raw_trace)
+static int add_all_dynamic_fields(struct perf_evlist *evlist, bool raw_trace,
+				  int level)
 {
 	int ret;
 	struct perf_evsel *evsel;
@@ -1947,7 +2052,7 @@
 		if (evsel->attr.type != PERF_TYPE_TRACEPOINT)
 			continue;
 
-		ret = add_evsel_fields(evsel, raw_trace);
+		ret = add_evsel_fields(evsel, raw_trace, level);
 		if (ret < 0)
 			return ret;
 	}
@@ -1955,7 +2060,7 @@
 }
 
 static int add_all_matching_fields(struct perf_evlist *evlist,
-				   char *field_name, bool raw_trace)
+				   char *field_name, bool raw_trace, int level)
 {
 	int ret = -ESRCH;
 	struct perf_evsel *evsel;
@@ -1969,14 +2074,15 @@
 		if (field == NULL)
 			continue;
 
-		ret = __dynamic_dimension__add(evsel, field, raw_trace);
+		ret = __dynamic_dimension__add(evsel, field, raw_trace, level);
 		if (ret < 0)
 			break;
 	}
 	return ret;
 }
 
-static int add_dynamic_entry(struct perf_evlist *evlist, const char *tok)
+static int add_dynamic_entry(struct perf_evlist *evlist, const char *tok,
+			     int level)
 {
 	char *str, *event_name, *field_name, *opt_name;
 	struct perf_evsel *evsel;
@@ -2006,12 +2112,12 @@
 	}
 
 	if (!strcmp(field_name, "trace_fields")) {
-		ret = add_all_dynamic_fields(evlist, raw_trace);
+		ret = add_all_dynamic_fields(evlist, raw_trace, level);
 		goto out;
 	}
 
 	if (event_name == NULL) {
-		ret = add_all_matching_fields(evlist, field_name, raw_trace);
+		ret = add_all_matching_fields(evlist, field_name, raw_trace, level);
 		goto out;
 	}
 
@@ -2029,7 +2135,7 @@
 	}
 
 	if (!strcmp(field_name, "*")) {
-		ret = add_evsel_fields(evsel, raw_trace);
+		ret = add_evsel_fields(evsel, raw_trace, level);
 	} else {
 		field = pevent_find_any_field(evsel->tp_format, field_name);
 		if (field == NULL) {
@@ -2038,7 +2144,7 @@
 			return -ENOENT;
 		}
 
-		ret = __dynamic_dimension__add(evsel, field, raw_trace);
+		ret = __dynamic_dimension__add(evsel, field, raw_trace, level);
 	}
 
 out:
@@ -2046,12 +2152,14 @@
 	return ret;
 }
 
-static int __sort_dimension__add(struct sort_dimension *sd)
+static int __sort_dimension__add(struct sort_dimension *sd,
+				 struct perf_hpp_list *list,
+				 int level)
 {
 	if (sd->taken)
 		return 0;
 
-	if (__sort_dimension__add_hpp_sort(sd) < 0)
+	if (__sort_dimension__add_hpp_sort(sd, list, level) < 0)
 		return -1;
 
 	if (sd->entry->se_collapse)
@@ -2062,46 +2170,63 @@
 	return 0;
 }
 
-static int __hpp_dimension__add(struct hpp_dimension *hd)
+static int __hpp_dimension__add(struct hpp_dimension *hd,
+				struct perf_hpp_list *list,
+				int level)
 {
-	if (!hd->taken) {
-		hd->taken = 1;
+	struct perf_hpp_fmt *fmt;
 
-		perf_hpp__register_sort_field(hd->fmt);
-	}
+	if (hd->taken)
+		return 0;
+
+	fmt = __hpp_dimension__alloc_hpp(hd, level);
+	if (!fmt)
+		return -1;
+
+	hd->taken = 1;
+	perf_hpp_list__register_sort_field(list, fmt);
 	return 0;
 }
 
-static int __sort_dimension__add_output(struct sort_dimension *sd)
+static int __sort_dimension__add_output(struct perf_hpp_list *list,
+					struct sort_dimension *sd)
 {
 	if (sd->taken)
 		return 0;
 
-	if (__sort_dimension__add_hpp_output(sd) < 0)
+	if (__sort_dimension__add_hpp_output(sd, list) < 0)
 		return -1;
 
 	sd->taken = 1;
 	return 0;
 }
 
-static int __hpp_dimension__add_output(struct hpp_dimension *hd)
+static int __hpp_dimension__add_output(struct perf_hpp_list *list,
+				       struct hpp_dimension *hd)
 {
-	if (!hd->taken) {
-		hd->taken = 1;
+	struct perf_hpp_fmt *fmt;
 
-		perf_hpp__column_register(hd->fmt);
-	}
+	if (hd->taken)
+		return 0;
+
+	fmt = __hpp_dimension__alloc_hpp(hd, 0);
+	if (!fmt)
+		return -1;
+
+	hd->taken = 1;
+	perf_hpp_list__column_register(list, fmt);
 	return 0;
 }
 
 int hpp_dimension__add_output(unsigned col)
 {
 	BUG_ON(col >= PERF_HPP__MAX_INDEX);
-	return __hpp_dimension__add_output(&hpp_sort_dimensions[col]);
+	return __hpp_dimension__add_output(&perf_hpp_list, &hpp_sort_dimensions[col]);
 }
 
-static int sort_dimension__add(const char *tok,
-			       struct perf_evlist *evlist __maybe_unused)
+static int sort_dimension__add(struct perf_hpp_list *list, const char *tok,
+			       struct perf_evlist *evlist __maybe_unused,
+			       int level)
 {
 	unsigned int i;
 
@@ -2136,9 +2261,13 @@
 			sort__has_dso = 1;
 		} else if (sd->entry == &sort_socket) {
 			sort__has_socket = 1;
+		} else if (sd->entry == &sort_thread) {
+			sort__has_thread = 1;
+		} else if (sd->entry == &sort_comm) {
+			sort__has_comm = 1;
 		}
 
-		return __sort_dimension__add(sd);
+		return __sort_dimension__add(sd, list, level);
 	}
 
 	for (i = 0; i < ARRAY_SIZE(hpp_sort_dimensions); i++) {
@@ -2147,7 +2276,7 @@
 		if (strncasecmp(tok, hd->name, strlen(tok)))
 			continue;
 
-		return __hpp_dimension__add(hd);
+		return __hpp_dimension__add(hd, list, level);
 	}
 
 	for (i = 0; i < ARRAY_SIZE(bstack_sort_dimensions); i++) {
@@ -2162,7 +2291,7 @@
 		if (sd->entry == &sort_sym_from || sd->entry == &sort_sym_to)
 			sort__has_sym = 1;
 
-		__sort_dimension__add(sd);
+		__sort_dimension__add(sd, list, level);
 		return 0;
 	}
 
@@ -2178,16 +2307,60 @@
 		if (sd->entry == &sort_mem_daddr_sym)
 			sort__has_sym = 1;
 
-		__sort_dimension__add(sd);
+		__sort_dimension__add(sd, list, level);
 		return 0;
 	}
 
-	if (!add_dynamic_entry(evlist, tok))
+	if (!add_dynamic_entry(evlist, tok, level))
 		return 0;
 
 	return -ESRCH;
 }
 
+static int setup_sort_list(struct perf_hpp_list *list, char *str,
+			   struct perf_evlist *evlist)
+{
+	char *tmp, *tok;
+	int ret = 0;
+	int level = 0;
+	int next_level = 1;
+	bool in_group = false;
+
+	do {
+		tok = str;
+		tmp = strpbrk(str, "{}, ");
+		if (tmp) {
+			if (in_group)
+				next_level = level;
+			else
+				next_level = level + 1;
+
+			if (*tmp == '{')
+				in_group = true;
+			else if (*tmp == '}')
+				in_group = false;
+
+			*tmp = '\0';
+			str = tmp + 1;
+		}
+
+		if (*tok) {
+			ret = sort_dimension__add(list, tok, evlist, level);
+			if (ret == -EINVAL) {
+				error("Invalid --sort key: `%s'", tok);
+				break;
+			} else if (ret == -ESRCH) {
+				error("Unknown --sort key: `%s'", tok);
+				break;
+			}
+		}
+
+		level = next_level;
+	} while (tmp);
+
+	return ret;
+}
+
 static const char *get_default_sort_order(struct perf_evlist *evlist)
 {
 	const char *default_sort_orders[] = {
@@ -2282,7 +2455,7 @@
 
 static int __setup_sorting(struct perf_evlist *evlist)
 {
-	char *tmp, *tok, *str;
+	char *str;
 	const char *sort_keys;
 	int ret = 0;
 
@@ -2320,17 +2493,7 @@
 		}
 	}
 
-	for (tok = strtok_r(str, ", ", &tmp);
-			tok; tok = strtok_r(NULL, ", ", &tmp)) {
-		ret = sort_dimension__add(tok, evlist);
-		if (ret == -EINVAL) {
-			error("Invalid --sort key: `%s'", tok);
-			break;
-		} else if (ret == -ESRCH) {
-			error("Unknown --sort key: `%s'", tok);
-			break;
-		}
-	}
+	ret = setup_sort_list(&perf_hpp_list, str, evlist);
 
 	free(str);
 	return ret;
@@ -2341,7 +2504,7 @@
 	struct perf_hpp_fmt *fmt;
 	struct hpp_sort_entry *hse;
 
-	perf_hpp__for_each_format(fmt) {
+	perf_hpp_list__for_each_format(&perf_hpp_list, fmt) {
 		if (!perf_hpp__is_sort_entry(fmt))
 			continue;
 
@@ -2401,7 +2564,7 @@
 	struct perf_hpp_fmt *fmt;
 	struct hpp_sort_entry *hse;
 
-	perf_hpp__for_each_format(fmt) {
+	perf_hpp_list__for_each_format(&perf_hpp_list, fmt) {
 		if (!perf_hpp__is_sort_entry(fmt))
 			continue;
 
@@ -2413,7 +2576,7 @@
 	 * It makes no sense to elide all of sort entries.
 	 * Just revert them to show up again.
 	 */
-	perf_hpp__for_each_format(fmt) {
+	perf_hpp_list__for_each_format(&perf_hpp_list, fmt) {
 		if (!perf_hpp__is_sort_entry(fmt))
 			continue;
 
@@ -2421,7 +2584,7 @@
 			return;
 	}
 
-	perf_hpp__for_each_format(fmt) {
+	perf_hpp_list__for_each_format(&perf_hpp_list, fmt) {
 		if (!perf_hpp__is_sort_entry(fmt))
 			continue;
 
@@ -2429,7 +2592,7 @@
 	}
 }
 
-static int output_field_add(char *tok)
+static int output_field_add(struct perf_hpp_list *list, char *tok)
 {
 	unsigned int i;
 
@@ -2439,7 +2602,7 @@
 		if (strncasecmp(tok, sd->name, strlen(tok)))
 			continue;
 
-		return __sort_dimension__add_output(sd);
+		return __sort_dimension__add_output(list, sd);
 	}
 
 	for (i = 0; i < ARRAY_SIZE(hpp_sort_dimensions); i++) {
@@ -2448,7 +2611,7 @@
 		if (strncasecmp(tok, hd->name, strlen(tok)))
 			continue;
 
-		return __hpp_dimension__add_output(hd);
+		return __hpp_dimension__add_output(list, hd);
 	}
 
 	for (i = 0; i < ARRAY_SIZE(bstack_sort_dimensions); i++) {
@@ -2457,7 +2620,7 @@
 		if (strncasecmp(tok, sd->name, strlen(tok)))
 			continue;
 
-		return __sort_dimension__add_output(sd);
+		return __sort_dimension__add_output(list, sd);
 	}
 
 	for (i = 0; i < ARRAY_SIZE(memory_sort_dimensions); i++) {
@@ -2466,12 +2629,32 @@
 		if (strncasecmp(tok, sd->name, strlen(tok)))
 			continue;
 
-		return __sort_dimension__add_output(sd);
+		return __sort_dimension__add_output(list, sd);
 	}
 
 	return -ESRCH;
 }
 
+static int setup_output_list(struct perf_hpp_list *list, char *str)
+{
+	char *tmp, *tok;
+	int ret = 0;
+
+	for (tok = strtok_r(str, ", ", &tmp);
+			tok; tok = strtok_r(NULL, ", ", &tmp)) {
+		ret = output_field_add(list, tok);
+		if (ret == -EINVAL) {
+			error("Invalid --fields key: `%s'", tok);
+			break;
+		} else if (ret == -ESRCH) {
+			error("Unknown --fields key: `%s'", tok);
+			break;
+		}
+	}
+
+	return ret;
+}
+
 static void reset_dimensions(void)
 {
 	unsigned int i;
@@ -2496,7 +2679,7 @@
 
 static int __setup_output_field(void)
 {
-	char *tmp, *tok, *str, *strp;
+	char *str, *strp;
 	int ret = -EINVAL;
 
 	if (field_order == NULL)
@@ -2516,17 +2699,7 @@
 		goto out;
 	}
 
-	for (tok = strtok_r(strp, ", ", &tmp);
-			tok; tok = strtok_r(NULL, ", ", &tmp)) {
-		ret = output_field_add(tok);
-		if (ret == -EINVAL) {
-			error("Invalid --fields key: `%s'", tok);
-			break;
-		} else if (ret == -ESRCH) {
-			error("Unknown --fields key: `%s'", tok);
-			break;
-		}
-	}
+	ret = setup_output_list(&perf_hpp_list, strp);
 
 out:
 	free(str);
@@ -2542,7 +2715,7 @@
 		return err;
 
 	if (parent_pattern != default_parent_pattern) {
-		err = sort_dimension__add("parent", evlist);
+		err = sort_dimension__add(&perf_hpp_list, "parent", evlist, -1);
 		if (err < 0)
 			return err;
 	}
@@ -2560,9 +2733,13 @@
 		return err;
 
 	/* copy sort keys to output fields */
-	perf_hpp__setup_output_field();
+	perf_hpp__setup_output_field(&perf_hpp_list);
 	/* and then copy output fields to sort keys */
-	perf_hpp__append_sort_keys();
+	perf_hpp__append_sort_keys(&perf_hpp_list);
+
+	/* setup hists-specific output fields */
+	if (perf_hpp__setup_hists_formats(&perf_hpp_list, evlist) < 0)
+		return -1;
 
 	return 0;
 }
@@ -2578,5 +2755,5 @@
 	sort_order = NULL;
 
 	reset_dimensions();
-	perf_hpp__reset_output_field();
+	perf_hpp__reset_output_field(&perf_hpp_list);
 }
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index 687bbb12..3f4e359 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -32,9 +32,12 @@
 extern regex_t ignore_callees_regex;
 extern int have_ignore_callees;
 extern int sort__need_collapse;
+extern int sort__has_dso;
 extern int sort__has_parent;
 extern int sort__has_sym;
 extern int sort__has_socket;
+extern int sort__has_thread;
+extern int sort__has_comm;
 extern enum sort_mode sort__mode;
 extern struct sort_entry sort_comm;
 extern struct sort_entry sort_dso;
@@ -94,9 +97,11 @@
 	s32			socket;
 	s32			cpu;
 	u8			cpumode;
+	u8			depth;
 
 	/* We are added by hists__add_dummy_entry. */
 	bool			dummy;
+	bool			leaf;
 
 	char			level;
 	u8			filtered;
@@ -113,18 +118,28 @@
 			bool	init_have_children;
 			bool	unfolded;
 			bool	has_children;
+			bool	has_no_entry;
 		};
 	};
 	char			*srcline;
 	char			*srcfile;
 	struct symbol		*parent;
-	struct rb_root		sorted_chain;
 	struct branch_info	*branch_info;
 	struct hists		*hists;
 	struct mem_info		*mem_info;
 	void			*raw_data;
 	u32			raw_size;
 	void			*trace_output;
+	struct perf_hpp_list	*hpp_list;
+	struct hist_entry	*parent_he;
+	union {
+		/* this is for hierarchical entry structure */
+		struct {
+			struct rb_root	hroot_in;
+			struct rb_root  hroot_out;
+		};				/* non-leaf entries */
+		struct rb_root	sorted_chain;	/* leaf entry has callchains */
+	};
 	struct callchain_root	callchain[0]; /* must be last member */
 };
 
@@ -160,6 +175,17 @@
 	return period * 100.0 / total_period;
 }
 
+static inline u64 cl_address(u64 address)
+{
+	/* return the cacheline of the address */
+	return (address & ~(cacheline_size - 1));
+}
+
+static inline u64 cl_offset(u64 address)
+{
+	/* return the cacheline of the address */
+	return (address & (cacheline_size - 1));
+}
 
 enum sort_mode {
 	SORT_MODE__NORMAL,
@@ -221,6 +247,7 @@
 	int64_t	(*se_sort)(struct hist_entry *, struct hist_entry *);
 	int	(*se_snprintf)(struct hist_entry *he, char *bf, size_t size,
 			       unsigned int width);
+	int	(*se_filter)(struct hist_entry *he, int type, const void *arg);
 	u8	se_width_idx;
 };
 
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index 6ac0314..b33ffb2 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -2,6 +2,7 @@
 #include "evsel.h"
 #include "stat.h"
 #include "color.h"
+#include "pmu.h"
 
 enum {
 	CTX_BIT_USER	= 1 << 0,
@@ -14,6 +15,13 @@
 
 #define NUM_CTX CTX_BIT_MAX
 
+/*
+ * AGGR_GLOBAL: Use CPU 0
+ * AGGR_SOCKET: Use first CPU of socket
+ * AGGR_CORE: Use first CPU of core
+ * AGGR_NONE: Use matching CPU
+ * AGGR_THREAD: Not supported?
+ */
 static struct stats runtime_nsecs_stats[MAX_NR_CPUS];
 static struct stats runtime_cycles_stats[NUM_CTX][MAX_NR_CPUS];
 static struct stats runtime_stalled_cycles_front_stats[NUM_CTX][MAX_NR_CPUS];
@@ -28,9 +36,15 @@
 static struct stats runtime_cycles_in_tx_stats[NUM_CTX][MAX_NR_CPUS];
 static struct stats runtime_transaction_stats[NUM_CTX][MAX_NR_CPUS];
 static struct stats runtime_elision_stats[NUM_CTX][MAX_NR_CPUS];
+static bool have_frontend_stalled;
 
 struct stats walltime_nsecs_stats;
 
+void perf_stat__init_shadow_stats(void)
+{
+	have_frontend_stalled = pmu_have_event("cpu", "stalled-cycles-frontend");
+}
+
 static int evsel_context(struct perf_evsel *evsel)
 {
 	int ctx = 0;
@@ -137,9 +151,10 @@
 	return color;
 }
 
-static void print_stalled_cycles_frontend(FILE *out, int cpu,
+static void print_stalled_cycles_frontend(int cpu,
 					  struct perf_evsel *evsel
-					  __maybe_unused, double avg)
+					  __maybe_unused, double avg,
+					  struct perf_stat_output_ctx *out)
 {
 	double total, ratio = 0.0;
 	const char *color;
@@ -152,14 +167,17 @@
 
 	color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio);
 
-	fprintf(out, " #  ");
-	color_fprintf(out, color, "%6.2f%%", ratio);
-	fprintf(out, " frontend cycles idle   ");
+	if (ratio)
+		out->print_metric(out->ctx, color, "%7.2f%%", "frontend cycles idle",
+				  ratio);
+	else
+		out->print_metric(out->ctx, NULL, NULL, "frontend cycles idle", 0);
 }
 
-static void print_stalled_cycles_backend(FILE *out, int cpu,
+static void print_stalled_cycles_backend(int cpu,
 					 struct perf_evsel *evsel
-					 __maybe_unused, double avg)
+					 __maybe_unused, double avg,
+					 struct perf_stat_output_ctx *out)
 {
 	double total, ratio = 0.0;
 	const char *color;
@@ -172,14 +190,13 @@
 
 	color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio);
 
-	fprintf(out, " #  ");
-	color_fprintf(out, color, "%6.2f%%", ratio);
-	fprintf(out, " backend  cycles idle   ");
+	out->print_metric(out->ctx, color, "%6.2f%%", "backend cycles idle", ratio);
 }
 
-static void print_branch_misses(FILE *out, int cpu,
+static void print_branch_misses(int cpu,
 				struct perf_evsel *evsel __maybe_unused,
-				double avg)
+				double avg,
+				struct perf_stat_output_ctx *out)
 {
 	double total, ratio = 0.0;
 	const char *color;
@@ -192,14 +209,13 @@
 
 	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
 
-	fprintf(out, " #  ");
-	color_fprintf(out, color, "%6.2f%%", ratio);
-	fprintf(out, " of all branches        ");
+	out->print_metric(out->ctx, color, "%7.2f%%", "of all branches", ratio);
 }
 
-static void print_l1_dcache_misses(FILE *out, int cpu,
+static void print_l1_dcache_misses(int cpu,
 				   struct perf_evsel *evsel __maybe_unused,
-				   double avg)
+				   double avg,
+				   struct perf_stat_output_ctx *out)
 {
 	double total, ratio = 0.0;
 	const char *color;
@@ -212,14 +228,13 @@
 
 	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
 
-	fprintf(out, " #  ");
-	color_fprintf(out, color, "%6.2f%%", ratio);
-	fprintf(out, " of all L1-dcache hits  ");
+	out->print_metric(out->ctx, color, "%7.2f%%", "of all L1-dcache hits", ratio);
 }
 
-static void print_l1_icache_misses(FILE *out, int cpu,
+static void print_l1_icache_misses(int cpu,
 				   struct perf_evsel *evsel __maybe_unused,
-				   double avg)
+				   double avg,
+				   struct perf_stat_output_ctx *out)
 {
 	double total, ratio = 0.0;
 	const char *color;
@@ -231,15 +246,13 @@
 		ratio = avg / total * 100.0;
 
 	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
-
-	fprintf(out, " #  ");
-	color_fprintf(out, color, "%6.2f%%", ratio);
-	fprintf(out, " of all L1-icache hits  ");
+	out->print_metric(out->ctx, color, "%7.2f%%", "of all L1-icache hits", ratio);
 }
 
-static void print_dtlb_cache_misses(FILE *out, int cpu,
+static void print_dtlb_cache_misses(int cpu,
 				    struct perf_evsel *evsel __maybe_unused,
-				    double avg)
+				    double avg,
+				    struct perf_stat_output_ctx *out)
 {
 	double total, ratio = 0.0;
 	const char *color;
@@ -251,15 +264,13 @@
 		ratio = avg / total * 100.0;
 
 	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
-
-	fprintf(out, " #  ");
-	color_fprintf(out, color, "%6.2f%%", ratio);
-	fprintf(out, " of all dTLB cache hits ");
+	out->print_metric(out->ctx, color, "%7.2f%%", "of all dTLB cache hits", ratio);
 }
 
-static void print_itlb_cache_misses(FILE *out, int cpu,
+static void print_itlb_cache_misses(int cpu,
 				    struct perf_evsel *evsel __maybe_unused,
-				    double avg)
+				    double avg,
+				    struct perf_stat_output_ctx *out)
 {
 	double total, ratio = 0.0;
 	const char *color;
@@ -271,15 +282,13 @@
 		ratio = avg / total * 100.0;
 
 	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
-
-	fprintf(out, " #  ");
-	color_fprintf(out, color, "%6.2f%%", ratio);
-	fprintf(out, " of all iTLB cache hits ");
+	out->print_metric(out->ctx, color, "%7.2f%%", "of all iTLB cache hits", ratio);
 }
 
-static void print_ll_cache_misses(FILE *out, int cpu,
+static void print_ll_cache_misses(int cpu,
 				  struct perf_evsel *evsel __maybe_unused,
-				  double avg)
+				  double avg,
+				  struct perf_stat_output_ctx *out)
 {
 	double total, ratio = 0.0;
 	const char *color;
@@ -291,15 +300,15 @@
 		ratio = avg / total * 100.0;
 
 	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
-
-	fprintf(out, " #  ");
-	color_fprintf(out, color, "%6.2f%%", ratio);
-	fprintf(out, " of all LL-cache hits   ");
+	out->print_metric(out->ctx, color, "%7.2f%%", "of all LL-cache hits", ratio);
 }
 
-void perf_stat__print_shadow_stats(FILE *out, struct perf_evsel *evsel,
-				   double avg, int cpu, enum aggr_mode aggr)
+void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
+				   double avg, int cpu,
+				   struct perf_stat_output_ctx *out)
 {
+	void *ctxp = out->ctx;
+	print_metric_t print_metric = out->print_metric;
 	double total, ratio = 0.0, total2;
 	int ctx = evsel_context(evsel);
 
@@ -307,119 +316,145 @@
 		total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
 		if (total) {
 			ratio = avg / total;
-			fprintf(out, " #   %5.2f  insns per cycle        ", ratio);
+			print_metric(ctxp, NULL, "%7.2f ",
+					"insn per cycle", ratio);
 		} else {
-			fprintf(out, "                                   ");
+			print_metric(ctxp, NULL, NULL, "insn per cycle", 0);
 		}
 		total = avg_stats(&runtime_stalled_cycles_front_stats[ctx][cpu]);
 		total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[ctx][cpu]));
 
 		if (total && avg) {
+			out->new_line(ctxp);
 			ratio = total / avg;
-			fprintf(out, "\n");
-			if (aggr == AGGR_NONE)
-				fprintf(out, "        ");
-			fprintf(out, "                                                  #   %5.2f  stalled cycles per insn", ratio);
+			print_metric(ctxp, NULL, "%7.2f ",
+					"stalled cycles per insn",
+					ratio);
+		} else if (have_frontend_stalled) {
+			print_metric(ctxp, NULL, NULL,
+				     "stalled cycles per insn", 0);
 		}
-
-	} else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) &&
-			runtime_branches_stats[ctx][cpu].n != 0) {
-		print_branch_misses(out, cpu, evsel, avg);
+	} else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) {
+		if (runtime_branches_stats[ctx][cpu].n != 0)
+			print_branch_misses(cpu, evsel, avg, out);
+		else
+			print_metric(ctxp, NULL, NULL, "of all branches", 0);
 	} else if (
 		evsel->attr.type == PERF_TYPE_HW_CACHE &&
 		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_L1D |
 					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
-					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
-			runtime_l1_dcache_stats[ctx][cpu].n != 0) {
-		print_l1_dcache_misses(out, cpu, evsel, avg);
+					 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
+		if (runtime_l1_dcache_stats[ctx][cpu].n != 0)
+			print_l1_dcache_misses(cpu, evsel, avg, out);
+		else
+			print_metric(ctxp, NULL, NULL, "of all L1-dcache hits", 0);
 	} else if (
 		evsel->attr.type == PERF_TYPE_HW_CACHE &&
 		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_L1I |
 					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
-					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
-			runtime_l1_icache_stats[ctx][cpu].n != 0) {
-		print_l1_icache_misses(out, cpu, evsel, avg);
+					 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
+		if (runtime_l1_icache_stats[ctx][cpu].n != 0)
+			print_l1_icache_misses(cpu, evsel, avg, out);
+		else
+			print_metric(ctxp, NULL, NULL, "of all L1-icache hits", 0);
 	} else if (
 		evsel->attr.type == PERF_TYPE_HW_CACHE &&
 		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_DTLB |
 					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
-					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
-			runtime_dtlb_cache_stats[ctx][cpu].n != 0) {
-		print_dtlb_cache_misses(out, cpu, evsel, avg);
+					 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
+		if (runtime_dtlb_cache_stats[ctx][cpu].n != 0)
+			print_dtlb_cache_misses(cpu, evsel, avg, out);
+		else
+			print_metric(ctxp, NULL, NULL, "of all dTLB cache hits", 0);
 	} else if (
 		evsel->attr.type == PERF_TYPE_HW_CACHE &&
 		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_ITLB |
 					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
-					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
-			runtime_itlb_cache_stats[ctx][cpu].n != 0) {
-		print_itlb_cache_misses(out, cpu, evsel, avg);
+					 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
+		if (runtime_itlb_cache_stats[ctx][cpu].n != 0)
+			print_itlb_cache_misses(cpu, evsel, avg, out);
+		else
+			print_metric(ctxp, NULL, NULL, "of all iTLB cache hits", 0);
 	} else if (
 		evsel->attr.type == PERF_TYPE_HW_CACHE &&
 		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_LL |
 					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
-					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
-			runtime_ll_cache_stats[ctx][cpu].n != 0) {
-		print_ll_cache_misses(out, cpu, evsel, avg);
-	} else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) &&
-			runtime_cacherefs_stats[ctx][cpu].n != 0) {
+					 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
+		if (runtime_ll_cache_stats[ctx][cpu].n != 0)
+			print_ll_cache_misses(cpu, evsel, avg, out);
+		else
+			print_metric(ctxp, NULL, NULL, "of all LL-cache hits", 0);
+	} else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) {
 		total = avg_stats(&runtime_cacherefs_stats[ctx][cpu]);
 
 		if (total)
 			ratio = avg * 100 / total;
 
-		fprintf(out, " # %8.3f %% of all cache refs    ", ratio);
-
+		if (runtime_cacherefs_stats[ctx][cpu].n != 0)
+			print_metric(ctxp, NULL, "%8.3f %%",
+				     "of all cache refs", ratio);
+		else
+			print_metric(ctxp, NULL, NULL, "of all cache refs", 0);
 	} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
-		print_stalled_cycles_frontend(out, cpu, evsel, avg);
+		print_stalled_cycles_frontend(cpu, evsel, avg, out);
 	} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
-		print_stalled_cycles_backend(out, cpu, evsel, avg);
+		print_stalled_cycles_backend(cpu, evsel, avg, out);
 	} else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
 		total = avg_stats(&runtime_nsecs_stats[cpu]);
 
 		if (total) {
 			ratio = avg / total;
-			fprintf(out, " # %8.3f GHz                    ", ratio);
+			print_metric(ctxp, NULL, "%8.3f", "GHz", ratio);
 		} else {
-			fprintf(out, "                                   ");
+			print_metric(ctxp, NULL, NULL, "Ghz", 0);
 		}
 	} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) {
 		total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
 		if (total)
-			fprintf(out,
-				" #   %5.2f%% transactional cycles   ",
-				100.0 * (avg / total));
+			print_metric(ctxp, NULL,
+					"%7.2f%%", "transactional cycles",
+					100.0 * (avg / total));
+		else
+			print_metric(ctxp, NULL, NULL, "transactional cycles",
+				     0);
 	} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) {
 		total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
 		total2 = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
 		if (total2 < avg)
 			total2 = avg;
 		if (total)
-			fprintf(out,
-				" #   %5.2f%% aborted cycles         ",
+			print_metric(ctxp, NULL, "%7.2f%%", "aborted cycles",
 				100.0 * ((total2-avg) / total));
-	} else if (perf_stat_evsel__is(evsel, TRANSACTION_START) &&
-		   runtime_cycles_in_tx_stats[ctx][cpu].n != 0) {
+		else
+			print_metric(ctxp, NULL, NULL, "aborted cycles", 0);
+	} else if (perf_stat_evsel__is(evsel, TRANSACTION_START)) {
 		total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
 
 		if (avg)
 			ratio = total / avg;
 
-		fprintf(out, " # %8.0f cycles / transaction   ", ratio);
-	} else if (perf_stat_evsel__is(evsel, ELISION_START) &&
-		   runtime_cycles_in_tx_stats[ctx][cpu].n != 0) {
+		if (runtime_cycles_in_tx_stats[ctx][cpu].n != 0)
+			print_metric(ctxp, NULL, "%8.0f",
+				     "cycles / transaction", ratio);
+		else
+			print_metric(ctxp, NULL, NULL, "cycles / transaction",
+				     0);
+	} else if (perf_stat_evsel__is(evsel, ELISION_START)) {
 		total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
 
 		if (avg)
 			ratio = total / avg;
 
-		fprintf(out, " # %8.0f cycles / elision       ", ratio);
+		print_metric(ctxp, NULL, "%8.0f", "cycles / elision", ratio);
 	} else if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK)) {
 		if ((ratio = avg_stats(&walltime_nsecs_stats)) != 0)
-			fprintf(out, " # %8.3f CPUs utilized          ", avg / ratio);
+			print_metric(ctxp, NULL, "%8.3f", "CPUs utilized",
+				     avg / ratio);
 		else
-			fprintf(out, "                                   ");
+			print_metric(ctxp, NULL, NULL, "CPUs utilized", 0);
 	} else if (runtime_nsecs_stats[cpu].n != 0) {
 		char unit = 'M';
+		char unit_buf[10];
 
 		total = avg_stats(&runtime_nsecs_stats[cpu]);
 
@@ -429,9 +464,9 @@
 			ratio *= 1000;
 			unit = 'K';
 		}
-
-		fprintf(out, " # %8.3f %c/sec                  ", ratio, unit);
+		snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
+		print_metric(ctxp, NULL, "%8.3f", unit_buf, ratio);
 	} else {
-		fprintf(out, "                                   ");
+		print_metric(ctxp, NULL, NULL, NULL, 0);
 	}
 }
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index 2f901d1..4d9b481 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -97,7 +97,7 @@
 	}
 }
 
-void perf_evsel__reset_stat_priv(struct perf_evsel *evsel)
+static void perf_evsel__reset_stat_priv(struct perf_evsel *evsel)
 {
 	int i;
 	struct perf_stat_evsel *ps = evsel->priv;
@@ -108,7 +108,7 @@
 	perf_stat_evsel_id_init(evsel);
 }
 
-int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel)
+static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel)
 {
 	evsel->priv = zalloc(sizeof(struct perf_stat_evsel));
 	if (evsel->priv == NULL)
@@ -117,13 +117,13 @@
 	return 0;
 }
 
-void perf_evsel__free_stat_priv(struct perf_evsel *evsel)
+static void perf_evsel__free_stat_priv(struct perf_evsel *evsel)
 {
 	zfree(&evsel->priv);
 }
 
-int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel,
-				      int ncpus, int nthreads)
+static int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel,
+					     int ncpus, int nthreads)
 {
 	struct perf_counts *counts;
 
@@ -134,13 +134,13 @@
 	return counts ? 0 : -ENOMEM;
 }
 
-void perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel)
+static void perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel)
 {
 	perf_counts__delete(evsel->prev_raw_counts);
 	evsel->prev_raw_counts = NULL;
 }
 
-int perf_evsel__alloc_stats(struct perf_evsel *evsel, bool alloc_raw)
+static int perf_evsel__alloc_stats(struct perf_evsel *evsel, bool alloc_raw)
 {
 	int ncpus = perf_evsel__nr_cpus(evsel);
 	int nthreads = thread_map__nr(evsel->threads);
@@ -310,7 +310,16 @@
 	int i, ret;
 
 	aggr->val = aggr->ena = aggr->run = 0;
-	init_stats(ps->res_stats);
+
+	/*
+	 * We calculate counter's data every interval,
+	 * and the display code shows ps->res_stats
+	 * avg value. We need to zero the stats for
+	 * interval mode, otherwise overall avg running
+	 * averages will be shown for each interval.
+	 */
+	if (config->interval)
+		init_stats(ps->res_stats);
 
 	if (counter->per_pkg)
 		zero_per_pkg(counter);
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index 086f4e1..0150e78 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -68,21 +68,23 @@
 
 extern struct stats walltime_nsecs_stats;
 
+typedef void (*print_metric_t)(void *ctx, const char *color, const char *unit,
+			       const char *fmt, double val);
+typedef void (*new_line_t )(void *ctx);
+
+void perf_stat__init_shadow_stats(void);
 void perf_stat__reset_shadow_stats(void);
 void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count,
 				    int cpu);
-void perf_stat__print_shadow_stats(FILE *out, struct perf_evsel *evsel,
-				   double avg, int cpu, enum aggr_mode aggr);
+struct perf_stat_output_ctx {
+	void *ctx;
+	print_metric_t print_metric;
+	new_line_t new_line;
+};
 
-void perf_evsel__reset_stat_priv(struct perf_evsel *evsel);
-int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel);
-void perf_evsel__free_stat_priv(struct perf_evsel *evsel);
-
-int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel,
-				      int ncpus, int nthreads);
-void perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel);
-
-int perf_evsel__alloc_stats(struct perf_evsel *evsel, bool alloc_raw);
+void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
+				   double avg, int cpu,
+				   struct perf_stat_output_ctx *out);
 
 int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw);
 void perf_evlist__free_stats(struct perf_evlist *evlist);
diff --git a/tools/perf/util/strbuf.c b/tools/perf/util/strbuf.c
index 25671fa..d3d2792 100644
--- a/tools/perf/util/strbuf.c
+++ b/tools/perf/util/strbuf.c
@@ -51,30 +51,6 @@
 	ALLOC_GROW(sb->buf, sb->len + extra + 1, sb->alloc);
 }
 
-static void strbuf_splice(struct strbuf *sb, size_t pos, size_t len,
-				   const void *data, size_t dlen)
-{
-	if (pos + len < pos)
-		die("you want to use way too much memory");
-	if (pos > sb->len)
-		die("`pos' is too far after the end of the buffer");
-	if (pos + len > sb->len)
-		die("`pos + len' is too far after the end of the buffer");
-
-	if (dlen >= len)
-		strbuf_grow(sb, dlen - len);
-	memmove(sb->buf + pos + dlen,
-			sb->buf + pos + len,
-			sb->len - pos - len);
-	memcpy(sb->buf + pos, data, dlen);
-	strbuf_setlen(sb, sb->len + dlen - len);
-}
-
-void strbuf_remove(struct strbuf *sb, size_t pos, size_t len)
-{
-	strbuf_splice(sb, pos, len, NULL, 0);
-}
-
 void strbuf_add(struct strbuf *sb, const void *data, size_t len)
 {
 	strbuf_grow(sb, len);
diff --git a/tools/perf/util/strbuf.h b/tools/perf/util/strbuf.h
index 529f2f0..7a32c83 100644
--- a/tools/perf/util/strbuf.h
+++ b/tools/perf/util/strbuf.h
@@ -77,8 +77,6 @@
 	sb->buf[sb->len] = '\0';
 }
 
-extern void strbuf_remove(struct strbuf *, size_t pos, size_t len);
-
 extern void strbuf_add(struct strbuf *, const void *, size_t);
 static inline void strbuf_addstr(struct strbuf *sb, const char *s) {
 	strbuf_add(sb, s, strlen(s));
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index 562b8eb..b1dd68f 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -6,6 +6,7 @@
 #include <inttypes.h>
 
 #include "symbol.h"
+#include "demangle-java.h"
 #include "machine.h"
 #include "vdso.h"
 #include <symbol/kallsyms.h>
@@ -1077,6 +1078,8 @@
 				demangle_flags = DMGL_PARAMS | DMGL_ANSI;
 
 			demangled = bfd_demangle(NULL, elf_name, demangle_flags);
+			if (demangled == NULL)
+				demangled = java_demangle_sym(elf_name, JAVA_DEMANGLE_NORET);
 			if (demangled != NULL)
 				elf_name = demangled;
 		}
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 3b2de6e..e7588dc 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -1466,7 +1466,8 @@
 	 * Read the build id if possible. This is required for
 	 * DSO_BINARY_TYPE__BUILDID_DEBUGINFO to work
 	 */
-	if (filename__read_build_id(dso->name, build_id, BUILD_ID_SIZE) > 0)
+	if (is_regular_file(name) &&
+	    filename__read_build_id(dso->long_name, build_id, BUILD_ID_SIZE) > 0)
 		dso__set_build_id(dso, build_id);
 
 	/*
@@ -1487,6 +1488,9 @@
 						   root_dir, name, PATH_MAX))
 			continue;
 
+		if (!is_regular_file(name))
+			continue;
+
 		/* Name is now the name of the next image to try */
 		if (symsrc__init(ss, dso, name, symtab_type) < 0)
 			continue;
@@ -1525,6 +1529,10 @@
 	if (!runtime_ss && syms_ss)
 		runtime_ss = syms_ss;
 
+	if (syms_ss && syms_ss->type == DSO_BINARY_TYPE__BUILD_ID_CACHE)
+		if (dso__build_id_is_kmod(dso, name, PATH_MAX))
+			kmod = true;
+
 	if (syms_ss)
 		ret = dso__load_sym(dso, map, syms_ss, runtime_ss, filter, kmod);
 	else
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index ccd1caa..a937053 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -110,7 +110,8 @@
 			has_filter,
 			show_ref_callgraph,
 			hide_unresolved,
-			raw_trace;
+			raw_trace,
+			report_hierarchy;
 	const char	*vmlinux_name,
 			*kallsyms_name,
 			*source_prefix,
diff --git a/tools/perf/util/trace-event.c b/tools/perf/util/trace-event.c
index 802bb86..8ae051e 100644
--- a/tools/perf/util/trace-event.c
+++ b/tools/perf/util/trace-event.c
@@ -10,6 +10,7 @@
 #include <linux/err.h>
 #include <traceevent/event-parse.h>
 #include <api/fs/tracing_path.h>
+#include <api/fs/fs.h>
 #include "trace-event.h"
 #include "machine.h"
 #include "util.h"
diff --git a/tools/perf/util/tsc.c b/tools/perf/util/tsc.c
index 4d4210d..1b74164 100644
--- a/tools/perf/util/tsc.c
+++ b/tools/perf/util/tsc.c
@@ -19,7 +19,7 @@
 	u64 quot, rem;
 
 	quot = cyc >> tc->time_shift;
-	rem  = cyc & ((1 << tc->time_shift) - 1);
+	rem  = cyc & (((u64)1 << tc->time_shift) - 1);
 	return tc->time_zero + quot * tc->time_mult +
 	       ((rem * tc->time_mult) >> tc->time_shift);
 }
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
index ead9509..b7766c5 100644
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c
@@ -14,6 +14,7 @@
 #include <limits.h>
 #include <byteswap.h>
 #include <linux/kernel.h>
+#include <linux/log2.h>
 #include <unistd.h>
 #include "callchain.h"
 #include "strlist.h"
@@ -507,54 +508,6 @@
 	return ret;
 }
 
-int filename__read_str(const char *filename, char **buf, size_t *sizep)
-{
-	size_t size = 0, alloc_size = 0;
-	void *bf = NULL, *nbf;
-	int fd, n, err = 0;
-	char sbuf[STRERR_BUFSIZE];
-
-	fd = open(filename, O_RDONLY);
-	if (fd < 0)
-		return -errno;
-
-	do {
-		if (size == alloc_size) {
-			alloc_size += BUFSIZ;
-			nbf = realloc(bf, alloc_size);
-			if (!nbf) {
-				err = -ENOMEM;
-				break;
-			}
-
-			bf = nbf;
-		}
-
-		n = read(fd, bf + size, alloc_size - size);
-		if (n < 0) {
-			if (size) {
-				pr_warning("read failed %d: %s\n", errno,
-					 strerror_r(errno, sbuf, sizeof(sbuf)));
-				err = 0;
-			} else
-				err = -errno;
-
-			break;
-		}
-
-		size += n;
-	} while (n > 0);
-
-	if (!err) {
-		*sizep = size;
-		*buf   = bf;
-	} else
-		free(bf);
-
-	close(fd);
-	return err;
-}
-
 const char *get_filename_for_perf_kvm(void)
 {
 	const char *filename;
@@ -691,3 +644,66 @@
 
 	return tip;
 }
+
+bool is_regular_file(const char *file)
+{
+	struct stat st;
+
+	if (stat(file, &st))
+		return false;
+
+	return S_ISREG(st.st_mode);
+}
+
+int fetch_current_timestamp(char *buf, size_t sz)
+{
+	struct timeval tv;
+	struct tm tm;
+	char dt[32];
+
+	if (gettimeofday(&tv, NULL) || !localtime_r(&tv.tv_sec, &tm))
+		return -1;
+
+	if (!strftime(dt, sizeof(dt), "%Y%m%d%H%M%S", &tm))
+		return -1;
+
+	scnprintf(buf, sz, "%s%02u", dt, (unsigned)tv.tv_usec / 10000);
+
+	return 0;
+}
+
+void print_binary(unsigned char *data, size_t len,
+		  size_t bytes_per_line, print_binary_t printer,
+		  void *extra)
+{
+	size_t i, j, mask;
+
+	if (!printer)
+		return;
+
+	bytes_per_line = roundup_pow_of_two(bytes_per_line);
+	mask = bytes_per_line - 1;
+
+	printer(BINARY_PRINT_DATA_BEGIN, 0, extra);
+	for (i = 0; i < len; i++) {
+		if ((i & mask) == 0) {
+			printer(BINARY_PRINT_LINE_BEGIN, -1, extra);
+			printer(BINARY_PRINT_ADDR, i, extra);
+		}
+
+		printer(BINARY_PRINT_NUM_DATA, data[i], extra);
+
+		if (((i & mask) == mask) || i == len - 1) {
+			for (j = 0; j < mask-(i & mask); j++)
+				printer(BINARY_PRINT_NUM_PAD, -1, extra);
+
+			printer(BINARY_PRINT_SEP, i, extra);
+			for (j = i & ~mask; j <= i; j++)
+				printer(BINARY_PRINT_CHAR_DATA, data[j], extra);
+			for (j = 0; j < mask-(i & mask); j++)
+				printer(BINARY_PRINT_CHAR_PAD, i, extra);
+			printer(BINARY_PRINT_LINE_END, -1, extra);
+		}
+	}
+	printer(BINARY_PRINT_DATA_END, -1, extra);
+}
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index fe915e6..d0d50ce 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -82,6 +82,8 @@
 
 extern const char *graph_line;
 extern const char *graph_dotted_line;
+extern const char *spaces;
+extern const char *dots;
 extern char buildid_dir[];
 
 /* On most systems <limits.h> would have given us this, but
@@ -303,7 +305,6 @@
 		  bool show_sym, bool unwind_inlines);
 void free_srcline(char *srcline);
 
-int filename__read_str(const char *filename, char **buf, size_t *sizep);
 int perf_event_paranoid(void);
 
 void mem_bswap_64(void *src, int byte_size);
@@ -343,5 +344,27 @@
 #define KVER_PARAM(x)	KVER_VERSION(x), KVER_PATCHLEVEL(x), KVER_SUBLEVEL(x)
 
 const char *perf_tip(const char *dirpath);
+bool is_regular_file(const char *file);
+int fetch_current_timestamp(char *buf, size_t sz);
 
+enum binary_printer_ops {
+	BINARY_PRINT_DATA_BEGIN,
+	BINARY_PRINT_LINE_BEGIN,
+	BINARY_PRINT_ADDR,
+	BINARY_PRINT_NUM_DATA,
+	BINARY_PRINT_NUM_PAD,
+	BINARY_PRINT_SEP,
+	BINARY_PRINT_CHAR_DATA,
+	BINARY_PRINT_CHAR_PAD,
+	BINARY_PRINT_LINE_END,
+	BINARY_PRINT_DATA_END,
+};
+
+typedef void (*print_binary_t)(enum binary_printer_ops,
+			       unsigned int val,
+			       void *extra);
+
+void print_binary(unsigned char *data, size_t len,
+		  size_t bytes_per_line, print_binary_t printer,
+		  void *extra);
 #endif /* GIT_COMPAT_UTIL_H */
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 0dac7e0..3fa94e2 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -1970,7 +1970,7 @@
 }
 
 static void
-dump_cstate_pstate_config_info(family, model)
+dump_cstate_pstate_config_info(unsigned int family, unsigned int model)
 {
 	if (!do_nhm_platform_info)
 		return;
@@ -2142,7 +2142,7 @@
 #define	RAPL_POWER_GRANULARITY	0x7FFF	/* 15 bit power granularity */
 #define	RAPL_TIME_GRANULARITY	0x3F /* 6 bit time granularity */
 
-double get_tdp(model)
+double get_tdp(unsigned int model)
 {
 	unsigned long long msr;
 
@@ -2256,7 +2256,7 @@
 	return;
 }
 
-void perf_limit_reasons_probe(family, model)
+void perf_limit_reasons_probe(unsigned int family, unsigned int model)
 {
 	if (!genuine_intel)
 		return;
@@ -2792,7 +2792,7 @@
 	perf_limit_reasons_probe(family, model);
 
 	if (debug)
-		dump_cstate_pstate_config_info();
+		dump_cstate_pstate_config_info(family, model);
 
 	if (has_skl_msrs(family, model))
 		calculate_tsc_tweak();
diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c
index 7ec7df9..0c1a7e6 100644
--- a/tools/testing/nvdimm/test/iomap.c
+++ b/tools/testing/nvdimm/test/iomap.c
@@ -113,7 +113,7 @@
 }
 EXPORT_SYMBOL(__wrap_devm_memremap_pages);
 
-pfn_t __wrap_phys_to_pfn_t(dma_addr_t addr, unsigned long flags)
+pfn_t __wrap_phys_to_pfn_t(phys_addr_t addr, unsigned long flags)
 {
 	struct nfit_test_resource *nfit_res = get_nfit_res(addr);
 
diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
index 90bd2ea..b3281dc 100644
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -217,13 +217,16 @@
 	return rc;
 }
 
+#define NFIT_TEST_ARS_RECORDS 4
+
 static int nfit_test_cmd_ars_cap(struct nd_cmd_ars_cap *nd_cmd,
 		unsigned int buf_len)
 {
 	if (buf_len < sizeof(*nd_cmd))
 		return -EINVAL;
 
-	nd_cmd->max_ars_out = 256;
+	nd_cmd->max_ars_out = sizeof(struct nd_cmd_ars_status)
+		+ NFIT_TEST_ARS_RECORDS * sizeof(struct nd_ars_record);
 	nd_cmd->status = (ND_ARS_PERSISTENT | ND_ARS_VOLATILE) << 16;
 
 	return 0;
@@ -246,7 +249,8 @@
 	if (buf_len < sizeof(*nd_cmd))
 		return -EINVAL;
 
-	nd_cmd->out_length = 256;
+	nd_cmd->out_length = sizeof(struct nd_cmd_ars_status);
+	/* TODO: emit error records */
 	nd_cmd->num_records = 0;
 	nd_cmd->address = 0;
 	nd_cmd->length = -1ULL;
diff --git a/tools/testing/selftests/efivarfs/efivarfs.sh b/tools/testing/selftests/efivarfs/efivarfs.sh
index 77edcdc..0572784 100755
--- a/tools/testing/selftests/efivarfs/efivarfs.sh
+++ b/tools/testing/selftests/efivarfs/efivarfs.sh
@@ -88,7 +88,11 @@
 		exit 1
 	fi
 
-	rm $file
+	rm $file 2>/dev/null
+	if [ $? -ne 0 ]; then
+		chattr -i $file
+		rm $file
+	fi
 
 	if [ -e $file ]; then
 		echo "$file couldn't be deleted" >&2
@@ -111,6 +115,7 @@
 		exit 1
 	fi
 
+	chattr -i $file
 	printf "$attrs" > $file
 
 	if [ -e $file ]; then
@@ -141,7 +146,11 @@
 			echo "$file could not be created" >&2
 			ret=1
 		else
-			rm $file
+			rm $file 2>/dev/null
+			if [ $? -ne 0 ]; then
+				chattr -i $file
+				rm $file
+			fi
 		fi
 	done
 
@@ -174,7 +183,11 @@
 
 		if [ -e $file ]; then
 			echo "Creating $file should have failed" >&2
-			rm $file
+			rm $file 2>/dev/null
+			if [ $? -ne 0 ]; then
+				chattr -i $file
+				rm $file
+			fi
 			ret=1
 		fi
 	done
diff --git a/tools/testing/selftests/efivarfs/open-unlink.c b/tools/testing/selftests/efivarfs/open-unlink.c
index 8c07644..4af74f7 100644
--- a/tools/testing/selftests/efivarfs/open-unlink.c
+++ b/tools/testing/selftests/efivarfs/open-unlink.c
@@ -1,10 +1,68 @@
+#include <errno.h>
 #include <stdio.h>
 #include <stdint.h>
 #include <stdlib.h>
 #include <unistd.h>
+#include <sys/ioctl.h>
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <fcntl.h>
+#include <linux/fs.h>
+
+static int set_immutable(const char *path, int immutable)
+{
+	unsigned int flags;
+	int fd;
+	int rc;
+	int error;
+
+	fd = open(path, O_RDONLY);
+	if (fd < 0)
+		return fd;
+
+	rc = ioctl(fd, FS_IOC_GETFLAGS, &flags);
+	if (rc < 0) {
+		error = errno;
+		close(fd);
+		errno = error;
+		return rc;
+	}
+
+	if (immutable)
+		flags |= FS_IMMUTABLE_FL;
+	else
+		flags &= ~FS_IMMUTABLE_FL;
+
+	rc = ioctl(fd, FS_IOC_SETFLAGS, &flags);
+	error = errno;
+	close(fd);
+	errno = error;
+	return rc;
+}
+
+static int get_immutable(const char *path)
+{
+	unsigned int flags;
+	int fd;
+	int rc;
+	int error;
+
+	fd = open(path, O_RDONLY);
+	if (fd < 0)
+		return fd;
+
+	rc = ioctl(fd, FS_IOC_GETFLAGS, &flags);
+	if (rc < 0) {
+		error = errno;
+		close(fd);
+		errno = error;
+		return rc;
+	}
+	close(fd);
+	if (flags & FS_IMMUTABLE_FL)
+		return 1;
+	return 0;
+}
 
 int main(int argc, char **argv)
 {
@@ -27,7 +85,7 @@
 	buf[4] = 0;
 
 	/* create a test variable */
-	fd = open(path, O_WRONLY | O_CREAT);
+	fd = open(path, O_WRONLY | O_CREAT, 0600);
 	if (fd < 0) {
 		perror("open(O_WRONLY)");
 		return EXIT_FAILURE;
@@ -41,6 +99,18 @@
 
 	close(fd);
 
+	rc = get_immutable(path);
+	if (rc < 0) {
+		perror("ioctl(FS_IOC_GETFLAGS)");
+		return EXIT_FAILURE;
+	} else if (rc) {
+		rc = set_immutable(path, 0);
+		if (rc < 0) {
+			perror("ioctl(FS_IOC_SETFLAGS)");
+			return EXIT_FAILURE;
+		}
+	}
+
 	fd = open(path, O_RDONLY);
 	if (fd < 0) {
 		perror("open");
diff --git a/tools/testing/selftests/ftrace/test.d/instances/instance.tc b/tools/testing/selftests/ftrace/test.d/instances/instance.tc
index 773e276..1e1abe0 100644
--- a/tools/testing/selftests/ftrace/test.d/instances/instance.tc
+++ b/tools/testing/selftests/ftrace/test.d/instances/instance.tc
@@ -39,28 +39,23 @@
 }
 
 instance_slam &
-x=`jobs -l`
-p1=`echo $x | cut -d' ' -f2`
+p1=$!
 echo $p1
 
 instance_slam &
-x=`jobs -l | tail -1`
-p2=`echo $x | cut -d' ' -f2`
+p2=$!
 echo $p2
 
 instance_slam &
-x=`jobs -l | tail -1`
-p3=`echo $x | cut -d' ' -f2`
+p3=$!
 echo $p3
 
 instance_slam &
-x=`jobs -l | tail -1`
-p4=`echo $x | cut -d' ' -f2`
+p4=$!
 echo $p4
 
 instance_slam &
-x=`jobs -l | tail -1`
-p5=`echo $x | cut -d' ' -f2`
+p5=$!
 echo $p5
 
 ls -lR >/dev/null
diff --git a/tools/testing/selftests/rcutorture/bin/parse-console.sh b/tools/testing/selftests/rcutorture/bin/parse-console.sh
index 844787a..5eb49b7 100755
--- a/tools/testing/selftests/rcutorture/bin/parse-console.sh
+++ b/tools/testing/selftests/rcutorture/bin/parse-console.sh
@@ -33,7 +33,7 @@
 then
 	print_warning Console output contains nul bytes, old qemu still running?
 fi
-egrep 'Badness|WARNING:|Warn|BUG|===========|Call Trace:|Oops:|detected stalls on CPUs/tasks:|Stall ended before state dump start' < $file | grep -v 'ODEBUG: ' | grep -v 'Warning: unable to open an initial console' > $1.diags
+egrep 'Badness|WARNING:|Warn|BUG|===========|Call Trace:|Oops:|detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state' < $file | grep -v 'ODEBUG: ' | grep -v 'Warning: unable to open an initial console' > $1.diags
 if test -s $1.diags
 then
 	print_warning Assertion failure in $file $title
@@ -64,10 +64,12 @@
 	then
 		summary="$summary  lockdep: $n_badness"
 	fi
-	n_stalls=`egrep -c 'detected stalls on CPUs/tasks:|Stall ended before state dump start' $1`
+	n_stalls=`egrep -c 'detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state' $1`
 	if test "$n_stalls" -ne 0
 	then
 		summary="$summary  Stalls: $n_stalls"
 	fi
 	print_warning Summary: $summary
+else
+	rm $1.diags
 fi
diff --git a/tools/testing/selftests/timers/valid-adjtimex.c b/tools/testing/selftests/timers/valid-adjtimex.c
index e86d937..60fe3c5 100644
--- a/tools/testing/selftests/timers/valid-adjtimex.c
+++ b/tools/testing/selftests/timers/valid-adjtimex.c
@@ -45,7 +45,17 @@
 }
 #endif
 
-#define NSEC_PER_SEC 1000000000L
+#define NSEC_PER_SEC 1000000000LL
+#define USEC_PER_SEC 1000000LL
+
+#define ADJ_SETOFFSET 0x0100
+
+#include <sys/syscall.h>
+static int clock_adjtime(clockid_t id, struct timex *tx)
+{
+	return syscall(__NR_clock_adjtime, id, tx);
+}
+
 
 /* clear NTP time_status & time_state */
 int clear_time_state(void)
@@ -193,10 +203,137 @@
 }
 
 
+int set_offset(long long offset, int use_nano)
+{
+	struct timex tmx = {};
+	int ret;
+
+	tmx.modes = ADJ_SETOFFSET;
+	if (use_nano) {
+		tmx.modes |= ADJ_NANO;
+
+		tmx.time.tv_sec = offset / NSEC_PER_SEC;
+		tmx.time.tv_usec = offset % NSEC_PER_SEC;
+
+		if (offset < 0 && tmx.time.tv_usec) {
+			tmx.time.tv_sec -= 1;
+			tmx.time.tv_usec += NSEC_PER_SEC;
+		}
+	} else {
+		tmx.time.tv_sec = offset / USEC_PER_SEC;
+		tmx.time.tv_usec = offset % USEC_PER_SEC;
+
+		if (offset < 0 && tmx.time.tv_usec) {
+			tmx.time.tv_sec -= 1;
+			tmx.time.tv_usec += USEC_PER_SEC;
+		}
+	}
+
+	ret = clock_adjtime(CLOCK_REALTIME, &tmx);
+	if (ret < 0) {
+		printf("(sec: %ld  usec: %ld) ", tmx.time.tv_sec, tmx.time.tv_usec);
+		printf("[FAIL]\n");
+		return -1;
+	}
+	return 0;
+}
+
+int set_bad_offset(long sec, long usec, int use_nano)
+{
+	struct timex tmx = {};
+	int ret;
+
+	tmx.modes = ADJ_SETOFFSET;
+	if (use_nano)
+		tmx.modes |= ADJ_NANO;
+
+	tmx.time.tv_sec = sec;
+	tmx.time.tv_usec = usec;
+	ret = clock_adjtime(CLOCK_REALTIME, &tmx);
+	if (ret >= 0) {
+		printf("Invalid (sec: %ld  usec: %ld) did not fail! ", tmx.time.tv_sec, tmx.time.tv_usec);
+		printf("[FAIL]\n");
+		return -1;
+	}
+	return 0;
+}
+
+int validate_set_offset(void)
+{
+	printf("Testing ADJ_SETOFFSET... ");
+
+	/* Test valid values */
+	if (set_offset(NSEC_PER_SEC - 1, 1))
+		return -1;
+
+	if (set_offset(-NSEC_PER_SEC + 1, 1))
+		return -1;
+
+	if (set_offset(-NSEC_PER_SEC - 1, 1))
+		return -1;
+
+	if (set_offset(5 * NSEC_PER_SEC, 1))
+		return -1;
+
+	if (set_offset(-5 * NSEC_PER_SEC, 1))
+		return -1;
+
+	if (set_offset(5 * NSEC_PER_SEC + NSEC_PER_SEC / 2, 1))
+		return -1;
+
+	if (set_offset(-5 * NSEC_PER_SEC - NSEC_PER_SEC / 2, 1))
+		return -1;
+
+	if (set_offset(USEC_PER_SEC - 1, 0))
+		return -1;
+
+	if (set_offset(-USEC_PER_SEC + 1, 0))
+		return -1;
+
+	if (set_offset(-USEC_PER_SEC - 1, 0))
+		return -1;
+
+	if (set_offset(5 * USEC_PER_SEC, 0))
+		return -1;
+
+	if (set_offset(-5 * USEC_PER_SEC, 0))
+		return -1;
+
+	if (set_offset(5 * USEC_PER_SEC + USEC_PER_SEC / 2, 0))
+		return -1;
+
+	if (set_offset(-5 * USEC_PER_SEC - USEC_PER_SEC / 2, 0))
+		return -1;
+
+	/* Test invalid values */
+	if (set_bad_offset(0, -1, 1))
+		return -1;
+	if (set_bad_offset(0, -1, 0))
+		return -1;
+	if (set_bad_offset(0, 2 * NSEC_PER_SEC, 1))
+		return -1;
+	if (set_bad_offset(0, 2 * USEC_PER_SEC, 0))
+		return -1;
+	if (set_bad_offset(0, NSEC_PER_SEC, 1))
+		return -1;
+	if (set_bad_offset(0, USEC_PER_SEC, 0))
+		return -1;
+	if (set_bad_offset(0, -NSEC_PER_SEC, 1))
+		return -1;
+	if (set_bad_offset(0, -USEC_PER_SEC, 0))
+		return -1;
+
+	printf("[OK]\n");
+	return 0;
+}
+
 int main(int argc, char **argv)
 {
 	if (validate_freq())
 		return ksft_exit_fail();
 
+	if (validate_set_offset())
+		return ksft_exit_fail();
+
 	return ksft_exit_pass();
 }
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index d0c473f..d5ce7d7 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -4,15 +4,16 @@
 
 .PHONY: all all_32 all_64 warn_32bit_failure clean
 
-TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_syscall
-TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault sigreturn test_syscall_vdso unwind_vdso \
+TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_syscall \
+			check_initial_reg_state sigreturn ldt_gdt
+TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \
 			test_FCMOV test_FCOMI test_FISTTP \
-			ldt_gdt \
 			vdso_restorer
 
 TARGETS_C_32BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_32BIT_ONLY)
+TARGETS_C_64BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_64BIT_ONLY)
 BINARIES_32 := $(TARGETS_C_32BIT_ALL:%=%_32)
-BINARIES_64 := $(TARGETS_C_BOTHBITS:%=%_64)
+BINARIES_64 := $(TARGETS_C_64BIT_ALL:%=%_64)
 
 CFLAGS := -O2 -g -std=gnu99 -pthread -Wall
 
@@ -40,7 +41,7 @@
 $(TARGETS_C_32BIT_ALL:%=%_32): %_32: %.c
 	$(CC) -m32 -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $^ -lrt -ldl -lm
 
-$(TARGETS_C_BOTHBITS:%=%_64): %_64: %.c
+$(TARGETS_C_64BIT_ALL:%=%_64): %_64: %.c
 	$(CC) -m64 -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $^ -lrt -ldl
 
 # x86_64 users should be encouraged to install 32-bit libraries
@@ -65,3 +66,9 @@
 sysret_ss_attrs_64: thunks.S
 ptrace_syscall_32: raw_syscall_helper_32.S
 test_syscall_vdso_32: thunks_32.S
+
+# check_initial_reg_state is special: it needs a custom entry, and it
+# needs to be static so that its interpreter doesn't destroy its initial
+# state.
+check_initial_reg_state_32: CFLAGS += -Wl,-ereal_start -static
+check_initial_reg_state_64: CFLAGS += -Wl,-ereal_start -static
diff --git a/tools/testing/selftests/x86/check_initial_reg_state.c b/tools/testing/selftests/x86/check_initial_reg_state.c
new file mode 100644
index 0000000..6aaed9b8
--- /dev/null
+++ b/tools/testing/selftests/x86/check_initial_reg_state.c
@@ -0,0 +1,109 @@
+/*
+ * check_initial_reg_state.c - check that execve sets the correct state
+ * Copyright (c) 2014-2016 Andrew Lutomirski
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+
+unsigned long ax, bx, cx, dx, si, di, bp, sp, flags;
+unsigned long r8, r9, r10, r11, r12, r13, r14, r15;
+
+asm (
+	".pushsection .text\n\t"
+	".type real_start, @function\n\t"
+	".global real_start\n\t"
+	"real_start:\n\t"
+#ifdef __x86_64__
+	"mov %rax, ax\n\t"
+	"mov %rbx, bx\n\t"
+	"mov %rcx, cx\n\t"
+	"mov %rdx, dx\n\t"
+	"mov %rsi, si\n\t"
+	"mov %rdi, di\n\t"
+	"mov %rbp, bp\n\t"
+	"mov %rsp, sp\n\t"
+	"mov %r8, r8\n\t"
+	"mov %r9, r9\n\t"
+	"mov %r10, r10\n\t"
+	"mov %r11, r11\n\t"
+	"mov %r12, r12\n\t"
+	"mov %r13, r13\n\t"
+	"mov %r14, r14\n\t"
+	"mov %r15, r15\n\t"
+	"pushfq\n\t"
+	"popq flags\n\t"
+#else
+	"mov %eax, ax\n\t"
+	"mov %ebx, bx\n\t"
+	"mov %ecx, cx\n\t"
+	"mov %edx, dx\n\t"
+	"mov %esi, si\n\t"
+	"mov %edi, di\n\t"
+	"mov %ebp, bp\n\t"
+	"mov %esp, sp\n\t"
+	"pushfl\n\t"
+	"popl flags\n\t"
+#endif
+	"jmp _start\n\t"
+	".size real_start, . - real_start\n\t"
+	".popsection");
+
+int main()
+{
+	int nerrs = 0;
+
+	if (sp == 0) {
+		printf("[FAIL]\tTest was built incorrectly\n");
+		return 1;
+	}
+
+	if (ax || bx || cx || dx || si || di || bp
+#ifdef __x86_64__
+	    || r8 || r9 || r10 || r11 || r12 || r13 || r14 || r15
+#endif
+		) {
+		printf("[FAIL]\tAll GPRs except SP should be 0\n");
+#define SHOW(x) printf("\t" #x " = 0x%lx\n", x);
+		SHOW(ax);
+		SHOW(bx);
+		SHOW(cx);
+		SHOW(dx);
+		SHOW(si);
+		SHOW(di);
+		SHOW(bp);
+		SHOW(sp);
+#ifdef __x86_64__
+		SHOW(r8);
+		SHOW(r9);
+		SHOW(r10);
+		SHOW(r11);
+		SHOW(r12);
+		SHOW(r13);
+		SHOW(r14);
+		SHOW(r15);
+#endif
+		nerrs++;
+	} else {
+		printf("[OK]\tAll GPRs except SP are 0\n");
+	}
+
+	if (flags != 0x202) {
+		printf("[FAIL]\tFLAGS is 0x%lx, but it should be 0x202\n", flags);
+		nerrs++;
+	} else {
+		printf("[OK]\tFLAGS is 0x202\n");
+	}
+
+	return nerrs ? 1 : 0;
+}
diff --git a/tools/testing/selftests/x86/ptrace_syscall.c b/tools/testing/selftests/x86/ptrace_syscall.c
index 5105b49..4214567 100644
--- a/tools/testing/selftests/x86/ptrace_syscall.c
+++ b/tools/testing/selftests/x86/ptrace_syscall.c
@@ -103,6 +103,17 @@
 		err(1, "sigaction");
 }
 
+static void setsigign(int sig, int flags)
+{
+	struct sigaction sa;
+	memset(&sa, 0, sizeof(sa));
+	sa.sa_sigaction = (void *)SIG_IGN;
+	sa.sa_flags = flags;
+	sigemptyset(&sa.sa_mask);
+	if (sigaction(sig, &sa, 0))
+		err(1, "sigaction");
+}
+
 static void clearhandler(int sig)
 {
 	struct sigaction sa;
@@ -187,7 +198,7 @@
 
 	printf("[RUN]\tSYSEMU\n");
 	if (ptrace(PTRACE_SYSEMU, chld, 0, 0) != 0)
-		err(1, "PTRACE_SYSCALL");
+		err(1, "PTRACE_SYSEMU");
 	wait_trap(chld);
 
 	if (ptrace(PTRACE_GETREGS, chld, 0, &regs) != 0)
@@ -218,7 +229,7 @@
 		err(1, "PTRACE_SETREGS");
 
 	if (ptrace(PTRACE_SYSEMU, chld, 0, 0) != 0)
-		err(1, "PTRACE_SYSCALL");
+		err(1, "PTRACE_SYSEMU");
 	wait_trap(chld);
 
 	if (ptrace(PTRACE_GETREGS, chld, 0, &regs) != 0)
@@ -250,7 +261,7 @@
 		err(1, "PTRACE_SETREGS");
 
 	if (ptrace(PTRACE_SYSEMU, chld, 0, 0) != 0)
-		err(1, "PTRACE_SYSCALL");
+		err(1, "PTRACE_SYSEMU");
 	wait_trap(chld);
 
 	if (ptrace(PTRACE_GETREGS, chld, 0, &regs) != 0)
@@ -277,6 +288,119 @@
 	}
 }
 
+static void test_restart_under_ptrace(void)
+{
+	printf("[RUN]\tkernel syscall restart under ptrace\n");
+	pid_t chld = fork();
+	if (chld < 0)
+		err(1, "fork");
+
+	if (chld == 0) {
+		if (ptrace(PTRACE_TRACEME, 0, 0, 0) != 0)
+			err(1, "PTRACE_TRACEME");
+
+		printf("\tChild will take a nap until signaled\n");
+		setsigign(SIGUSR1, SA_RESTART);
+		raise(SIGSTOP);
+
+		syscall(SYS_pause, 0, 0, 0, 0, 0, 0);
+		_exit(0);
+	}
+
+	int status;
+
+	/* Wait for SIGSTOP. */
+	if (waitpid(chld, &status, 0) != chld || !WIFSTOPPED(status))
+		err(1, "waitpid");
+
+	struct user_regs_struct regs;
+
+	printf("[RUN]\tSYSCALL\n");
+	if (ptrace(PTRACE_SYSCALL, chld, 0, 0) != 0)
+		err(1, "PTRACE_SYSCALL");
+	wait_trap(chld);
+
+	/* We should be stopped at pause(2) entry. */
+
+	if (ptrace(PTRACE_GETREGS, chld, 0, &regs) != 0)
+		err(1, "PTRACE_GETREGS");
+
+	if (regs.user_syscall_nr != SYS_pause ||
+	    regs.user_arg0 != 0 || regs.user_arg1 != 0 ||
+	    regs.user_arg2 != 0 || regs.user_arg3 != 0 ||
+	    regs.user_arg4 != 0 || regs.user_arg5 != 0) {
+		printf("[FAIL]\tInitial args are wrong (nr=%lu, args=%lu %lu %lu %lu %lu %lu)\n", (unsigned long)regs.user_syscall_nr, (unsigned long)regs.user_arg0, (unsigned long)regs.user_arg1, (unsigned long)regs.user_arg2, (unsigned long)regs.user_arg3, (unsigned long)regs.user_arg4, (unsigned long)regs.user_arg5);
+		nerrs++;
+	} else {
+		printf("[OK]\tInitial nr and args are correct\n");
+	}
+
+	/* Interrupt it. */
+	kill(chld, SIGUSR1);
+
+	/* Advance.  We should be stopped at exit. */
+	printf("[RUN]\tSYSCALL\n");
+	if (ptrace(PTRACE_SYSCALL, chld, 0, 0) != 0)
+		err(1, "PTRACE_SYSCALL");
+	wait_trap(chld);
+
+	if (ptrace(PTRACE_GETREGS, chld, 0, &regs) != 0)
+		err(1, "PTRACE_GETREGS");
+
+	if (regs.user_syscall_nr != SYS_pause ||
+	    regs.user_arg0 != 0 || regs.user_arg1 != 0 ||
+	    regs.user_arg2 != 0 || regs.user_arg3 != 0 ||
+	    regs.user_arg4 != 0 || regs.user_arg5 != 0) {
+		printf("[FAIL]\tArgs after SIGUSR1 are wrong (nr=%lu, args=%lu %lu %lu %lu %lu %lu)\n", (unsigned long)regs.user_syscall_nr, (unsigned long)regs.user_arg0, (unsigned long)regs.user_arg1, (unsigned long)regs.user_arg2, (unsigned long)regs.user_arg3, (unsigned long)regs.user_arg4, (unsigned long)regs.user_arg5);
+		nerrs++;
+	} else {
+		printf("[OK]\tArgs after SIGUSR1 are correct (ax = %ld)\n",
+		       (long)regs.user_ax);
+	}
+
+	/* Poke the regs back in.  This must not break anything. */
+	if (ptrace(PTRACE_SETREGS, chld, 0, &regs) != 0)
+		err(1, "PTRACE_SETREGS");
+
+	/* Catch the (ignored) SIGUSR1. */
+	if (ptrace(PTRACE_CONT, chld, 0, 0) != 0)
+		err(1, "PTRACE_CONT");
+	if (waitpid(chld, &status, 0) != chld)
+		err(1, "waitpid");
+	if (!WIFSTOPPED(status)) {
+		printf("[FAIL]\tChild was stopped for SIGUSR1 (status = 0x%x)\n", status);
+		nerrs++;
+	} else {
+		printf("[OK]\tChild got SIGUSR1\n");
+	}
+
+	/* The next event should be pause(2) again. */
+	printf("[RUN]\tStep again\n");
+	if (ptrace(PTRACE_SYSCALL, chld, 0, 0) != 0)
+		err(1, "PTRACE_SYSCALL");
+	wait_trap(chld);
+
+	/* We should be stopped at pause(2) entry. */
+
+	if (ptrace(PTRACE_GETREGS, chld, 0, &regs) != 0)
+		err(1, "PTRACE_GETREGS");
+
+	if (regs.user_syscall_nr != SYS_pause ||
+	    regs.user_arg0 != 0 || regs.user_arg1 != 0 ||
+	    regs.user_arg2 != 0 || regs.user_arg3 != 0 ||
+	    regs.user_arg4 != 0 || regs.user_arg5 != 0) {
+		printf("[FAIL]\tpause did not restart (nr=%lu, args=%lu %lu %lu %lu %lu %lu)\n", (unsigned long)regs.user_syscall_nr, (unsigned long)regs.user_arg0, (unsigned long)regs.user_arg1, (unsigned long)regs.user_arg2, (unsigned long)regs.user_arg3, (unsigned long)regs.user_arg4, (unsigned long)regs.user_arg5);
+		nerrs++;
+	} else {
+		printf("[OK]\tpause(2) restarted correctly\n");
+	}
+
+	/* Kill it. */
+	kill(chld, SIGKILL);
+	if (waitpid(chld, &status, 0) != chld)
+		err(1, "waitpid");
+}
+
 int main()
 {
 	printf("[RUN]\tCheck int80 return regs\n");
@@ -290,5 +414,7 @@
 
 	test_ptrace_syscall_restart();
 
+	test_restart_under_ptrace();
+
 	return 0;
 }
diff --git a/tools/testing/selftests/x86/sigreturn.c b/tools/testing/selftests/x86/sigreturn.c
index b5aa1ba..8a577e7 100644
--- a/tools/testing/selftests/x86/sigreturn.c
+++ b/tools/testing/selftests/x86/sigreturn.c
@@ -54,6 +54,37 @@
 #include <sys/ptrace.h>
 #include <sys/user.h>
 
+/* Pull in AR_xyz defines. */
+typedef unsigned int u32;
+typedef unsigned short u16;
+#include "../../../../arch/x86/include/asm/desc_defs.h"
+
+/*
+ * Copied from asm/ucontext.h, as asm/ucontext.h conflicts badly with the glibc
+ * headers.
+ */
+#ifdef __x86_64__
+/*
+ * UC_SIGCONTEXT_SS will be set when delivering 64-bit or x32 signals on
+ * kernels that save SS in the sigcontext.  All kernels that set
+ * UC_SIGCONTEXT_SS will correctly restore at least the low 32 bits of esp
+ * regardless of SS (i.e. they implement espfix).
+ *
+ * Kernels that set UC_SIGCONTEXT_SS will also set UC_STRICT_RESTORE_SS
+ * when delivering a signal that came from 64-bit code.
+ *
+ * Sigreturn restores SS as follows:
+ *
+ * if (saved SS is valid || UC_STRICT_RESTORE_SS is set ||
+ *     saved CS is not 64-bit)
+ *         new SS = saved SS  (will fail IRET and signal if invalid)
+ * else
+ *         new SS = a flat 32-bit data segment
+ */
+#define UC_SIGCONTEXT_SS       0x2
+#define UC_STRICT_RESTORE_SS   0x4
+#endif
+
 /*
  * In principle, this test can run on Linux emulation layers (e.g.
  * Illumos "LX branded zones").  Solaris-based kernels reserve LDT
@@ -267,6 +298,9 @@
 /* Instructions for the SIGUSR1 handler. */
 static volatile unsigned short sig_cs, sig_ss;
 static volatile sig_atomic_t sig_trapped, sig_err, sig_trapno;
+#ifdef __x86_64__
+static volatile sig_atomic_t sig_corrupt_final_ss;
+#endif
 
 /* Abstractions for some 32-bit vs 64-bit differences. */
 #ifdef __x86_64__
@@ -305,62 +339,6 @@
 }
 #endif
 
-/* Number of errors in the current test case. */
-static volatile sig_atomic_t nerrs;
-
-/*
- * SIGUSR1 handler.  Sets CS and SS as requested and points IP to the
- * int3 trampoline.  Sets SP to a large known value so that we can see
- * whether the value round-trips back to user mode correctly.
- */
-static void sigusr1(int sig, siginfo_t *info, void *ctx_void)
-{
-	ucontext_t *ctx = (ucontext_t*)ctx_void;
-
-	memcpy(&initial_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t));
-
-	*csptr(ctx) = sig_cs;
-	*ssptr(ctx) = sig_ss;
-
-	ctx->uc_mcontext.gregs[REG_IP] =
-		sig_cs == code16_sel ? 0 : (unsigned long)&int3;
-	ctx->uc_mcontext.gregs[REG_SP] = (unsigned long)0x8badf00d5aadc0deULL;
-	ctx->uc_mcontext.gregs[REG_AX] = 0;
-
-	memcpy(&requested_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t));
-	requested_regs[REG_AX] = *ssptr(ctx);	/* The asm code does this. */
-
-	return;
-}
-
-/*
- * Called after a successful sigreturn.  Restores our state so that
- * the original raise(SIGUSR1) returns.
- */
-static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
-{
-	ucontext_t *ctx = (ucontext_t*)ctx_void;
-
-	sig_err = ctx->uc_mcontext.gregs[REG_ERR];
-	sig_trapno = ctx->uc_mcontext.gregs[REG_TRAPNO];
-
-	unsigned short ss;
-	asm ("mov %%ss,%0" : "=r" (ss));
-
-	greg_t asm_ss = ctx->uc_mcontext.gregs[REG_AX];
-	if (asm_ss != sig_ss && sig == SIGTRAP) {
-		/* Sanity check failure. */
-		printf("[FAIL]\tSIGTRAP: ss = %hx, frame ss = %hx, ax = %llx\n",
-		       ss, *ssptr(ctx), (unsigned long long)asm_ss);
-		nerrs++;
-	}
-
-	memcpy(&resulting_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t));
-	memcpy(&ctx->uc_mcontext.gregs, &initial_regs, sizeof(gregset_t));
-
-	sig_trapped = sig;
-}
-
 /*
  * Checks a given selector for its code bitness or returns -1 if it's not
  * a usable code segment selector.
@@ -394,6 +372,184 @@
 		return -1;	/* Unknown bitness. */
 }
 
+/*
+ * Checks a given selector for its code bitness or returns -1 if it's not
+ * a usable code segment selector.
+ */
+bool is_valid_ss(unsigned short cs)
+{
+	uint32_t valid = 0, ar;
+	asm ("lar %[cs], %[ar]\n\t"
+	     "jnz 1f\n\t"
+	     "mov $1, %[valid]\n\t"
+	     "1:"
+	     : [ar] "=r" (ar), [valid] "+rm" (valid)
+	     : [cs] "r" (cs));
+
+	if (!valid)
+		return false;
+
+	if ((ar & AR_TYPE_MASK) != AR_TYPE_RWDATA &&
+	    (ar & AR_TYPE_MASK) != AR_TYPE_RWDATA_EXPDOWN)
+		return false;
+
+	return (ar & AR_P);
+}
+
+/* Number of errors in the current test case. */
+static volatile sig_atomic_t nerrs;
+
+static void validate_signal_ss(int sig, ucontext_t *ctx)
+{
+#ifdef __x86_64__
+	bool was_64bit = (cs_bitness(*csptr(ctx)) == 64);
+
+	if (!(ctx->uc_flags & UC_SIGCONTEXT_SS)) {
+		printf("[FAIL]\tUC_SIGCONTEXT_SS was not set\n");
+		nerrs++;
+
+		/*
+		 * This happens on Linux 4.1.  The rest will fail, too, so
+		 * return now to reduce the noise.
+		 */
+		return;
+	}
+
+	/* UC_STRICT_RESTORE_SS is set iff we came from 64-bit mode. */
+	if (!!(ctx->uc_flags & UC_STRICT_RESTORE_SS) != was_64bit) {
+		printf("[FAIL]\tUC_STRICT_RESTORE_SS was wrong in signal %d\n",
+		       sig);
+		nerrs++;
+	}
+
+	if (is_valid_ss(*ssptr(ctx))) {
+		/*
+		 * DOSEMU was written before 64-bit sigcontext had SS, and
+		 * it tries to figure out the signal source SS by looking at
+		 * the physical register.  Make sure that keeps working.
+		 */
+		unsigned short hw_ss;
+		asm ("mov %%ss, %0" : "=rm" (hw_ss));
+		if (hw_ss != *ssptr(ctx)) {
+			printf("[FAIL]\tHW SS didn't match saved SS\n");
+			nerrs++;
+		}
+	}
+#endif
+}
+
+/*
+ * SIGUSR1 handler.  Sets CS and SS as requested and points IP to the
+ * int3 trampoline.  Sets SP to a large known value so that we can see
+ * whether the value round-trips back to user mode correctly.
+ */
+static void sigusr1(int sig, siginfo_t *info, void *ctx_void)
+{
+	ucontext_t *ctx = (ucontext_t*)ctx_void;
+
+	validate_signal_ss(sig, ctx);
+
+	memcpy(&initial_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t));
+
+	*csptr(ctx) = sig_cs;
+	*ssptr(ctx) = sig_ss;
+
+	ctx->uc_mcontext.gregs[REG_IP] =
+		sig_cs == code16_sel ? 0 : (unsigned long)&int3;
+	ctx->uc_mcontext.gregs[REG_SP] = (unsigned long)0x8badf00d5aadc0deULL;
+	ctx->uc_mcontext.gregs[REG_AX] = 0;
+
+	memcpy(&requested_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t));
+	requested_regs[REG_AX] = *ssptr(ctx);	/* The asm code does this. */
+
+	return;
+}
+
+/*
+ * Called after a successful sigreturn (via int3) or from a failed
+ * sigreturn (directly by kernel).  Restores our state so that the
+ * original raise(SIGUSR1) returns.
+ */
+static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
+{
+	ucontext_t *ctx = (ucontext_t*)ctx_void;
+
+	validate_signal_ss(sig, ctx);
+
+	sig_err = ctx->uc_mcontext.gregs[REG_ERR];
+	sig_trapno = ctx->uc_mcontext.gregs[REG_TRAPNO];
+
+	unsigned short ss;
+	asm ("mov %%ss,%0" : "=r" (ss));
+
+	greg_t asm_ss = ctx->uc_mcontext.gregs[REG_AX];
+	if (asm_ss != sig_ss && sig == SIGTRAP) {
+		/* Sanity check failure. */
+		printf("[FAIL]\tSIGTRAP: ss = %hx, frame ss = %hx, ax = %llx\n",
+		       ss, *ssptr(ctx), (unsigned long long)asm_ss);
+		nerrs++;
+	}
+
+	memcpy(&resulting_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t));
+	memcpy(&ctx->uc_mcontext.gregs, &initial_regs, sizeof(gregset_t));
+
+#ifdef __x86_64__
+	if (sig_corrupt_final_ss) {
+		if (ctx->uc_flags & UC_STRICT_RESTORE_SS) {
+			printf("[FAIL]\tUC_STRICT_RESTORE_SS was set inappropriately\n");
+			nerrs++;
+		} else {
+			/*
+			 * DOSEMU transitions from 32-bit to 64-bit mode by
+			 * adjusting sigcontext, and it requires that this work
+			 * even if the saved SS is bogus.
+			 */
+			printf("\tCorrupting SS on return to 64-bit mode\n");
+			*ssptr(ctx) = 0;
+		}
+	}
+#endif
+
+	sig_trapped = sig;
+}
+
+#ifdef __x86_64__
+/* Tests recovery if !UC_STRICT_RESTORE_SS */
+static void sigusr2(int sig, siginfo_t *info, void *ctx_void)
+{
+	ucontext_t *ctx = (ucontext_t*)ctx_void;
+
+	if (!(ctx->uc_flags & UC_STRICT_RESTORE_SS)) {
+		printf("[FAIL]\traise(2) didn't set UC_STRICT_RESTORE_SS\n");
+		nerrs++;
+		return;  /* We can't do the rest. */
+	}
+
+	ctx->uc_flags &= ~UC_STRICT_RESTORE_SS;
+	*ssptr(ctx) = 0;
+
+	/* Return.  The kernel should recover without sending another signal. */
+}
+
+static int test_nonstrict_ss(void)
+{
+	clearhandler(SIGUSR1);
+	clearhandler(SIGTRAP);
+	clearhandler(SIGSEGV);
+	clearhandler(SIGILL);
+	sethandler(SIGUSR2, sigusr2, 0);
+
+	nerrs = 0;
+
+	printf("[RUN]\tClear UC_STRICT_RESTORE_SS and corrupt SS\n");
+	raise(SIGUSR2);
+	if (!nerrs)
+		printf("[OK]\tIt worked\n");
+
+	return nerrs;
+}
+#endif
+
 /* Finds a usable code segment of the requested bitness. */
 int find_cs(int bitness)
 {
@@ -576,6 +732,12 @@
 		       errdesc, strsignal(sig_trapped));
 		return 0;
 	} else {
+		/*
+		 * This also implicitly tests UC_STRICT_RESTORE_SS:
+		 * We check that these signals set UC_STRICT_RESTORE_SS and,
+		 * if UC_STRICT_RESTORE_SS doesn't cause strict behavior,
+		 * then we won't get SIGSEGV.
+		 */
 		printf("[FAIL]\tDid not get SIGSEGV\n");
 		return 1;
 	}
@@ -632,6 +794,14 @@
 						    GDT3(gdt_data16_idx));
 	}
 
+#ifdef __x86_64__
+	/* Nasty ABI case: check SS corruption handling. */
+	sig_corrupt_final_ss = 1;
+	total_nerrs += test_valid_sigreturn(32, false, -1);
+	total_nerrs += test_valid_sigreturn(32, true, -1);
+	sig_corrupt_final_ss = 0;
+#endif
+
 	/*
 	 * We're done testing valid sigreturn cases.  Now we test states
 	 * for which sigreturn itself will succeed but the subsequent
@@ -680,5 +850,9 @@
 	if (gdt_npdata32_idx)
 		test_bad_iret(32, GDT3(gdt_npdata32_idx), -1);
 
+#ifdef __x86_64__
+	total_nerrs += test_nonstrict_ss();
+#endif
+
 	return total_nerrs ? 1 : 0;
 }
diff --git a/tools/testing/selftests/x86/syscall_nt.c b/tools/testing/selftests/x86/syscall_nt.c
index 60c06af4..43fcab36 100644
--- a/tools/testing/selftests/x86/syscall_nt.c
+++ b/tools/testing/selftests/x86/syscall_nt.c
@@ -17,6 +17,9 @@
 
 #include <stdio.h>
 #include <unistd.h>
+#include <string.h>
+#include <signal.h>
+#include <err.h>
 #include <sys/syscall.h>
 #include <asm/processor-flags.h>
 
@@ -26,6 +29,8 @@
 # define WIDTH "l"
 #endif
 
+static unsigned int nerrs;
+
 static unsigned long get_eflags(void)
 {
 	unsigned long eflags;
@@ -39,16 +44,52 @@
 		      : : "rm" (eflags) : "flags");
 }
 
-int main()
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+		       int flags)
+{
+	struct sigaction sa;
+	memset(&sa, 0, sizeof(sa));
+	sa.sa_sigaction = handler;
+	sa.sa_flags = SA_SIGINFO | flags;
+	sigemptyset(&sa.sa_mask);
+	if (sigaction(sig, &sa, 0))
+		err(1, "sigaction");
+}
+
+static void sigtrap(int sig, siginfo_t *si, void *ctx_void)
+{
+}
+
+static void do_it(unsigned long extraflags)
+{
+	unsigned long flags;
+
+	set_eflags(get_eflags() | extraflags);
+	syscall(SYS_getpid);
+	flags = get_eflags();
+	if ((flags & extraflags) == extraflags) {
+		printf("[OK]\tThe syscall worked and flags are still set\n");
+	} else {
+		printf("[FAIL]\tThe syscall worked but flags were cleared (flags = 0x%lx but expected 0x%lx set)\n",
+		       flags, extraflags);
+		nerrs++;
+	}
+}
+
+int main(void)
 {
 	printf("[RUN]\tSet NT and issue a syscall\n");
-	set_eflags(get_eflags() | X86_EFLAGS_NT);
-	syscall(SYS_getpid);
-	if (get_eflags() & X86_EFLAGS_NT) {
-		printf("[OK]\tThe syscall worked and NT is still set\n");
-		return 0;
-	} else {
-		printf("[FAIL]\tThe syscall worked but NT was cleared\n");
-		return 1;
-	}
+	do_it(X86_EFLAGS_NT);
+
+	/*
+	 * Now try it again with TF set -- TF forces returns via IRET in all
+	 * cases except non-ptregs-using 64-bit full fast path syscalls.
+	 */
+
+	sethandler(SIGTRAP, sigtrap, 0);
+
+	printf("[RUN]\tSet NT|TF and issue a syscall\n");
+	do_it(X86_EFLAGS_NT | X86_EFLAGS_TF);
+
+	return nerrs == 0 ? 0 : 1;
 }
diff --git a/tools/virtio/asm/barrier.h b/tools/virtio/asm/barrier.h
index 26b7926..ba34f9e 100644
--- a/tools/virtio/asm/barrier.h
+++ b/tools/virtio/asm/barrier.h
@@ -1,15 +1,19 @@
 #if defined(__i386__) || defined(__x86_64__)
 #define barrier() asm volatile("" ::: "memory")
-#define mb() __sync_synchronize()
-
-#define smp_mb()	mb()
-# define dma_rmb()	barrier()
-# define dma_wmb()	barrier()
-# define smp_rmb()	barrier()
-# define smp_wmb()	barrier()
+#define virt_mb() __sync_synchronize()
+#define virt_rmb() barrier()
+#define virt_wmb() barrier()
+/* Atomic store should be enough, but gcc generates worse code in that case. */
+#define virt_store_mb(var, value)  do { \
+	typeof(var) virt_store_mb_value = (value); \
+	__atomic_exchange(&(var), &virt_store_mb_value, &virt_store_mb_value, \
+			  __ATOMIC_SEQ_CST); \
+	barrier(); \
+} while (0);
 /* Weak barriers should be used. If not - it's a bug */
-# define rmb()	abort()
-# define wmb()	abort()
+# define mb() abort()
+# define rmb() abort()
+# define wmb() abort()
 #else
 #error Please fill in barrier macros
 #endif
diff --git a/tools/virtio/linux/compiler.h b/tools/virtio/linux/compiler.h
new file mode 100644
index 0000000..845960e
--- /dev/null
+++ b/tools/virtio/linux/compiler.h
@@ -0,0 +1,9 @@
+#ifndef LINUX_COMPILER_H
+#define LINUX_COMPILER_H
+
+#define WRITE_ONCE(var, val) \
+	(*((volatile typeof(val) *)(&(var))) = (val))
+
+#define READ_ONCE(var) (*((volatile typeof(val) *)(&(var))))
+
+#endif
diff --git a/tools/virtio/linux/kernel.h b/tools/virtio/linux/kernel.h
index 4db7d56..0338499 100644
--- a/tools/virtio/linux/kernel.h
+++ b/tools/virtio/linux/kernel.h
@@ -8,6 +8,7 @@
 #include <assert.h>
 #include <stdarg.h>
 
+#include <linux/compiler.h>
 #include <linux/types.h>
 #include <linux/printk.h>
 #include <linux/bug.h>
diff --git a/tools/virtio/ringtest/Makefile b/tools/virtio/ringtest/Makefile
new file mode 100644
index 0000000..feaa64a
--- /dev/null
+++ b/tools/virtio/ringtest/Makefile
@@ -0,0 +1,22 @@
+all:
+
+all: ring virtio_ring_0_9 virtio_ring_poll
+
+CFLAGS += -Wall
+CFLAGS += -pthread -O2 -ggdb
+LDFLAGS += -pthread -O2 -ggdb
+
+main.o: main.c main.h
+ring.o: ring.c main.h
+virtio_ring_0_9.o: virtio_ring_0_9.c main.h
+virtio_ring_poll.o: virtio_ring_poll.c virtio_ring_0_9.c main.h
+ring: ring.o main.o
+virtio_ring_0_9: virtio_ring_0_9.o main.o
+virtio_ring_poll: virtio_ring_poll.o main.o
+clean:
+	-rm main.o
+	-rm ring.o ring
+	-rm virtio_ring_0_9.o virtio_ring_0_9
+	-rm virtio_ring_poll.o virtio_ring_poll
+
+.PHONY: all clean
diff --git a/tools/virtio/ringtest/README b/tools/virtio/ringtest/README
new file mode 100644
index 0000000..34e94c4
--- /dev/null
+++ b/tools/virtio/ringtest/README
@@ -0,0 +1,2 @@
+Partial implementation of various ring layouts, useful to tune virtio design.
+Uses shared memory heavily.
diff --git a/tools/virtio/ringtest/main.c b/tools/virtio/ringtest/main.c
new file mode 100644
index 0000000..3a5ff43
--- /dev/null
+++ b/tools/virtio/ringtest/main.c
@@ -0,0 +1,366 @@
+/*
+ * Copyright (C) 2016 Red Hat, Inc.
+ * Author: Michael S. Tsirkin <mst@redhat.com>
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Command line processing and common functions for ring benchmarking.
+ */
+#define _GNU_SOURCE
+#include <getopt.h>
+#include <pthread.h>
+#include <assert.h>
+#include <sched.h>
+#include "main.h"
+#include <sys/eventfd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <limits.h>
+
+int runcycles = 10000000;
+int max_outstanding = INT_MAX;
+int batch = 1;
+
+bool do_sleep = false;
+bool do_relax = false;
+bool do_exit = true;
+
+unsigned ring_size = 256;
+
+static int kickfd = -1;
+static int callfd = -1;
+
+void notify(int fd)
+{
+	unsigned long long v = 1;
+	int r;
+
+	vmexit();
+	r = write(fd, &v, sizeof v);
+	assert(r == sizeof v);
+	vmentry();
+}
+
+void wait_for_notify(int fd)
+{
+	unsigned long long v = 1;
+	int r;
+
+	vmexit();
+	r = read(fd, &v, sizeof v);
+	assert(r == sizeof v);
+	vmentry();
+}
+
+void kick(void)
+{
+	notify(kickfd);
+}
+
+void wait_for_kick(void)
+{
+	wait_for_notify(kickfd);
+}
+
+void call(void)
+{
+	notify(callfd);
+}
+
+void wait_for_call(void)
+{
+	wait_for_notify(callfd);
+}
+
+void set_affinity(const char *arg)
+{
+	cpu_set_t cpuset;
+	int ret;
+	pthread_t self;
+	long int cpu;
+	char *endptr;
+
+	if (!arg)
+		return;
+
+	cpu = strtol(arg, &endptr, 0);
+	assert(!*endptr);
+
+	assert(cpu >= 0 || cpu < CPU_SETSIZE);
+
+	self = pthread_self();
+	CPU_ZERO(&cpuset);
+	CPU_SET(cpu, &cpuset);
+
+	ret = pthread_setaffinity_np(self, sizeof(cpu_set_t), &cpuset);
+	assert(!ret);
+}
+
+static void run_guest(void)
+{
+	int completed_before;
+	int completed = 0;
+	int started = 0;
+	int bufs = runcycles;
+	int spurious = 0;
+	int r;
+	unsigned len;
+	void *buf;
+	int tokick = batch;
+
+	for (;;) {
+		if (do_sleep)
+			disable_call();
+		completed_before = completed;
+		do {
+			if (started < bufs &&
+			    started - completed < max_outstanding) {
+				r = add_inbuf(0, NULL, "Hello, world!");
+				if (__builtin_expect(r == 0, true)) {
+					++started;
+					if (!--tokick) {
+						tokick = batch;
+						if (do_sleep)
+							kick_available();
+					}
+
+				}
+			} else
+				r = -1;
+
+			/* Flush out completed bufs if any */
+			if (get_buf(&len, &buf)) {
+				++completed;
+				if (__builtin_expect(completed == bufs, false))
+					return;
+				r = 0;
+			}
+		} while (r == 0);
+		if (completed == completed_before)
+			++spurious;
+		assert(completed <= bufs);
+		assert(started <= bufs);
+		if (do_sleep) {
+			if (enable_call())
+				wait_for_call();
+		} else {
+			poll_used();
+		}
+	}
+}
+
+static void run_host(void)
+{
+	int completed_before;
+	int completed = 0;
+	int spurious = 0;
+	int bufs = runcycles;
+	unsigned len;
+	void *buf;
+
+	for (;;) {
+		if (do_sleep) {
+			if (enable_kick())
+				wait_for_kick();
+		} else {
+			poll_avail();
+		}
+		if (do_sleep)
+			disable_kick();
+		completed_before = completed;
+		while (__builtin_expect(use_buf(&len, &buf), true)) {
+			if (do_sleep)
+				call_used();
+			++completed;
+			if (__builtin_expect(completed == bufs, false))
+				return;
+		}
+		if (completed == completed_before)
+			++spurious;
+		assert(completed <= bufs);
+		if (completed == bufs)
+			break;
+	}
+}
+
+void *start_guest(void *arg)
+{
+	set_affinity(arg);
+	run_guest();
+	pthread_exit(NULL);
+}
+
+void *start_host(void *arg)
+{
+	set_affinity(arg);
+	run_host();
+	pthread_exit(NULL);
+}
+
+static const char optstring[] = "";
+static const struct option longopts[] = {
+	{
+		.name = "help",
+		.has_arg = no_argument,
+		.val = 'h',
+	},
+	{
+		.name = "host-affinity",
+		.has_arg = required_argument,
+		.val = 'H',
+	},
+	{
+		.name = "guest-affinity",
+		.has_arg = required_argument,
+		.val = 'G',
+	},
+	{
+		.name = "ring-size",
+		.has_arg = required_argument,
+		.val = 'R',
+	},
+	{
+		.name = "run-cycles",
+		.has_arg = required_argument,
+		.val = 'C',
+	},
+	{
+		.name = "outstanding",
+		.has_arg = required_argument,
+		.val = 'o',
+	},
+	{
+		.name = "batch",
+		.has_arg = required_argument,
+		.val = 'b',
+	},
+	{
+		.name = "sleep",
+		.has_arg = no_argument,
+		.val = 's',
+	},
+	{
+		.name = "relax",
+		.has_arg = no_argument,
+		.val = 'x',
+	},
+	{
+		.name = "exit",
+		.has_arg = no_argument,
+		.val = 'e',
+	},
+	{
+	}
+};
+
+static void help(void)
+{
+	fprintf(stderr, "Usage: <test> [--help]"
+		" [--host-affinity H]"
+		" [--guest-affinity G]"
+		" [--ring-size R (default: %d)]"
+		" [--run-cycles C (default: %d)]"
+		" [--batch b]"
+		" [--outstanding o]"
+		" [--sleep]"
+		" [--relax]"
+		" [--exit]"
+		"\n",
+		ring_size,
+		runcycles);
+}
+
+int main(int argc, char **argv)
+{
+	int ret;
+	pthread_t host, guest;
+	void *tret;
+	char *host_arg = NULL;
+	char *guest_arg = NULL;
+	char *endptr;
+	long int c;
+
+	kickfd = eventfd(0, 0);
+	assert(kickfd >= 0);
+	callfd = eventfd(0, 0);
+	assert(callfd >= 0);
+
+	for (;;) {
+		int o = getopt_long(argc, argv, optstring, longopts, NULL);
+		switch (o) {
+		case -1:
+			goto done;
+		case '?':
+			help();
+			exit(2);
+		case 'H':
+			host_arg = optarg;
+			break;
+		case 'G':
+			guest_arg = optarg;
+			break;
+		case 'R':
+			ring_size = strtol(optarg, &endptr, 0);
+			assert(ring_size && !(ring_size & (ring_size - 1)));
+			assert(!*endptr);
+			break;
+		case 'C':
+			c = strtol(optarg, &endptr, 0);
+			assert(!*endptr);
+			assert(c > 0 && c < INT_MAX);
+			runcycles = c;
+			break;
+		case 'o':
+			c = strtol(optarg, &endptr, 0);
+			assert(!*endptr);
+			assert(c > 0 && c < INT_MAX);
+			max_outstanding = c;
+			break;
+		case 'b':
+			c = strtol(optarg, &endptr, 0);
+			assert(!*endptr);
+			assert(c > 0 && c < INT_MAX);
+			batch = c;
+			break;
+		case 's':
+			do_sleep = true;
+			break;
+		case 'x':
+			do_relax = true;
+			break;
+		case 'e':
+			do_exit = true;
+			break;
+		default:
+			help();
+			exit(4);
+			break;
+		}
+	}
+
+	/* does nothing here, used to make sure all smp APIs compile */
+	smp_acquire();
+	smp_release();
+	smp_mb();
+done:
+
+	if (batch > max_outstanding)
+		batch = max_outstanding;
+
+	if (optind < argc) {
+		help();
+		exit(4);
+	}
+	alloc_ring();
+
+	ret = pthread_create(&host, NULL, start_host, host_arg);
+	assert(!ret);
+	ret = pthread_create(&guest, NULL, start_guest, guest_arg);
+	assert(!ret);
+
+	ret = pthread_join(guest, &tret);
+	assert(!ret);
+	ret = pthread_join(host, &tret);
+	assert(!ret);
+	return 0;
+}
diff --git a/tools/virtio/ringtest/main.h b/tools/virtio/ringtest/main.h
new file mode 100644
index 0000000..16917ac
--- /dev/null
+++ b/tools/virtio/ringtest/main.h
@@ -0,0 +1,119 @@
+/*
+ * Copyright (C) 2016 Red Hat, Inc.
+ * Author: Michael S. Tsirkin <mst@redhat.com>
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Common macros and functions for ring benchmarking.
+ */
+#ifndef MAIN_H
+#define MAIN_H
+
+#include <stdbool.h>
+
+extern bool do_exit;
+
+#if defined(__x86_64__) || defined(__i386__)
+#include "x86intrin.h"
+
+static inline void wait_cycles(unsigned long long cycles)
+{
+	unsigned long long t;
+
+	t = __rdtsc();
+	while (__rdtsc() - t < cycles) {}
+}
+
+#define VMEXIT_CYCLES 500
+#define VMENTRY_CYCLES 500
+
+#else
+static inline void wait_cycles(unsigned long long cycles)
+{
+	_Exit(5);
+}
+#define VMEXIT_CYCLES 0
+#define VMENTRY_CYCLES 0
+#endif
+
+static inline void vmexit(void)
+{
+	if (!do_exit)
+		return;
+	
+	wait_cycles(VMEXIT_CYCLES);
+}
+static inline void vmentry(void)
+{
+	if (!do_exit)
+		return;
+	
+	wait_cycles(VMENTRY_CYCLES);
+}
+
+/* implemented by ring */
+void alloc_ring(void);
+/* guest side */
+int add_inbuf(unsigned, void *, void *);
+void *get_buf(unsigned *, void **);
+void disable_call();
+bool enable_call();
+void kick_available();
+void poll_used();
+/* host side */
+void disable_kick();
+bool enable_kick();
+bool use_buf(unsigned *, void **);
+void call_used();
+void poll_avail();
+
+/* implemented by main */
+extern bool do_sleep;
+void kick(void);
+void wait_for_kick(void);
+void call(void);
+void wait_for_call(void);
+
+extern unsigned ring_size;
+
+/* Compiler barrier - similar to what Linux uses */
+#define barrier() asm volatile("" ::: "memory")
+
+/* Is there a portable way to do this? */
+#if defined(__x86_64__) || defined(__i386__)
+#define cpu_relax() asm ("rep; nop" ::: "memory")
+#else
+#define cpu_relax() assert(0)
+#endif
+
+extern bool do_relax;
+
+static inline void busy_wait(void)
+{
+	if (do_relax)
+		cpu_relax();
+	else
+		/* prevent compiler from removing busy loops */
+		barrier();
+} 
+
+/*
+ * Not using __ATOMIC_SEQ_CST since gcc docs say they are only synchronized
+ * with other __ATOMIC_SEQ_CST calls.
+ */
+#define smp_mb() __sync_synchronize()
+
+/*
+ * This abuses the atomic builtins for thread fences, and
+ * adds a compiler barrier.
+ */
+#define smp_release() do { \
+    barrier(); \
+    __atomic_thread_fence(__ATOMIC_RELEASE); \
+} while (0)
+
+#define smp_acquire() do { \
+    __atomic_thread_fence(__ATOMIC_ACQUIRE); \
+    barrier(); \
+} while (0)
+
+#endif
diff --git a/tools/virtio/ringtest/ring.c b/tools/virtio/ringtest/ring.c
new file mode 100644
index 0000000..c25c8d2
--- /dev/null
+++ b/tools/virtio/ringtest/ring.c
@@ -0,0 +1,272 @@
+/*
+ * Copyright (C) 2016 Red Hat, Inc.
+ * Author: Michael S. Tsirkin <mst@redhat.com>
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Simple descriptor-based ring. virtio 0.9 compatible event index is used for
+ * signalling, unconditionally.
+ */
+#define _GNU_SOURCE
+#include "main.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+/* Next - Where next entry will be written.
+ * Prev - "Next" value when event triggered previously.
+ * Event - Peer requested event after writing this entry.
+ */
+static inline bool need_event(unsigned short event,
+			      unsigned short next,
+			      unsigned short prev)
+{
+	return (unsigned short)(next - event - 1) < (unsigned short)(next - prev);
+}
+
+/* Design:
+ * Guest adds descriptors with unique index values and DESC_HW in flags.
+ * Host overwrites used descriptors with correct len, index, and DESC_HW clear.
+ * Flags are always set last.
+ */
+#define DESC_HW 0x1
+
+struct desc {
+	unsigned short flags;
+	unsigned short index;
+	unsigned len;
+	unsigned long long addr;
+};
+
+/* how much padding is needed to avoid false cache sharing */
+#define HOST_GUEST_PADDING 0x80
+
+/* Mostly read */
+struct event {
+	unsigned short kick_index;
+	unsigned char reserved0[HOST_GUEST_PADDING - 2];
+	unsigned short call_index;
+	unsigned char reserved1[HOST_GUEST_PADDING - 2];
+};
+
+struct data {
+	void *buf; /* descriptor is writeable, we can't get buf from there */
+	void *data;
+} *data;
+
+struct desc *ring;
+struct event *event;
+
+struct guest {
+	unsigned avail_idx;
+	unsigned last_used_idx;
+	unsigned num_free;
+	unsigned kicked_avail_idx;
+	unsigned char reserved[HOST_GUEST_PADDING - 12];
+} guest;
+
+struct host {
+	/* we do not need to track last avail index
+	 * unless we have more than one in flight.
+	 */
+	unsigned used_idx;
+	unsigned called_used_idx;
+	unsigned char reserved[HOST_GUEST_PADDING - 4];
+} host;
+
+/* implemented by ring */
+void alloc_ring(void)
+{
+	int ret;
+	int i;
+
+	ret = posix_memalign((void **)&ring, 0x1000, ring_size * sizeof *ring);
+	if (ret) {
+		perror("Unable to allocate ring buffer.\n");
+		exit(3);
+	}
+	event = malloc(sizeof *event);
+	if (!event) {
+		perror("Unable to allocate event buffer.\n");
+		exit(3);
+	}
+	memset(event, 0, sizeof *event);
+	guest.avail_idx = 0;
+	guest.kicked_avail_idx = -1;
+	guest.last_used_idx = 0;
+	host.used_idx = 0;
+	host.called_used_idx = -1;
+	for (i = 0; i < ring_size; ++i) {
+		struct desc desc = {
+			.index = i,
+		};
+		ring[i] = desc;
+	}
+	guest.num_free = ring_size;
+	data = malloc(ring_size * sizeof *data);
+	if (!data) {
+		perror("Unable to allocate data buffer.\n");
+		exit(3);
+	}
+	memset(data, 0, ring_size * sizeof *data);
+}
+
+/* guest side */
+int add_inbuf(unsigned len, void *buf, void *datap)
+{
+	unsigned head, index;
+
+	if (!guest.num_free)
+		return -1;
+
+	guest.num_free--;
+	head = (ring_size - 1) & (guest.avail_idx++);
+
+	/* Start with a write. On MESI architectures this helps
+	 * avoid a shared state with consumer that is polling this descriptor.
+	 */
+	ring[head].addr = (unsigned long)(void*)buf;
+	ring[head].len = len;
+	/* read below might bypass write above. That is OK because it's just an
+	 * optimization. If this happens, we will get the cache line in a
+	 * shared state which is unfortunate, but probably not worth it to
+	 * add an explicit full barrier to avoid this.
+	 */
+	barrier();
+	index = ring[head].index;
+	data[index].buf = buf;
+	data[index].data = datap;
+	/* Barrier A (for pairing) */
+	smp_release();
+	ring[head].flags = DESC_HW;
+
+	return 0;
+}
+
+void *get_buf(unsigned *lenp, void **bufp)
+{
+	unsigned head = (ring_size - 1) & guest.last_used_idx;
+	unsigned index;
+	void *datap;
+
+	if (ring[head].flags & DESC_HW)
+		return NULL;
+	/* Barrier B (for pairing) */
+	smp_acquire();
+	*lenp = ring[head].len;
+	index = ring[head].index & (ring_size - 1);
+	datap = data[index].data;
+	*bufp = data[index].buf;
+	data[index].buf = NULL;
+	data[index].data = NULL;
+	guest.num_free++;
+	guest.last_used_idx++;
+	return datap;
+}
+
+void poll_used(void)
+{
+	unsigned head = (ring_size - 1) & guest.last_used_idx;
+
+	while (ring[head].flags & DESC_HW)
+		busy_wait();
+}
+
+void disable_call()
+{
+	/* Doing nothing to disable calls might cause
+	 * extra interrupts, but reduces the number of cache misses.
+	 */
+}
+
+bool enable_call()
+{
+	unsigned head = (ring_size - 1) & guest.last_used_idx;
+
+	event->call_index = guest.last_used_idx;
+	/* Flush call index write */
+	/* Barrier D (for pairing) */
+	smp_mb();
+	return ring[head].flags & DESC_HW;
+}
+
+void kick_available(void)
+{
+	/* Flush in previous flags write */
+	/* Barrier C (for pairing) */
+	smp_mb();
+	if (!need_event(event->kick_index,
+			guest.avail_idx,
+			guest.kicked_avail_idx))
+		return;
+
+	guest.kicked_avail_idx = guest.avail_idx;
+	kick();
+}
+
+/* host side */
+void disable_kick()
+{
+	/* Doing nothing to disable kicks might cause
+	 * extra interrupts, but reduces the number of cache misses.
+	 */
+}
+
+bool enable_kick()
+{
+	unsigned head = (ring_size - 1) & host.used_idx;
+
+	event->kick_index = host.used_idx;
+	/* Barrier C (for pairing) */
+	smp_mb();
+	return !(ring[head].flags & DESC_HW);
+}
+
+void poll_avail(void)
+{
+	unsigned head = (ring_size - 1) & host.used_idx;
+
+	while (!(ring[head].flags & DESC_HW))
+		busy_wait();
+}
+
+bool use_buf(unsigned *lenp, void **bufp)
+{
+	unsigned head = (ring_size - 1) & host.used_idx;
+
+	if (!(ring[head].flags & DESC_HW))
+		return false;
+
+	/* make sure length read below is not speculated */
+	/* Barrier A (for pairing) */
+	smp_acquire();
+
+	/* simple in-order completion: we don't need
+	 * to touch index at all. This also means we
+	 * can just modify the descriptor in-place.
+	 */
+	ring[head].len--;
+	/* Make sure len is valid before flags.
+	 * Note: alternative is to write len and flags in one access -
+	 * possible on 64 bit architectures but wmb is free on Intel anyway
+	 * so I have no way to test whether it's a gain.
+	 */
+	/* Barrier B (for pairing) */
+	smp_release();
+	ring[head].flags = 0;
+	host.used_idx++;
+	return true;
+}
+
+void call_used(void)
+{
+	/* Flush in previous flags write */
+	/* Barrier D (for pairing) */
+	smp_mb();
+	if (!need_event(event->call_index,
+			host.used_idx,
+			host.called_used_idx))
+		return;
+
+	host.called_used_idx = host.used_idx;
+	call();
+}
diff --git a/tools/virtio/ringtest/run-on-all.sh b/tools/virtio/ringtest/run-on-all.sh
new file mode 100755
index 0000000..52b0f71
--- /dev/null
+++ b/tools/virtio/ringtest/run-on-all.sh
@@ -0,0 +1,24 @@
+#!/bin/sh
+
+#use last CPU for host. Why not the first?
+#many devices tend to use cpu0 by default so
+#it tends to be busier
+HOST_AFFINITY=$(cd /dev/cpu; ls|grep -v '[a-z]'|sort -n|tail -1)
+
+#run command on all cpus
+for cpu in $(cd /dev/cpu; ls|grep -v '[a-z]'|sort -n);
+do
+	#Don't run guest and host on same CPU
+	#It actually works ok if using signalling
+	if
+		(echo "$@" | grep -e "--sleep" > /dev/null) || \
+			test $HOST_AFFINITY '!=' $cpu
+	then
+		echo "GUEST AFFINITY $cpu"
+		"$@" --host-affinity $HOST_AFFINITY --guest-affinity $cpu
+	fi
+done
+echo "NO GUEST AFFINITY"
+"$@" --host-affinity $HOST_AFFINITY
+echo "NO AFFINITY"
+"$@"
diff --git a/tools/virtio/ringtest/virtio_ring_0_9.c b/tools/virtio/ringtest/virtio_ring_0_9.c
new file mode 100644
index 0000000..47c9a1a
--- /dev/null
+++ b/tools/virtio/ringtest/virtio_ring_0_9.c
@@ -0,0 +1,316 @@
+/*
+ * Copyright (C) 2016 Red Hat, Inc.
+ * Author: Michael S. Tsirkin <mst@redhat.com>
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Partial implementation of virtio 0.9. event index is used for signalling,
+ * unconditionally. Design roughly follows linux kernel implementation in order
+ * to be able to judge its performance.
+ */
+#define _GNU_SOURCE
+#include "main.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <assert.h>
+#include <string.h>
+#include <linux/virtio_ring.h>
+
+struct data {
+	void *data;
+} *data;
+
+struct vring ring;
+
+/* enabling the below activates experimental ring polling code
+ * (which skips index reads on consumer in favor of looking at
+ * high bits of ring id ^ 0x8000).
+ */
+/* #ifdef RING_POLL */
+
+/* how much padding is needed to avoid false cache sharing */
+#define HOST_GUEST_PADDING 0x80
+
+struct guest {
+	unsigned short avail_idx;
+	unsigned short last_used_idx;
+	unsigned short num_free;
+	unsigned short kicked_avail_idx;
+	unsigned short free_head;
+	unsigned char reserved[HOST_GUEST_PADDING - 10];
+} guest;
+
+struct host {
+	/* we do not need to track last avail index
+	 * unless we have more than one in flight.
+	 */
+	unsigned short used_idx;
+	unsigned short called_used_idx;
+	unsigned char reserved[HOST_GUEST_PADDING - 4];
+} host;
+
+/* implemented by ring */
+void alloc_ring(void)
+{
+	int ret;
+	int i;
+	void *p;
+
+	ret = posix_memalign(&p, 0x1000, vring_size(ring_size, 0x1000));
+	if (ret) {
+		perror("Unable to allocate ring buffer.\n");
+		exit(3);
+	}
+	memset(p, 0, vring_size(ring_size, 0x1000));
+	vring_init(&ring, ring_size, p, 0x1000);
+
+	guest.avail_idx = 0;
+	guest.kicked_avail_idx = -1;
+	guest.last_used_idx = 0;
+	/* Put everything in free lists. */
+	guest.free_head = 0;
+	for (i = 0; i < ring_size - 1; i++)
+		ring.desc[i].next = i + 1;
+	host.used_idx = 0;
+	host.called_used_idx = -1;
+	guest.num_free = ring_size;
+	data = malloc(ring_size * sizeof *data);
+	if (!data) {
+		perror("Unable to allocate data buffer.\n");
+		exit(3);
+	}
+	memset(data, 0, ring_size * sizeof *data);
+}
+
+/* guest side */
+int add_inbuf(unsigned len, void *buf, void *datap)
+{
+	unsigned head, avail;
+	struct vring_desc *desc;
+
+	if (!guest.num_free)
+		return -1;
+
+	head = guest.free_head;
+	guest.num_free--;
+
+	desc = ring.desc;
+	desc[head].flags = VRING_DESC_F_NEXT;
+	desc[head].addr = (unsigned long)(void *)buf;
+	desc[head].len = len;
+	/* We do it like this to simulate the way
+	 * we'd have to flip it if we had multiple
+	 * descriptors.
+	 */
+	desc[head].flags &= ~VRING_DESC_F_NEXT;
+	guest.free_head = desc[head].next;
+
+	data[head].data = datap;
+
+#ifdef RING_POLL
+	/* Barrier A (for pairing) */
+	smp_release();
+	avail = guest.avail_idx++;
+	ring.avail->ring[avail & (ring_size - 1)] =
+		(head | (avail & ~(ring_size - 1))) ^ 0x8000;
+#else
+	avail = (ring_size - 1) & (guest.avail_idx++);
+	ring.avail->ring[avail] = head;
+	/* Barrier A (for pairing) */
+	smp_release();
+#endif
+	ring.avail->idx = guest.avail_idx;
+	return 0;
+}
+
+void *get_buf(unsigned *lenp, void **bufp)
+{
+	unsigned head;
+	unsigned index;
+	void *datap;
+
+#ifdef RING_POLL
+	head = (ring_size - 1) & guest.last_used_idx;
+	index = ring.used->ring[head].id;
+	if ((index ^ guest.last_used_idx ^ 0x8000) & ~(ring_size - 1))
+		return NULL;
+	/* Barrier B (for pairing) */
+	smp_acquire();
+	index &= ring_size - 1;
+#else
+	if (ring.used->idx == guest.last_used_idx)
+		return NULL;
+	/* Barrier B (for pairing) */
+	smp_acquire();
+	head = (ring_size - 1) & guest.last_used_idx;
+	index = ring.used->ring[head].id;
+#endif
+	*lenp = ring.used->ring[head].len;
+	datap = data[index].data;
+	*bufp = (void*)(unsigned long)ring.desc[index].addr;
+	data[index].data = NULL;
+	ring.desc[index].next = guest.free_head;
+	guest.free_head = index;
+	guest.num_free++;
+	guest.last_used_idx++;
+	return datap;
+}
+
+void poll_used(void)
+{
+#ifdef RING_POLL
+	unsigned head = (ring_size - 1) & guest.last_used_idx;
+
+	for (;;) {
+		unsigned index = ring.used->ring[head].id;
+
+		if ((index ^ guest.last_used_idx ^ 0x8000) & ~(ring_size - 1))
+			busy_wait();
+		else
+			break;
+	}
+#else
+	unsigned head = guest.last_used_idx;
+
+	while (ring.used->idx == head)
+		busy_wait();
+#endif
+}
+
+void disable_call()
+{
+	/* Doing nothing to disable calls might cause
+	 * extra interrupts, but reduces the number of cache misses.
+	 */
+}
+
+bool enable_call()
+{
+	unsigned short last_used_idx;
+
+	vring_used_event(&ring) = (last_used_idx = guest.last_used_idx);
+	/* Flush call index write */
+	/* Barrier D (for pairing) */
+	smp_mb();
+#ifdef RING_POLL
+	{
+		unsigned short head = last_used_idx & (ring_size - 1);
+		unsigned index = ring.used->ring[head].id;
+
+		return (index ^ last_used_idx ^ 0x8000) & ~(ring_size - 1);
+	}
+#else
+	return ring.used->idx == last_used_idx;
+#endif
+}
+
+void kick_available(void)
+{
+	/* Flush in previous flags write */
+	/* Barrier C (for pairing) */
+	smp_mb();
+	if (!vring_need_event(vring_avail_event(&ring),
+			      guest.avail_idx,
+			      guest.kicked_avail_idx))
+		return;
+
+	guest.kicked_avail_idx = guest.avail_idx;
+	kick();
+}
+
+/* host side */
+void disable_kick()
+{
+	/* Doing nothing to disable kicks might cause
+	 * extra interrupts, but reduces the number of cache misses.
+	 */
+}
+
+bool enable_kick()
+{
+	unsigned head = host.used_idx;
+
+	vring_avail_event(&ring) = head;
+	/* Barrier C (for pairing) */
+	smp_mb();
+#ifdef RING_POLL
+	{
+		unsigned index = ring.avail->ring[head & (ring_size - 1)];
+
+		return (index ^ head ^ 0x8000) & ~(ring_size - 1);
+	}
+#else
+	return head == ring.avail->idx;
+#endif
+}
+
+void poll_avail(void)
+{
+	unsigned head = host.used_idx;
+#ifdef RING_POLL
+	for (;;) {
+		unsigned index = ring.avail->ring[head & (ring_size - 1)];
+		if ((index ^ head ^ 0x8000) & ~(ring_size - 1))
+			busy_wait();
+		else
+			break;
+	}
+#else
+	while (ring.avail->idx == head)
+		busy_wait();
+#endif
+}
+
+bool use_buf(unsigned *lenp, void **bufp)
+{
+	unsigned used_idx = host.used_idx;
+	struct vring_desc *desc;
+	unsigned head;
+
+#ifdef RING_POLL
+	head = ring.avail->ring[used_idx & (ring_size - 1)];
+	if ((used_idx ^ head ^ 0x8000) & ~(ring_size - 1))
+		return false;
+	/* Barrier A (for pairing) */
+	smp_acquire();
+
+	used_idx &= ring_size - 1;
+	desc = &ring.desc[head & (ring_size - 1)];
+#else
+	if (used_idx == ring.avail->idx)
+		return false;
+
+	/* Barrier A (for pairing) */
+	smp_acquire();
+
+	used_idx &= ring_size - 1;
+	head = ring.avail->ring[used_idx];
+	desc = &ring.desc[head];
+#endif
+
+	*lenp = desc->len;
+	*bufp = (void *)(unsigned long)desc->addr;
+
+	/* now update used ring */
+	ring.used->ring[used_idx].id = head;
+	ring.used->ring[used_idx].len = desc->len - 1;
+	/* Barrier B (for pairing) */
+	smp_release();
+	host.used_idx++;
+	ring.used->idx = host.used_idx;
+	
+	return true;
+}
+
+void call_used(void)
+{
+	/* Flush in previous flags write */
+	/* Barrier D (for pairing) */
+	smp_mb();
+	if (!vring_need_event(vring_used_event(&ring),
+			      host.used_idx,
+			      host.called_used_idx))
+		return;
+
+	host.called_used_idx = host.used_idx;
+	call();
+}
diff --git a/tools/virtio/ringtest/virtio_ring_poll.c b/tools/virtio/ringtest/virtio_ring_poll.c
new file mode 100644
index 0000000..84fc2c5
--- /dev/null
+++ b/tools/virtio/ringtest/virtio_ring_poll.c
@@ -0,0 +1,2 @@
+#define RING_POLL 1
+#include "virtio_ring_0_9.c"
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 69bca18..ea60646 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -143,7 +143,7 @@
  * Check if there was a change in the timer state (should we raise or lower
  * the line level to the GIC).
  */
-static void kvm_timer_update_state(struct kvm_vcpu *vcpu)
+static int kvm_timer_update_state(struct kvm_vcpu *vcpu)
 {
 	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
 
@@ -154,10 +154,12 @@
 	 * until we call this function from kvm_timer_flush_hwstate.
 	 */
 	if (!vgic_initialized(vcpu->kvm))
-	    return;
+		return -ENODEV;
 
 	if (kvm_timer_should_fire(vcpu) != timer->irq.level)
 		kvm_timer_update_irq(vcpu, !timer->irq.level);
+
+	return 0;
 }
 
 /*
@@ -218,7 +220,8 @@
 	bool phys_active;
 	int ret;
 
-	kvm_timer_update_state(vcpu);
+	if (kvm_timer_update_state(vcpu))
+		return;
 
 	/*
 	* If we enter the guest with the virtual input level to the VGIC
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 043032c..00429b3 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -1875,8 +1875,8 @@
 static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs)
 {
 	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
-
-	int sz = (nr_irqs - VGIC_NR_PRIVATE_IRQS) / 8;
+	int nr_longs = BITS_TO_LONGS(nr_irqs - VGIC_NR_PRIVATE_IRQS);
+	int sz = nr_longs * sizeof(unsigned long);
 	vgic_cpu->pending_shared = kzalloc(sz, GFP_KERNEL);
 	vgic_cpu->active_shared = kzalloc(sz, GFP_KERNEL);
 	vgic_cpu->pend_act_shared = kzalloc(sz, GFP_KERNEL);
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index 3531599..65da997 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -97,8 +97,8 @@
 	 * This memory barrier pairs with prepare_to_wait's set_current_state()
 	 */
 	smp_mb();
-	if (waitqueue_active(&vcpu->wq))
-		wake_up_interruptible(&vcpu->wq);
+	if (swait_active(&vcpu->wq))
+		swake_up(&vcpu->wq);
 
 	mmput(mm);
 	kvm_put_kvm(vcpu->kvm);
@@ -172,7 +172,7 @@
 	 * do alloc nowait since if we are going to sleep anyway we
 	 * may as well sleep faulting in page
 	 */
-	work = kmem_cache_zalloc(async_pf_cache, GFP_NOWAIT);
+	work = kmem_cache_zalloc(async_pf_cache, GFP_NOWAIT | __GFP_NOWARN);
 	if (!work)
 		return 0;
 
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index a11cfd2..5af50c3 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -216,8 +216,7 @@
 	vcpu->kvm = kvm;
 	vcpu->vcpu_id = id;
 	vcpu->pid = NULL;
-	vcpu->halt_poll_ns = 0;
-	init_waitqueue_head(&vcpu->wq);
+	init_swait_queue_head(&vcpu->wq);
 	kvm_async_pf_vcpu_init(vcpu);
 
 	vcpu->pre_pcpu = -1;
@@ -1952,6 +1951,9 @@
 	else
 		val *= halt_poll_ns_grow;
 
+	if (val > halt_poll_ns)
+		val = halt_poll_ns;
+
 	vcpu->halt_poll_ns = val;
 	trace_kvm_halt_poll_ns_grow(vcpu->vcpu_id, val, old);
 }
@@ -1990,7 +1992,7 @@
 void kvm_vcpu_block(struct kvm_vcpu *vcpu)
 {
 	ktime_t start, cur;
-	DEFINE_WAIT(wait);
+	DECLARE_SWAITQUEUE(wait);
 	bool waited = false;
 	u64 block_ns;
 
@@ -2015,7 +2017,7 @@
 	kvm_arch_vcpu_blocking(vcpu);
 
 	for (;;) {
-		prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
+		prepare_to_swait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
 
 		if (kvm_vcpu_check_block(vcpu) < 0)
 			break;
@@ -2024,7 +2026,7 @@
 		schedule();
 	}
 
-	finish_wait(&vcpu->wq, &wait);
+	finish_swait(&vcpu->wq, &wait);
 	cur = ktime_get();
 
 	kvm_arch_vcpu_unblocking(vcpu);
@@ -2056,11 +2058,11 @@
 {
 	int me;
 	int cpu = vcpu->cpu;
-	wait_queue_head_t *wqp;
+	struct swait_queue_head *wqp;
 
 	wqp = kvm_arch_vcpu_wq(vcpu);
-	if (waitqueue_active(wqp)) {
-		wake_up_interruptible(wqp);
+	if (swait_active(wqp)) {
+		swake_up(wqp);
 		++vcpu->stat.halt_wakeup;
 	}
 
@@ -2161,7 +2163,7 @@
 				continue;
 			if (vcpu == me)
 				continue;
-			if (waitqueue_active(&vcpu->wq) && !kvm_arch_vcpu_runnable(vcpu))
+			if (swait_active(&vcpu->wq) && !kvm_arch_vcpu_runnable(vcpu))
 				continue;
 			if (!kvm_vcpu_eligible_for_directed_yield(vcpu))
 				continue;